63#define DEBUG_TYPE "openmp-ir-builder"
70 cl::desc(
"Use optimistic attributes describing "
71 "'as-if' properties of runtime calls."),
75 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
76 cl::desc(
"Factor for the unroll threshold to account for code "
77 "simplifications still taking place"),
88 if (!IP1.isSet() || !IP2.isSet())
90 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
// Fragment (elided): validity check over the combined schedule-type encoding.
// Strips the monotonicity bits first so only the base+ordering encoding is
// matched against the known-valid combinations below.
95 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
// Unordered (plain worksharing) schedule kinds.
96 case OMPScheduleType::UnorderedStaticChunked:
97 case OMPScheduleType::UnorderedStatic:
98 case OMPScheduleType::UnorderedDynamicChunked:
99 case OMPScheduleType::UnorderedGuidedChunked:
100 case OMPScheduleType::UnorderedRuntime:
101 case OMPScheduleType::UnorderedAuto:
102 case OMPScheduleType::UnorderedTrapezoidal:
103 case OMPScheduleType::UnorderedGreedy:
104 case OMPScheduleType::UnorderedBalanced:
105 case OMPScheduleType::UnorderedGuidedIterativeChunked:
106 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
107 case OMPScheduleType::UnorderedSteal:
108 case OMPScheduleType::UnorderedStaticBalancedChunked:
109 case OMPScheduleType::UnorderedGuidedSimd:
110 case OMPScheduleType::UnorderedRuntimeSimd:
// Ordered variants (ordered clause present).
111 case OMPScheduleType::OrderedStaticChunked:
112 case OMPScheduleType::OrderedStatic:
113 case OMPScheduleType::OrderedDynamicChunked:
114 case OMPScheduleType::OrderedGuidedChunked:
115 case OMPScheduleType::OrderedRuntime:
116 case OMPScheduleType::OrderedAuto:
// NOTE(review): "OrderdTrapezoidal" looks like a typo for
// "OrderedTrapezoidal" (spelled that way at the NomergeOrdered case below and
// in the Unordered case above) — confirm against the OMPScheduleType enum
// declaration in OMPConstants.h; if the enumerator itself carries the typo,
// fix it at the declaration site, not here.
117 case OMPScheduleType::OrderdTrapezoidal:
// Nomerge variants (nomerge modifier set).
118 case OMPScheduleType::NomergeUnorderedStaticChunked:
119 case OMPScheduleType::NomergeUnorderedStatic:
120 case OMPScheduleType::NomergeUnorderedDynamicChunked:
121 case OMPScheduleType::NomergeUnorderedGuidedChunked:
122 case OMPScheduleType::NomergeUnorderedRuntime:
123 case OMPScheduleType::NomergeUnorderedAuto:
124 case OMPScheduleType::NomergeUnorderedTrapezoidal:
125 case OMPScheduleType::NomergeUnorderedGreedy:
126 case OMPScheduleType::NomergeUnorderedBalanced:
127 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
128 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
129 case OMPScheduleType::NomergeUnorderedSteal:
130 case OMPScheduleType::NomergeOrderedStaticChunked:
131 case OMPScheduleType::NomergeOrderedStatic:
132 case OMPScheduleType::NomergeOrderedDynamicChunked:
133 case OMPScheduleType::NomergeOrderedGuidedChunked:
134 case OMPScheduleType::NomergeOrderedRuntime:
135 case OMPScheduleType::NomergeOrderedAuto:
136 case OMPScheduleType::NomergeOrderedTrapezoidal:
// (lines 137-143 elided from this view)
// Isolate the monotonicity bits; a value equal to the full mask would mean
// both monotonic and nonmonotonic were set, which the check below rejects.
144 SchedType & OMPScheduleType::MonotonicityMask;
145 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
// Fragment (elided): selects AMDGPU grid-value constants by wavefront width.
// If the target-features set contains "+wavefrontsize64", use the 64-lane
// table; otherwise fall back to the 32-lane table.
156 if (Features.
count(
"+wavefrontsize64"))
157 return omp::getAMDGPUGridValues<64>();
158 return omp::getAMDGPUGridValues<32>();
// Fragment (elided): maps an OpenMP schedule clause kind to the base
// (modifier-free) OMPScheduleType encoding.
169 bool HasSimdModifier) {
171 switch (ClauseKind) {
// Default behaves like static; chunked variant chosen when a chunk size
// expression was given.
172 case OMP_SCHEDULE_Default:
173 case OMP_SCHEDULE_Static:
174 return HasChunks ? OMPScheduleType::BaseStaticChunked
175 : OMPScheduleType::BaseStatic;
176 case OMP_SCHEDULE_Dynamic:
177 return OMPScheduleType::BaseDynamicChunked;
// Guided/runtime have dedicated simd encodings when the simd modifier is
// present on the schedule clause.
178 case OMP_SCHEDULE_Guided:
179 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
180 : OMPScheduleType::BaseGuidedChunked;
// (the return for the Auto case, original line 182, is elided here)
181 case OMP_SCHEDULE_Auto:
183 case OMP_SCHEDULE_Runtime:
184 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
185 : OMPScheduleType::BaseRuntime;
// Fragment (elided): adds the ordering modifier (ordered/unordered) to a base
// schedule type. Precondition: no modifier bits may be set yet.
193 bool HasOrderedClause) {
194 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
195 OMPScheduleType::None &&
196 "Must not have ordering nor monotonicity flags already set");
199 ? OMPScheduleType::ModifierOrdered
200 : OMPScheduleType::ModifierUnordered;
201 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
// The simd-specific encodings have no ordered counterpart, so an ordered
// clause demotes them to the plain ordered guided/runtime schedules.
204 if (OrderingScheduleType ==
205 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
206 return OMPScheduleType::OrderedGuidedChunked;
207 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
208 OMPScheduleType::ModifierOrdered))
209 return OMPScheduleType::OrderedRuntime;
211 return OrderingScheduleType;
// Fragment (elided): adds the monotonic/nonmonotonic modifier to a schedule
// type that already carries its ordering bits. monotonic and nonmonotonic are
// mutually exclusive by assertion; explicit modifiers win over the default.
217 bool HasSimdModifier,
bool HasMonotonic,
218 bool HasNonmonotonic,
bool HasOrderedClause) {
219 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
220 OMPScheduleType::None &&
221 "Must not have monotonicity flags already set");
222 assert((!HasMonotonic || !HasNonmonotonic) &&
223 "Monotonic and Nonmonotonic are contradicting each other");
226 return ScheduleType | OMPScheduleType::ModifierMonotonic;
227 }
else if (HasNonmonotonic) {
228 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
// When no explicit modifier is given: static schedules stay unmodified
// (elided branch), other schedules default to nonmonotonic — presumably per
// the OpenMP >= 5.0 default; the elided comment lines 230-237 would confirm.
238 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
239 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
245 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
253 bool HasSimdModifier,
bool HasMonotonicModifier,
254 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
260 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
261 HasNonmonotonicModifier, HasOrderedClause);
274 VType = vAlloca->getAllocatedType();
302 auto *Br = cast<BranchInst>(Term);
303 assert(!Br->isConditional() &&
304 "BB's terminator must be an unconditional branch (or degenerate)");
307 Br->setSuccessor(0,
Target);
312 NewBr->setDebugLoc(
DL);
317 assert(New->getFirstInsertionPt() == New->begin() &&
318 "Target BB must not have PHI nodes");
322 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
350 New->replaceSuccessorsPhiUsesWith(Old, New);
395 const Twine &
Name =
"",
bool AsPtr =
true) {
403 FakeVal = FakeValAddr;
// Fragment (elided): bitmask flags mirroring the OpenMP 'requires' directive
// clauses, plus the OpenMPIRBuilderConfig constructors that populate them.
431enum OpenMPOffloadingRequiresDirFlags {
// No 'requires' information recorded yet (distinct from OMP_REQ_NONE).
433 OMP_REQ_UNDEFINED = 0x000,
// 'requires' seen with no relevant clauses.
435 OMP_REQ_NONE = 0x001,
437 OMP_REQ_REVERSE_OFFLOAD = 0x002,
439 OMP_REQ_UNIFIED_ADDRESS = 0x004,
441 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
443 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
// Default constructor: flags start undefined.
450 : RequiresFlags(OMP_REQ_UNDEFINED) {}
// Full constructor: translate the individual 'requires' booleans into flag
// bits; flags remain OMP_REQ_UNDEFINED when none of them are set.
453 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
454 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
455 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
456 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
457 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
458 RequiresFlags(OMP_REQ_UNDEFINED) {
459 if (HasRequiresReverseOffload)
460 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
461 if (HasRequiresUnifiedAddress)
462 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
463 if (HasRequiresUnifiedSharedMemory)
464 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
465 if (HasRequiresDynamicAllocators)
466 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
// Accessor fragments: each tests the corresponding flag bit.
470 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
474 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
478 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
482 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
487 :
static_cast<int64_t
>(OMP_REQ_NONE);
492 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
494 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
499 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
501 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
506 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
508 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
513 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
515 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
528 constexpr const size_t MaxDim = 3;
536 Value *NumThreads3D =
539 seq<unsigned>(1, std::min(KernelArgs.
NumTeams.size(), MaxDim)))
543 seq<unsigned>(1, std::min(KernelArgs.
NumThreads.size(), MaxDim)))
567 auto FnAttrs = Attrs.getFnAttrs();
568 auto RetAttrs = Attrs.getRetAttrs();
570 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
575 bool Param =
true) ->
void {
576 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
577 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
578 if (HasSignExt || HasZeroExt) {
579 assert(AS.getNumAttributes() == 1 &&
580 "Currently not handling extension attr combined with others.");
582 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
585 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
592#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
593#include "llvm/Frontend/OpenMP/OMPKinds.def"
597#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
599 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
600 addAttrSet(RetAttrs, RetAttrSet, false); \
601 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
602 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
603 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
605#include "llvm/Frontend/OpenMP/OMPKinds.def"
619#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
621 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
623 Fn = M.getFunction(Str); \
625#include "llvm/Frontend/OpenMP/OMPKinds.def"
631#define OMP_RTL(Enum, Str, ...) \
633 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
635#include "llvm/Frontend/OpenMP/OMPKinds.def"
639 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
649 LLVMContext::MD_callback,
651 2, {-1, -1},
true)}));
664 assert(Fn &&
"Failed to create OpenMP runtime function");
671 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
672 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
687 for (
auto Inst =
Block->getReverseIterator()->begin();
688 Inst !=
Block->getReverseIterator()->end();) {
689 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
713 ParallelRegionBlockSet.
clear();
715 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
734 ".omp_par", ArgsInZeroAddressSpace);
738 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
740 "Expected OpenMP outlining to be possible!");
742 for (
auto *V : OI.ExcludeArgsFromAggregate)
749 if (TargetCpuAttr.isStringAttribute())
752 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
753 if (TargetFeaturesAttr.isStringAttribute())
754 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
757 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
759 "OpenMP outlined functions should not return a value!");
771 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
778 "Expected instructions to add in the outlined region entry");
785 if (
I.isTerminator())
788 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
791 OI.EntryBB->moveBefore(&ArtificialEntry);
798 if (OI.PostOutlineCB)
799 OI.PostOutlineCB(*OutlinedFn);
830 errs() <<
"Error of kind: " << Kind
831 <<
" when emitting offload entries and metadata during "
832 "OMPIRBuilder finalization \n";
839 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
841 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
867 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
871 if (UsedArray.
empty())
878 GV->setSection(
"llvm.metadata");
887 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
895 unsigned Reserve2Flags) {
897 LocFlags |= OMP_IDENT_FLAG_KMPC;
905 ConstantInt::get(
Int32, Reserve2Flags),
906 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
913 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
914 if (
GV.getInitializer() == Initializer)
919 M, OpenMPIRBuilder::Ident,
934 SrcLocStrSize = LocStr.
size();
943 if (
GV.isConstant() &&
GV.hasInitializer() &&
944 GV.getInitializer() == Initializer)
955 unsigned Line,
unsigned Column,
961 Buffer.
append(FunctionName);
963 Buffer.
append(std::to_string(Line));
965 Buffer.
append(std::to_string(Column));
973 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
984 if (
DIFile *DIF = DIL->getFile())
985 if (std::optional<StringRef> Source = DIF->getSource())
991 DIL->getColumn(), SrcLocStrSize);
1003 "omp_global_thread_num");
1008 bool ForceSimpleCall,
bool CheckCancelFlag) {
1018 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1021 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1024 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1027 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1030 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1043 bool UseCancelBarrier =
1048 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1049 : OMPRTL___kmpc_barrier),
1052 if (UseCancelBarrier && CheckCancelFlag)
1062 omp::Directive CanceledDirective) {
1074 Value *CancelKind =
nullptr;
1075 switch (CanceledDirective) {
1076#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1077 case DirectiveEnum: \
1078 CancelKind = Builder.getInt32(Value); \
1080#include "llvm/Frontend/OpenMP/OMPKinds.def"
1092 if (CanceledDirective == OMPD_parallel) {
1096 omp::Directive::OMPD_unknown,
1110 UI->eraseFromParent();
1123 auto *KernelArgsPtr =
1136 NumThreads, HostPtr, KernelArgsPtr};
1164 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1168 Value *Return =
nullptr;
1188 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1189 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1205 emitBlock(OffloadContBlock, CurFn,
true);
1210 Value *CancelFlag, omp::Directive CanceledDirective,
1213 "Unexpected cancellation!");
1266 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1269 "Expected at least tid and bounded tid as arguments");
1270 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1273 assert(CI &&
"Expected call instruction to outlined function");
1274 CI->
getParent()->setName(
"omp_parallel");
1277 Type *PtrTy = OMPIRBuilder->VoidPtr;
1281 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1285 Value *Args = ArgsAlloca;
1293 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1305 Value *Parallel51CallArgs[] = {
1309 NumThreads ? NumThreads : Builder.
getInt32(-1),
1312 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1315 Builder.
getInt64(NumCapturedVars)};
1320 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1335 I->eraseFromParent();
1357 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1358 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1366 F->addMetadata(LLVMContext::MD_callback,
1375 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1378 "Expected at least tid and bounded tid as arguments");
1379 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1382 CI->
getParent()->setName(
"omp_parallel");
1386 Value *ForkCallArgs[] = {
1387 Ident, Builder.
getInt32(NumCapturedVars),
1388 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1391 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1400 auto PtrTy = OMPIRBuilder->VoidPtr;
1401 if (IfCondition && NumCapturedVars == 0) {
1405 if (IfCondition && RealArgs.
back()->getType() != PtrTy)
1423 I->eraseFromParent();
1431 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1458 if (ProcBind != OMP_PROC_BIND_default) {
1462 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1490 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1495 "zero.addr.ascast");
1519 if (IP.getBlock()->end() == IP.getPoint()) {
1525 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1526 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1527 "Unexpected insertion point for finalization call!");
1563 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1566 assert(BodyGenCB &&
"Expected body generation callback!");
1568 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1571 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1577 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1579 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1580 ThreadID, ToBeDeletedVec);
1585 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1587 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1604 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1605 Blocks.push_back(PRegOutlinedExitBB);
1616 ".omp_par", ArgsInZeroAddressSpace);
1621 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1627 if (
auto *
GV = dyn_cast_if_present<GlobalVariable>(
I))
1628 return GV->getValueType() == OpenMPIRBuilder::Ident;
1633 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1639 if (&V == TIDAddr || &V == ZeroAddr) {
1645 for (
Use &U : V.uses())
1646 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1647 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1657 if (!V.getType()->isPointerTy()) {
1676 Value *ReplacementValue =
nullptr;
1677 CallInst *CI = dyn_cast<CallInst>(&V);
1679 ReplacementValue = PrivTID;
1682 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue);
1690 assert(ReplacementValue &&
1691 "Expected copy/create callback to set replacement value!");
1692 if (ReplacementValue == &V)
1697 UPtr->set(ReplacementValue);
1716 for (
Value *Input : Inputs) {
1718 if (
Error Err = PrivHelper(*Input))
1722 for (
Value *Output : Outputs)
1726 "OpenMP outlining should not produce live-out values!");
1728 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1731 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1739 assert(FiniInfo.DK == OMPD_parallel &&
1740 "Unexpected finalization stack state!");
1745 if (
Error Err = FiniCB(PreFiniIP))
1751 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1752 UI->eraseFromParent();
1818 if (Dependencies.
empty())
1838 Type *DependInfo = OMPBuilder.DependInfo;
1841 Value *DepArray =
nullptr;
1847 DepArray = Builder.
CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1849 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1855 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1860 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1862 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1867 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1870 static_cast<unsigned int>(Dep.DepKind)),
1913 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1924 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1926 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1927 Mergeable, Priority, EventHandle, TaskAllocaBB,
1928 ToBeDeleted](
Function &OutlinedFn)
mutable {
1930 assert(OutlinedFn.getNumUses() == 1 &&
1931 "there must be a single user for the outlined function");
1932 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1936 bool HasShareds = StaleCI->
arg_size() > 1;
1984 assert(ArgStructAlloca &&
1985 "Unable to find the alloca instruction corresponding to arguments "
1986 "for extracted function");
1989 assert(ArgStructType &&
"Unable to find struct type corresponding to "
1990 "arguments for extracted function");
1998 TaskAllocFn, {Ident, ThreadID,
Flags,
1999 TaskSize, SharedsSize,
2007 OMPRTL___kmpc_task_allow_completion_event);
2035 Constant *Zero = ConstantInt::get(Int32Ty, 0);
2044 TaskStructType, TaskGEP, {Zero, ConstantInt::get(Int32Ty, 4)});
2048 PriorityData, {Zero, Zero});
2052 Value *DepArray =
nullptr;
2053 if (Dependencies.
size()) {
2068 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2075 static_cast<unsigned int>(RTLDependInfoFields::Len));
2082 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2085 static_cast<unsigned int>(Dep.DepKind)),
2116 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2122 if (Dependencies.
size()) {
2146 if (Dependencies.
size()) {
2167 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2171 I->eraseFromParent();
2220 if (IP.getBlock()->end() != IP.getPoint())
2231 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2232 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2233 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2264 unsigned CaseNumber = 0;
2265 for (
auto SectionCB : SectionCBs) {
2283 Value *LB = ConstantInt::get(I32Ty, 0);
2284 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2285 Value *ST = ConstantInt::get(I32Ty, 1);
2287 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2292 applyStaticWorkshareLoop(Loc.
DL, *
LoopInfo, AllocaIP, !IsNowait);
2299 assert(FiniInfo.DK == OMPD_sections &&
2300 "Unexpected finalization stack state!");
2307 AfterIP = {FiniBB, FiniBB->
begin()};
2321 if (IP.getBlock()->end() != IP.getPoint())
2340 Directive OMPD = Directive::OMPD_sections;
2343 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2354Value *OpenMPIRBuilder::getGPUThreadID() {
2357 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2361Value *OpenMPIRBuilder::getGPUWarpSize() {
2366Value *OpenMPIRBuilder::getNVPTXWarpID() {
2371Value *OpenMPIRBuilder::getNVPTXLaneID() {
2373 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2374 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2379Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *
From,
2384 assert(FromSize > 0 &&
"From size must be greater than zero");
2385 assert(ToSize > 0 &&
"To size must be greater than zero");
2386 if (FromType == ToType)
2388 if (FromSize == ToSize)
2403Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2408 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2412 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2416 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2417 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2418 Value *WarpSizeCast =
2420 Value *ShuffleCall =
2422 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2425void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2441 Value *ElemPtr = DstAddr;
2443 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2455 if ((
Size / IntSize) > 1) {
2479 Value *Res = createRuntimeShuffleFunction(
2488 Value *LocalElemPtr =
2495 Value *Res = createRuntimeShuffleFunction(
2509void OpenMPIRBuilder::emitReductionListCopy(
2510 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2512 CopyOptionsTy CopyOptions) {
2515 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2519 for (
auto En :
enumerate(ReductionInfos)) {
2520 const ReductionInfo &RI = En.value();
2521 Value *SrcElementAddr =
nullptr;
2522 Value *DestElementAddr =
nullptr;
2523 Value *DestElementPtrAddr =
nullptr;
2525 bool ShuffleInElement =
false;
2528 bool UpdateDestListPtr =
false;
2532 ReductionArrayTy, SrcBase,
2533 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2539 ReductionArrayTy, DestBase,
2540 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2546 ".omp.reduction.element");
2549 DestElementAddr = DestAlloca;
2552 DestElementAddr->
getName() +
".ascast");
2554 ShuffleInElement =
true;
2555 UpdateDestListPtr =
true;
2567 if (ShuffleInElement) {
2568 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2569 RemoteLaneOffset, ReductionArrayTy);
2571 switch (RI.EvaluationKind) {
2580 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2582 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2584 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2586 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2589 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2591 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2613 if (UpdateDestListPtr) {
2616 DestElementAddr->
getName() +
".ascast");
2632 "_omp_reduction_inter_warp_copy_func", &
M);
2655 "__openmp_nvptx_data_transfer_temporary_storage";
2659 if (!TransferMedium) {
2668 Value *GPUThreadID = getGPUThreadID();
2670 Value *LaneID = getNVPTXLaneID();
2672 Value *WarpID = getNVPTXWarpID();
2681 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2685 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2688 NumWarpsAlloca->
getName() +
".ascast");
2699 for (
auto En :
enumerate(ReductionInfos)) {
2704 const ReductionInfo &RI = En.value();
2706 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2709 unsigned NumIters = RealTySize / TySize;
2712 Value *Cnt =
nullptr;
2713 Value *CntAddr =
nullptr;
2723 CntAddr->
getName() +
".ascast");
2743 omp::Directive::OMPD_unknown,
2747 return BarrierIP1.takeError();
2758 auto *RedListArrayTy =
2764 {ConstantInt::get(IndexTy, 0),
2765 ConstantInt::get(IndexTy, En.index())});
2791 omp::Directive::OMPD_unknown,
2795 return BarrierIP2.takeError();
2802 Value *NumWarpsVal =
2805 Value *IsActiveThread =
2816 Value *TargetElemPtrPtr =
2818 {ConstantInt::get(IndexTy, 0),
2819 ConstantInt::get(IndexTy, En.index())});
2820 Value *TargetElemPtrVal =
2822 Value *TargetElemPtr = TargetElemPtrVal;
2828 Value *SrcMediumValue =
2847 RealTySize %= TySize;
2857Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2863 {Builder.getPtrTy(), Builder.getInt16Ty(),
2864 Builder.getInt16Ty(), Builder.getInt16Ty()},
2868 "_omp_reduction_shuffle_and_reduce_func", &
M);
2889 Type *ReduceListArgType = ReduceListArg->
getType();
2893 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2895 LaneIDArg->
getName() +
".addr");
2897 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2899 AlgoVerArg->
getName() +
".addr");
2906 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2909 ReduceListAlloca, ReduceListArgType,
2910 ReduceListAlloca->
getName() +
".ascast");
2912 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2914 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2915 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2917 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2920 RemoteReductionListAlloca->
getName() +
".ascast");
2929 Value *RemoteLaneOffset =
2938 emitReductionListCopy(
2940 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2971 Value *RemoteOffsetComp =
2988 ->addFnAttr(Attribute::NoUnwind);
3009 ReductionInfos, RemoteListAddrCast, ReduceList);
3022Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3029 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3033 "_omp_reduction_list_to_global_copy_func", &
M);
3050 BufferArg->
getName() +
".addr");
3057 BufferArgAlloca->
getName() +
".ascast");
3062 ReduceListArgAlloca->
getName() +
".ascast");
3068 Value *LocalReduceList =
3070 Value *BufferArgVal =
3075 for (
auto En :
enumerate(ReductionInfos)) {
3076 const ReductionInfo &RI = En.value();
3077 auto *RedListArrayTy =
3081 RedListArrayTy, LocalReduceList,
3082 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3090 ReductionsBufferTy, BufferVD, 0, En.index());
3092 switch (RI.EvaluationKind) {
3100 RI.ElementType, ElemPtr, 0, 0,
".realp");
3102 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3104 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3106 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3109 RI.ElementType, GlobVal, 0, 0,
".realp");
3111 RI.ElementType, GlobVal, 0, 1,
".imagp");
3132Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3139 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3143 "_omp_reduction_list_to_global_reduce_func", &
M);
3160 BufferArg->
getName() +
".addr");
3165 auto *RedListArrayTy =
3170 Value *LocalReduceList =
3175 BufferArgAlloca->
getName() +
".ascast");
3180 ReduceListArgAlloca->
getName() +
".ascast");
3183 LocalReduceList->
getName() +
".ascast");
3193 for (
auto En :
enumerate(ReductionInfos)) {
3195 RedListArrayTy, LocalReduceListAddrCast,
3196 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3201 ReductionsBufferTy, BufferVD, 0, En.index());
3209 ->addFnAttr(Attribute::NoUnwind);
3215Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3222 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3226 "_omp_reduction_global_to_list_copy_func", &
M);
3243 BufferArg->
getName() +
".addr");
3250 BufferArgAlloca->
getName() +
".ascast");
3255 ReduceListArgAlloca->
getName() +
".ascast");
3260 Value *LocalReduceList =
3266 for (
auto En :
enumerate(ReductionInfos)) {
3268 auto *RedListArrayTy =
3272 RedListArrayTy, LocalReduceList,
3273 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3280 ReductionsBufferTy, BufferVD, 0, En.index());
3323Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3330 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3334 "_omp_reduction_global_to_list_reduce_func", &
M);
3351 BufferArg->
getName() +
".addr");
3361 Value *LocalReduceList =
3366 BufferArgAlloca->
getName() +
".ascast");
3371 ReduceListArgAlloca->
getName() +
".ascast");
3374 LocalReduceList->
getName() +
".ascast");
3384 for (
auto En :
enumerate(ReductionInfos)) {
3386 RedListArrayTy, ReductionList,
3387 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3392 ReductionsBufferTy, BufferVD, 0, En.index());
3400 ->addFnAttr(Attribute::NoUnwind);
3406std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3407 std::string Suffix =
3409 return (
Name + Suffix).str();
3414 ReductionGenCBKind ReductionGenCBKind,
AttributeList FuncAttrs) {
3416 {Builder.getPtrTy(), Builder.getPtrTy()},
3418 std::string
Name = getReductionFuncName(ReducerName);
3430 Value *LHSArrayPtr =
nullptr;
3431 Value *RHSArrayPtr =
nullptr;
3442 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3444 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3454 for (
auto En :
enumerate(ReductionInfos)) {
3455 const ReductionInfo &RI = En.value();
3457 RedArrayTy, RHSArrayPtr,
3458 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3461 RHSI8Ptr, RI.PrivateVariable->getType(),
3462 RHSI8Ptr->
getName() +
".ascast");
3465 RedArrayTy, LHSArrayPtr,
3466 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3469 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3481 return AfterIP.takeError();
3483 return ReductionFunc;
3489 for (
auto En :
enumerate(ReductionInfos)) {
3490 unsigned Index = En.index();
3491 const ReductionInfo &RI = En.value();
3492 Value *LHSFixupPtr, *RHSFixupPtr;
3494 Builder.
saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3499 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3500 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3504 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3505 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3511 return ReductionFunc;
3519 assert(RI.Variable &&
"expected non-null variable");
3520 assert(RI.PrivateVariable &&
"expected non-null private variable");
3521 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3522 "expected non-null reduction generator callback");
3525 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3526 "expected variables and their private equivalents to have the same "
3529 assert(RI.Variable->getType()->isPointerTy() &&
3530 "expected variables to be pointers");
3537 bool IsNoWait,
bool IsTeamsReduction,
bool HasDistribute,
3539 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3553 if (ReductionInfos.
size() == 0)
3568 if (!ReductionResult)
3570 Function *ReductionFunc = *ReductionResult;
3574 if (GridValue.has_value())
3592 Value *ReductionListAlloca =
3595 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3599 for (
auto En :
enumerate(ReductionInfos)) {
3602 RedArrayTy, ReductionList,
3603 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3610 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3612 emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
3620 unsigned MaxDataSize = 0;
3622 for (
auto En :
enumerate(ReductionInfos)) {
3624 if (
Size > MaxDataSize)
3626 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3628 Value *ReductionDataSize =
3630 if (!IsTeamsReduction) {
3631 Value *SarFuncCast =
3635 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3638 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3643 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3645 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3646 Function *LtGCFunc = emitListToGlobalCopyFunction(
3647 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3648 Function *LtGRFunc = emitListToGlobalReduceFunction(
3649 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3650 Function *GtLCFunc = emitGlobalToListCopyFunction(
3651 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3652 Function *GtLRFunc = emitGlobalToListReduceFunction(
3653 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3657 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3659 Value *Args3[] = {SrcLocInfo,
3660 KernelTeamsReductionPtr,
3672 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3689 for (
auto En :
enumerate(ReductionInfos)) {
3696 Value *LHSPtr, *RHSPtr;
3698 &LHSPtr, &RHSPtr, CurFunc));
3703 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3707 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3711 assert(
false &&
"Unhandled ReductionGenCBKind");
3727 ".omp.reduction.func", &M);
3738 assert(RI.Variable &&
"expected non-null variable");
3739 assert(RI.PrivateVariable &&
"expected non-null private variable");
3740 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
3741 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
3742 "expected variables and their private equivalents to have the same "
3744 assert(RI.Variable->getType()->isPointerTy() &&
3745 "expected variables to be pointers");
3758 unsigned NumReductions = ReductionInfos.
size();
3765 for (
auto En :
enumerate(ReductionInfos)) {
3766 unsigned Index = En.index();
3769 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3784 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3789 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3792 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3794 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3795 : RuntimeFunction::OMPRTL___kmpc_reduce);
3798 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3799 ReductionFunc, Lock},
3818 for (
auto En :
enumerate(ReductionInfos)) {
3824 if (!IsByRef[En.index()]) {
3826 "red.value." +
Twine(En.index()));
3828 Value *PrivateRedValue =
3830 "red.private.value." +
Twine(En.index()));
3841 if (!IsByRef[En.index()])
3845 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3846 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3854 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3878 for (
auto En :
enumerate(ReductionInfos)) {
3881 RedArrayTy, LHSArrayPtr, 0, En.index());
3886 RedArrayTy, RHSArrayPtr, 0, En.index());
3900 if (!IsByRef[En.index()])
3916 Directive OMPD = Directive::OMPD_master;
3921 Value *Args[] = {Ident, ThreadId};
3929 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3940 Directive OMPD = Directive::OMPD_masked;
3946 Value *ArgsEnd[] = {Ident, ThreadId};
3954 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3989 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4002 "omp_" +
Name +
".next",
true);
4013 CL->Header = Header;
4032 NextBB, NextBB,
Name);
4057 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4067 auto *IndVarTy = cast<IntegerType>(Start->getType());
4068 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4069 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4075 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
4103 Value *CountIfLooping;
4104 if (InclusiveStop) {
4114 "omp_" +
Name +
".tripcount");
4135 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4138 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4144 InsertPointTy AllocaIP,
4145 bool NeedsBarrier) {
4146 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4148 "Require dedicated allocate IP");
4160 Type *IVTy =
IV->getType();
4180 Constant *One = ConstantInt::get(IVTy, 1);
4188 Constant *SchedulingType = ConstantInt::get(
4189 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStatic));
4194 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4195 PUpperBound, PStride, One,
Zero});
4200 CLI->setTripCount(TripCount);
4222 omp::Directive::OMPD_for,
false,
4225 return BarrierIP.takeError();
4235OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4237 InsertPointTy AllocaIP,
4240 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4241 assert(ChunkSize &&
"Chunk size is required");
4246 Type *IVTy =
IV->getType();
4248 "Max supported tripcount bitwidth is 64 bits");
4250 :
Type::getInt64Ty(Ctx);
4253 Constant *One = ConstantInt::get(InternalIVTy, 1);
4265 Value *PLowerBound =
4267 Value *PUpperBound =
4276 Value *CastedChunkSize =
4278 Value *CastedTripCount =
4281 Constant *SchedulingType = ConstantInt::get(
4282 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4296 SchedulingType, PLastIter,
4297 PLowerBound, PUpperBound,
4302 Value *FirstChunkStart =
4304 Value *FirstChunkStop =
4309 Value *NextChunkStride =
4314 Value *DispatchCounter;
4322 DispatchCounter = Counter;
4325 FirstChunkStart, CastedTripCount, NextChunkStride,
4349 Value *IsLastChunk =
4351 Value *CountUntilOrigTripCount =
4354 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4355 Value *BackcastedChunkTC =
4357 CLI->setTripCount(BackcastedChunkTC);
4362 Value *BackcastedDispatchCounter =
4379 return AfterIP.takeError();
4400 case WorksharingLoopType::ForStaticLoop:
4403 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4406 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4408 case WorksharingLoopType::DistributeStaticLoop:
4411 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4414 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4416 case WorksharingLoopType::DistributeForStaticLoop:
4419 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4422 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4425 if (Bitwidth != 32 && Bitwidth != 64) {
4447 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4448 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4453 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4454 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4459 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4460 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4461 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4498 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
4506 "Expected unique undroppable user of outlined function");
4507 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
4508 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
4510 "Expected outlined function call to be located in loop preheader");
4512 if (OutlinedFnCallInstruction->
arg_size() > 1)
4519 LoopBodyArg, ParallelTaskPtr, TripCount,
4522 for (
auto &ToBeDeletedItem : ToBeDeleted)
4523 ToBeDeletedItem->eraseFromParent();
4529 InsertPointTy AllocaIP,
4542 OI.OuterAllocaBB = AllocaIP.getBlock();
4547 "omp.prelatch",
true);
4567 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
4569 ParallelRegionBlockSet.
end());
4589 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
4598 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
4599 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
4605 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
4612 OI.PostOutlineCB = [=, ToBeDeletedVec =
4613 std::move(ToBeDeleted)](
Function &OutlinedFn) {
4615 ToBeDeletedVec, LoopType);
4623 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
4624 bool HasSimdModifier,
bool HasMonotonicModifier,
4625 bool HasNonmonotonicModifier,
bool HasOrderedClause,
4628 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
4630 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
4631 HasNonmonotonicModifier, HasOrderedClause);
4633 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
4634 OMPScheduleType::ModifierOrdered;
4635 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
4636 case OMPScheduleType::BaseStatic:
4637 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
4639 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4640 NeedsBarrier, ChunkSize);
4642 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier);
4644 case OMPScheduleType::BaseStaticChunked:
4646 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4647 NeedsBarrier, ChunkSize);
4649 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
4652 case OMPScheduleType::BaseRuntime:
4653 case OMPScheduleType::BaseAuto:
4654 case OMPScheduleType::BaseGreedy:
4655 case OMPScheduleType::BaseBalanced:
4656 case OMPScheduleType::BaseSteal:
4657 case OMPScheduleType::BaseGuidedSimd:
4658 case OMPScheduleType::BaseRuntimeSimd:
4660 "schedule type does not support user-defined chunk sizes");
4662 case OMPScheduleType::BaseDynamicChunked:
4663 case OMPScheduleType::BaseGuidedChunked:
4664 case OMPScheduleType::BaseGuidedIterativeChunked:
4665 case OMPScheduleType::BaseGuidedAnalyticalChunked:
4666 case OMPScheduleType::BaseStaticBalancedChunked:
4667 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4668 NeedsBarrier, ChunkSize);
4684 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
4687 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
4700 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
4703 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
4715 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
4718 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
4724 InsertPointTy AllocaIP,
4726 bool NeedsBarrier,
Value *Chunk) {
4727 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4729 "Require dedicated allocate IP");
4731 "Require valid schedule type");
4733 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
4734 OMPScheduleType::ModifierOrdered;
4745 Type *IVTy =
IV->getType();
4763 Constant *One = ConstantInt::get(IVTy, 1);
4784 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4788 {SrcLoc, ThreadNum, SchedulingType, One,
4789 UpperBound, One, Chunk});
4799 PLowerBound, PUpperBound, PStride});
4800 Constant *Zero32 = ConstantInt::get(I32Type, 0);
4809 auto *PI = cast<PHINode>(Phi);
4810 PI->setIncomingBlock(0, OuterCond);
4811 PI->setIncomingValue(0, LowerBound);
4815 auto *Br = cast<BranchInst>(Term);
4816 Br->setSuccessor(0, OuterCond);
4824 auto *CI = cast<CmpInst>(Comp);
4825 CI->setOperand(1, UpperBound);
4828 auto *BI = cast<BranchInst>(Branch);
4829 assert(BI->getSuccessor(1) == Exit);
4830 BI->setSuccessor(1, OuterCond);
4844 omp::Directive::OMPD_for,
false,
4847 return BarrierIP.takeError();
4866 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
4867 for (
Use &U : BB->uses()) {
4868 auto *UseInst = dyn_cast<Instruction>(U.getUser());
4871 if (BBsToErase.count(UseInst->getParent()))
4878 while (BBsToErase.remove_if(HasRemainingUses)) {
4889 assert(
Loops.size() >= 1 &&
"At least one loop required");
4890 size_t NumLoops =
Loops.size();
4894 return Loops.front();
4906 Loop->collectControlBlocks(OldControlBBs);
4910 if (ComputeIP.
isSet())
4917 Value *CollapsedTripCount =
nullptr;
4920 "All loops to collapse must be valid canonical loops");
4921 Value *OrigTripCount = L->getTripCount();
4922 if (!CollapsedTripCount) {
4923 CollapsedTripCount = OrigTripCount;
4935 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
4943 Value *Leftover = Result->getIndVar();
4945 NewIndVars.
resize(NumLoops);
4946 for (
int i = NumLoops - 1; i >= 1; --i) {
4947 Value *OrigTripCount =
Loops[i]->getTripCount();
4950 NewIndVars[i] = NewIndVar;
4955 NewIndVars[0] = Leftover;
4964 BasicBlock *ContinueBlock = Result->getBody();
4966 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
4973 ContinueBlock =
nullptr;
4974 ContinuePred = NextSrc;
4981 for (
size_t i = 0; i < NumLoops - 1; ++i)
4982 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
4988 for (
size_t i = NumLoops - 1; i > 0; --i)
4989 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
4992 ContinueWith(Result->getLatch(),
nullptr);
4999 for (
size_t i = 0; i < NumLoops; ++i)
5000 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5014std::vector<CanonicalLoopInfo *>
5018 "Must pass as many tile sizes as there are loops");
5019 int NumLoops =
Loops.size();
5020 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5032 Loop->collectControlBlocks(OldControlBBs);
5040 assert(L->isValid() &&
"All input loops must be valid canonical loops");
5041 OrigTripCounts.
push_back(L->getTripCount());
5052 for (
int i = 0; i < NumLoops - 1; ++i) {
5065 for (
int i = 0; i < NumLoops; ++i) {
5067 Value *OrigTripCount = OrigTripCounts[i];
5080 Value *FloorTripOverflow =
5086 "omp_floor" +
Twine(i) +
".tripcount",
true);
5094 std::vector<CanonicalLoopInfo *> Result;
5095 Result.reserve(NumLoops * 2);
5108 auto EmbeddNewLoop =
5109 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5112 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
5117 Enter = EmbeddedLoop->
getBody();
5119 OutroInsertBefore = EmbeddedLoop->
getLatch();
5120 return EmbeddedLoop;
5124 const Twine &NameBase) {
5127 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5128 Result.push_back(EmbeddedLoop);
5132 EmbeddNewLoops(FloorCount,
"floor");
5138 for (
int i = 0; i < NumLoops; ++i) {
5142 Value *FloorIsEpilogue =
5144 Value *TileTripCount =
5151 EmbeddNewLoops(TileCounts,
"tile");
5156 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5165 BodyEnter =
nullptr;
5166 BodyEntered = ExitBB;
5179 for (
int i = 0; i < NumLoops; ++i) {
5182 Value *OrigIndVar = OrigIndVars[i];
5210 if (Properties.
empty())
5233 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5237 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5245 if (
I.mayReadOrWriteMemory()) {
5249 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5271 const Twine &NamePrefix) {
5318 VMap[
Block] = NewBB;
5328 if (TargetTriple.
isX86()) {
5329 if (Features.
lookup(
"avx512f"))
5331 else if (Features.
lookup(
"avx"))
5335 if (TargetTriple.
isPPC())
5337 if (TargetTriple.
isWasm())
5344 Value *IfCond, OrderKind Order,
5363 if (AlignedVars.
size()) {
5365 for (
auto &AlignedItem : AlignedVars) {
5366 Value *AlignedPtr = AlignedItem.first;
5367 Value *Alignment = AlignedItem.second;
5368 Instruction *loadInst = dyn_cast<Instruction>(AlignedPtr);
5378 createIfVersion(CanonicalLoop, IfCond, VMap,
"simd");
5382 "Cannot find value which corresponds to original loop latch");
5383 assert(isa<BasicBlock>(MappedLatch) &&
5384 "Cannot cast mapped latch block value to BasicBlock");
5385 BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
5414 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5422 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5430 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
5432 if (Simdlen || Safelen) {
5436 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
5462static std::unique_ptr<TargetMachine>
5466 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
5467 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
5468 const std::string &
Triple = M->getTargetTriple();
5478 std::nullopt, OptLevel));
5502 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
5517 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
5522 nullptr, ORE,
static_cast<int>(OptLevel),
5543 <<
" Threshold=" << UP.
Threshold <<
"\n"
5546 <<
" PartialOptSizeThreshold="
5565 if (
auto *Load = dyn_cast<LoadInst>(&
I)) {
5566 Ptr = Load->getPointerOperand();
5567 }
else if (
auto *Store = dyn_cast<StoreInst>(&
I)) {
5568 Ptr = Store->getPointerOperand();
5572 Ptr =
Ptr->stripPointerCasts();
5574 if (
auto *Alloca = dyn_cast<AllocaInst>(
Ptr)) {
5575 if (Alloca->getParent() == &
F->getEntryBlock())
5595 int MaxTripCount = 0;
5596 bool MaxOrZero =
false;
5597 unsigned TripMultiple = 0;
5599 bool UseUpperBound =
false;
5601 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
5603 unsigned Factor = UP.
Count;
5604 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
5615 assert(Factor >= 0 &&
"Unroll factor must not be negative");
5631 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
5644 *UnrolledCLI =
Loop;
5649 "unrolling only makes sense with a factor of 2 or larger");
5651 Type *IndVarTy =
Loop->getIndVarType();
5658 std::vector<CanonicalLoopInfo *>
LoopNest =
5673 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
5676 (*UnrolledCLI)->assertOK();
5694 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
5713 if (!CPVars.
empty()) {
5718 Directive OMPD = Directive::OMPD_single;
5723 Value *Args[] = {Ident, ThreadId};
5732 if (
Error Err = FiniCB(IP))
5753 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
5760 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
5763 ConstantInt::get(
Int64, 0), CPVars[
I],
5766 }
else if (!IsNowait) {
5769 omp::Directive::OMPD_unknown,
false,
5784 Directive OMPD = Directive::OMPD_critical;
5789 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
5790 Value *Args[] = {Ident, ThreadId, LockVar};
5807 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5815 const Twine &
Name,
bool IsDependSource) {
5818 [](
Value *SV) {
return SV->
getType()->isIntegerTy(64); }) &&
5819 "OpenMP runtime requires depend vec with i64 type");
5832 for (
unsigned I = 0;
I < NumLoops; ++
I) {
5846 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
5864 Directive OMPD = Directive::OMPD_ordered;
5873 Value *Args[] = {Ident, ThreadId};
5883 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5889 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
5890 bool HasFinalize,
bool IsCancellable) {
5899 if (!isa_and_nonnull<BranchInst>(SplitPos))
5906 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
5917 "Unexpected control flow graph state!!");
5919 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
5921 return AfterIP.takeError();
5923 "Unexpected Control Flow State!");
5929 "Unexpected Insertion point location!");
5932 auto InsertBB = merged ? ExitPredBB : ExitBB;
5933 if (!isa_and_nonnull<BranchInst>(SplitPos))
5943 if (!Conditional || !EntryCall)
5963 UI->eraseFromParent();
5971 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
5979 "Unexpected finalization stack state!");
5982 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
5984 if (
Error Err = Fi.FiniCB(FinIP))
6033 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
6035 "copyin.not.master.end");
6090 Value *DependenceAddress,
bool HaveNowaitClause) {
6098 if (Device ==
nullptr)
6100 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6101 if (NumDependences ==
nullptr) {
6102 NumDependences = ConstantInt::get(
Int32, 0);
6106 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6108 Ident, ThreadId, InteropVar, InteropTypeVal,
6109 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6118 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6126 if (Device ==
nullptr)
6128 if (NumDependences ==
nullptr) {
6129 NumDependences = ConstantInt::get(
Int32, 0);
6133 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6135 Ident, ThreadId, InteropVar, Device,
6136 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6145 Value *NumDependences,
6146 Value *DependenceAddress,
6147 bool HaveNowaitClause) {
6154 if (Device ==
nullptr)
6156 if (NumDependences ==
nullptr) {
6157 NumDependences = ConstantInt::get(
Int32, 0);
6161 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6163 Ident, ThreadId, InteropVar, Device,
6164 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6194 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
6195 "expected num_threads and num_teams to be specified");
6214 const std::string DebugPrefix =
"_debug__";
6215 if (KernelName.
ends_with(DebugPrefix)) {
6216 KernelName = KernelName.
drop_back(DebugPrefix.length());
6223 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
6228 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
6229 if (MaxThreadsVal < 0)
6230 MaxThreadsVal = std::max(
6233 if (MaxThreadsVal > 0)
6244 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6247 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6248 Constant *DynamicEnvironmentInitializer =
6252 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6254 DL.getDefaultGlobalsAddressSpace());
6258 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6259 ? DynamicEnvironmentGV
6261 DynamicEnvironmentPtr);
6264 ConfigurationEnvironment, {
6265 UseGenericStateMachineVal,
6266 MayUseNestedParallelismVal,
6273 ReductionBufferLength,
6276 KernelEnvironment, {
6277 ConfigurationEnvironmentInitializer,
6281 std::string KernelEnvironmentName =
6282 (KernelName +
"_kernel_environment").str();
6285 KernelEnvironmentInitializer, KernelEnvironmentName,
6287 DL.getDefaultGlobalsAddressSpace());
6291 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6292 ? KernelEnvironmentGV
6294 KernelEnvironmentPtr);
6295 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6323 UI->eraseFromParent();
6331 int32_t TeamsReductionDataSize,
6332 int32_t TeamsReductionBufferLength) {
6337 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6341 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6347 const std::string DebugPrefix =
"_debug__";
6349 KernelName = KernelName.
drop_back(DebugPrefix.length());
6350 auto *KernelEnvironmentGV =
6352 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6353 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6355 KernelEnvironmentInitializer,
6356 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6358 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6360 KernelEnvironmentGV->setInitializer(NewInitializer);
6365 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6369 auto *KernelOp = dyn_cast<ConstantAsMetadata>(
Op->getOperand(0));
6370 if (!KernelOp || KernelOp->getValue() != &
Kernel)
6372 auto *Prop = dyn_cast<MDString>(
Op->getOperand(1));
6373 if (!Prop || Prop->getString() !=
Name)
6385 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->
getOperand(2));
6386 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6389 OldVal->getValue()->getType(),
6390 Min ? std::min(OldLimit,
Value) : std::max(OldLimit,
Value))));
6399 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6404std::pair<int32_t, int32_t>
6406 int32_t ThreadLimit =
6411 if (!Attr.isValid() || !Attr.isStringAttribute())
6412 return {0, ThreadLimit};
6415 if (!llvm::to_integer(UBStr, UB, 10))
6416 return {0, ThreadLimit};
6417 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6418 if (!llvm::to_integer(LBStr, LB, 10))
6424 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
6425 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6426 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6428 return {0, ThreadLimit};
6438 llvm::utostr(LB) +
"," + llvm::utostr(UB));
6445std::pair<int32_t, int32_t>
6452 int32_t LB, int32_t UB) {
6457 Kernel.
addFnAttr(
"amdgpu-max-num-workgroups", llvm::utostr(LB) +
",1,1");
6462void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
6477 assert(OutlinedFn &&
"The outlined function must exist if embedded");
6486Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
6492 "Named kernel already exists?");
6510 OutlinedFn = *CBResult;
6512 OutlinedFn =
nullptr;
6518 if (!IsOffloadEntry)
6521 std::string EntryFnIDName =
6523 ? std::string(EntryFnName)
6527 EntryFnName, EntryFnIDName);
6535 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
6536 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
6537 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
6539 EntryInfo, EntryAddr, OutlinedFnID,
6541 return OutlinedFnID;
6570 bool IsStandAlone = !BodyGenCB;
6596 SrcLocInfo, DeviceID,
6603 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
6607 if (
Info.HasNoWait) {
6617 if (
Info.HasNoWait) {
6621 emitBlock(OffloadContBlock, CurFn,
true);
6627 bool RequiresOuterTargetTask =
Info.HasNoWait;
6628 if (!RequiresOuterTargetTask)
6629 cantFail(TaskBodyCB(
nullptr,
nullptr,
6633 {},
Info.HasNoWait));
6636 omp::OMPRTL___tgt_target_data_begin_mapper);
6640 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
6641 if (isa<AllocaInst>(DeviceMap.second.second)) {
6689 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6711 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
6727 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
6731 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
6743 bool IsGPUDistribute) {
6744 assert((IVSize == 32 || IVSize == 64) &&
6745 "IV size is not compatible with the omp runtime");
6747 if (IsGPUDistribute)
6749 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
6750 : omp::OMPRTL___kmpc_distribute_static_init_4u)
6751 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
6752 : omp::OMPRTL___kmpc_distribute_static_init_8u);
6754 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
6755 : omp::OMPRTL___kmpc_for_static_init_4u)
6756 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
6757 : omp::OMPRTL___kmpc_for_static_init_8u);
6764 assert((IVSize == 32 || IVSize == 64) &&
6765 "IV size is not compatible with the omp runtime");
6767 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
6768 : omp::OMPRTL___kmpc_dispatch_init_4u)
6769 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
6770 : omp::OMPRTL___kmpc_dispatch_init_8u);
6777 assert((IVSize == 32 || IVSize == 64) &&
6778 "IV size is not compatible with the omp runtime");
6780 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
6781 : omp::OMPRTL___kmpc_dispatch_next_4u)
6782 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
6783 : omp::OMPRTL___kmpc_dispatch_next_8u);
6790 assert((IVSize == 32 || IVSize == 64) &&
6791 "IV size is not compatible with the omp runtime");
6793 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
6794 : omp::OMPRTL___kmpc_dispatch_fini_4u)
6795 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
6796 : omp::OMPRTL___kmpc_dispatch_fini_8u);
6820 for (
auto &Arg : Inputs)
6821 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
6825 for (
auto &Arg : Inputs)
6826 ParameterTypes.
push_back(Arg->getType());
6830 auto M = BB->getModule();
6841 if (TargetCpuAttr.isStringAttribute())
6842 Func->addFnAttr(TargetCpuAttr);
6844 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
6845 if (TargetFeaturesAttr.isStringAttribute())
6846 Func->addFnAttr(TargetFeaturesAttr);
6851 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
6867 DB.createSubroutineType(DB.getOrCreateTypeArray({}));
6869 DISubprogram::SPFlagOptimized |
6870 DISubprogram::SPFlagLocalToUnit;
6873 CU, FuncName, FuncName, SP->getFile(),
DL.getLine(), Ty,
6874 DL.getLine(), DINode::DIFlags::FlagArtificial, SPFlags);
6877 Func->setSubprogram(OutlinedSP);
6882 OutlinedSP,
DL.getInlinedAt()));
6905 splitBB(Builder,
true,
"outlined.body");
6920 auto AllocaIP = Builder.
saveIP();
6925 const auto &ArgRange =
6927 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
6948 if (
auto *Const = dyn_cast<Constant>(Input))
6953 if (
auto *Instr = dyn_cast<Instruction>(
User))
6954 if (Instr->getFunction() == Func)
6955 Instr->replaceUsesOfWith(Input, InputCopy);
6961 for (
auto InArg :
zip(Inputs, ArgRange)) {
6962 Value *Input = std::get<0>(InArg);
6963 Argument &Arg = std::get<1>(InArg);
6964 Value *InputCopy =
nullptr;
6967 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.
saveIP());
6988 if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
6989 llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
6990 llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
6991 DeferredReplacement.
push_back(std::make_pair(Input, InputCopy));
6995 ReplaceValue(Input, InputCopy, Func);
6999 for (
auto Deferred : DeferredReplacement)
7000 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7040 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7041 Type *TaskTy = OMPBuilder.Task;
7046 ".omp_target_task_proxy_func",
7048 ProxyFn->getArg(0)->setName(
"thread.id");
7049 ProxyFn->getArg(1)->setName(
"task");
7055 bool HasShareds = StaleCI->
arg_size() > 1;
7061 "StaleCI with shareds should have exactly two arguments.");
7063 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
7064 assert(ArgStructAlloca &&
7065 "Unable to find the alloca instruction corresponding to arguments "
7066 "for extracted function");
7067 auto *ArgStructType = cast<StructType>(ArgStructAlloca->getAllocatedType());
7070 Builder.
CreateAlloca(ArgStructType,
nullptr,
"structArg");
7071 Value *TaskT = ProxyFn->getArg(1);
7072 Value *ThreadId = ProxyFn->getArg(0);
7073 Value *SharedsSize =
7074 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7081 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7084 Builder.
CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
7102 EntryFnName, Inputs, CBFunc,
7107 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7215 TargetTaskAllocaBB->
begin());
7219 OI.
EntryBB = TargetTaskAllocaBB;
7225 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7229 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7233 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, HasNoWait,
7234 DeviceID](
Function &OutlinedFn)
mutable {
7236 "there must be a single user for the outlined function");
7239 bool HasShareds = StaleCI->
arg_size() > 1;
7243 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
7262 OMPRTL___kmpc_omp_target_task_alloc);
7280 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
7281 assert(ArgStructAlloca &&
7282 "Unable to find the alloca instruction corresponding to arguments "
7283 "for extracted function");
7284 auto *ArgStructType =
7285 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
7286 assert(ArgStructType &&
"Unable to find struct type corresponding to "
7287 "arguments for extracted function");
7308 TaskSize, SharedsSize,
7356 }
else if (DepArray) {
7375 I->eraseFromParent();
7393 DeviceAddrCB, CustomMapperCB);
7406 bool HasNoWait =
false) {
7417 bool HasDependencies = Dependencies.size() > 0;
7418 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7437 EmitTargetCallFallbackCB, KArgs,
7438 DeviceID, RTLoc, TargetTaskAllocaIP);
7441 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
7448 auto &&EmitTargetCallElse =
7454 if (RequiresOuterTargetTask) {
7460 Dependencies, HasNoWait);
7462 return EmitTargetCallFallbackCB(Builder.
saveIP());
7469 auto &&EmitTargetCallThen =
7486 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
7491 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
7508 Value *MaxThreadsClause =
7510 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
7513 for (
auto [TeamsVal, TargetVal] :
zip_equal(
7515 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
7516 Value *NumThreads = InitMaxThreadsClause(TargetVal);
7518 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
7519 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
7524 unsigned NumTargetItems =
Info.NumberOfPtrs;
7542 NumTeamsC, NumThreadsC,
7543 DynCGGroupMem, HasNoWait);
7550 if (RequiresOuterTargetTask)
7551 return OMPBuilder.
emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
7552 Dependencies, HasNoWait);
7555 EmitTargetCallFallbackCB, KArgs,
7556 DeviceID, RTLoc, AllocaIP);
7566 if (!OutlinedFnID) {
7578 EmitTargetCallElse, AllocaIP));
7602 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
7603 OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB))
7611 OutlinedFn, OutlinedFnID, Args, GenMapInfoCB, Dependencies,
7626 return OS.str().str();
7640 assert(Elem.second->getValueType() == Ty &&
7641 "OMP internal variable has different type than requested");
7657 GV->setAlignment(std::max(TypeAlign, PtrAlign));
7664Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
7665 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
7666 std::string
Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
7677 return SizePtrToInt;
7682 std::string VarName) {
7690 return MaptypesArrayGlobal;
7695 unsigned NumOperands,
7704 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
7708 ArrI64Ty,
nullptr,
".offload_sizes");
7719 int64_t DeviceID,
unsigned NumOperands) {
7725 Value *ArgsBaseGEP =
7727 {Builder.getInt32(0), Builder.getInt32(0)});
7730 {Builder.getInt32(0), Builder.getInt32(0)});
7731 Value *ArgSizesGEP =
7733 {Builder.getInt32(0), Builder.getInt32(0)});
7739 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
7746 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
7747 "expected region end call to runtime only when end call is separate");
7749 auto VoidPtrTy = UnqualPtrTy;
7750 auto VoidPtrPtrTy = UnqualPtrTy;
7752 auto Int64PtrTy = UnqualPtrTy;
7754 if (!
Info.NumberOfPtrs) {
7766 Info.RTArgs.BasePointersArray,
7777 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
7778 :
Info.RTArgs.MapTypesArray,
7784 if (!
Info.EmitDebug)
7793 if (!
Info.HasMapper)
7818 "struct.descriptor_dim");
7820 enum { OffsetFD = 0, CountFD, StrideFD };
7824 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
7827 if (NonContigInfo.
Dims[
I] == 1)
7834 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
7835 unsigned RevIdx = EE -
II - 1;
7838 {Builder.getInt64(0), Builder.getInt64(II)});
7842 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
7847 NonContigInfo.
Counts[L][RevIdx], CountLVal,
7852 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
7861 Info.RTArgs.PointersArray, 0,
I);
7868void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
7872 StringRef Prefix = IsInit ?
".init" :
".del";
7882 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7883 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
7893 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7894 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
7919 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7920 OpenMPOffloadMappingFlags::OMP_MAP_TO |
7921 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
7925 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7926 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
7930 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
7931 ArraySize, MapTypeArg, MapName};
7958 MapperFn->
addFnAttr(Attribute::NoInline);
7959 MapperFn->
addFnAttr(Attribute::NoUnwind);
7990 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
7991 MapType, MapName, ElementSize, HeadBB,
8017 Value *OffloadingArgs[] = {MapperHandle};
8021 Value *ShiftedPreviousSize =
8025 for (
unsigned I = 0;
I <
Info.BasePointers.size(); ++
I) {
8028 Value *CurBeginArg =
8037 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8039 Value *MemberMapType =
8057 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8058 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8059 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8075 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8076 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8077 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8083 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8084 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8091 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8092 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8098 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8099 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8106 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8107 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8118 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8119 CurSizeArg, CurMapType, CurNameArg};
8121 if (CustomMapperCB && CustomMapperCB(
I, &ChildMapperFn)) {
8136 "omp.arraymap.next");
8145 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8146 MapType, MapName, ElementSize, DoneBB,
8164 Info.clearArrayInfo();
8167 if (
Info.NumberOfPtrs == 0)
8177 PointerArrayType,
nullptr,
".offload_baseptrs");
8180 PointerArrayType,
nullptr,
".offload_ptrs");
8182 PointerArrayType,
nullptr,
".offload_mappers");
8183 Info.RTArgs.MappersArray = MappersArray;
8190 ConstantInt::get(Int64Ty, 0));
8192 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
8193 if (
auto *CI = dyn_cast<Constant>(CombinedInfo.
Sizes[
I])) {
8194 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
8195 if (IsNonContiguous &&
8196 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8198 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
8206 RuntimeSizes.
set(
I);
8209 if (RuntimeSizes.
all()) {
8212 SizeArrayType,
nullptr,
".offload_sizes");
8218 auto *SizesArrayGbl =
8223 if (!RuntimeSizes.
any()) {
8224 Info.RTArgs.SizesArray = SizesArrayGbl;
8230 SizeArrayType,
nullptr,
".offload_sizes");
8235 SizesArrayGbl, OffloadSizeAlign,
8240 Info.RTArgs.SizesArray = Buffer;
8248 for (
auto mapFlag : CombinedInfo.
Types)
8250 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8254 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
8259 auto *MapNamesArrayGbl =
8261 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
8262 Info.EmitDebug =
true;
8264 Info.RTArgs.MapNamesArray =
8266 Info.EmitDebug =
false;
8271 if (
Info.separateBeginEndCalls()) {
8272 bool EndMapTypesDiffer =
false;
8274 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8275 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
8276 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8277 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8278 EndMapTypesDiffer =
true;
8281 if (EndMapTypesDiffer) {
8283 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
8288 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
8296 if (
Info.requiresDevicePointerInfo()) {
8303 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
8305 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
8307 DeviceAddrCB(
I, BP);
8319 if (RuntimeSizes.
test(
I)) {
8333 if (
Value *CustomMFunc = CustomMapperCB(
I))
8337 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
8343 Info.NumberOfPtrs == 0)
8388 if (
auto *CI = dyn_cast<ConstantInt>(
Cond)) {
8389 auto CondConstant = CI->getSExtValue();
8421bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
8425 "Unexpected Atomic Ordering.");
8489 assert(
X.Var->getType()->isPointerTy() &&
8490 "OMP Atomic expects a pointer to target memory");
8491 Type *XElemTy =
X.ElemTy;
8494 "OMP atomic read expected a scalar type");
8496 Value *XRead =
nullptr;
8502 XRead = cast<Value>(XLD);
8515 XRead = AtomicLoadRes.first;
8530 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
8531 if (XRead->
getType() != V.Var->getType())
8544 assert(
X.Var->getType()->isPointerTy() &&
8545 "OMP Atomic expects a pointer to target memory");
8546 Type *XElemTy =
X.ElemTy;
8549 "OMP atomic write expected a scalar type");
8564 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
8577 Type *XTy =
X.Var->getType();
8579 "OMP Atomic expects a pointer to target memory");
8580 Type *XElemTy =
X.ElemTy;
8583 "OMP atomic update expected a scalar type");
8586 "OpenMP atomic does not support LT or GT operations");
8590 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
8591 X.IsVolatile, IsXBinopExpr);
8593 return AtomicResult.takeError();
8594 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
8599Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
8636 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr) {
8639 bool emitRMWOp =
false;
8647 emitRMWOp = XElemTy;
8650 emitRMWOp = (IsXBinopExpr && XElemTy);
8657 std::pair<Value *, Value *> Res;
8664 Res.second = Res.first;
8666 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
8679 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
8686 X->getName() +
".atomic.cont");
8690 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
8693 PHI->addIncoming(AtomicLoadRes.first, CurBB);
8698 Value *Upd = *CBResult;
8702 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
8703 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
8708 Res.first = OldExprVal;
8735 X->getName() +
".atomic.cont");
8739 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
8742 PHI->addIncoming(OldVal, CurBB);
8748 X->getName() +
".atomic.fltCast");
8751 X->getName() +
".atomic.ptrCast");
8758 Value *Upd = *CBResult;
8765 Result->setVolatile(VolatileX);
8771 Res.first = OldExprVal;
8791 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr) {
8796 Type *XTy =
X.Var->getType();
8798 "OMP Atomic expects a pointer to target memory");
8799 Type *XElemTy =
X.ElemTy;
8802 "OMP atomic capture expected a scalar type");
8804 "OpenMP atomic does not support LT or GT operations");
8811 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
8812 X.IsVolatile, IsXBinopExpr);
8815 Value *CapturedVal =
8816 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
8817 if (CapturedVal->
getType() != V.Var->getType())
8821 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
8833 IsPostfixUpdate, IsFailOnly, Failure);
8845 assert(
X.Var->getType()->isPointerTy() &&
8846 "OMP atomic expects a pointer to target memory");
8849 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
8850 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
8855 if (
Op == OMPAtomicCompareOp::EQ) {
8874 "OldValue and V must be of same type");
8875 if (IsPostfixUpdate) {
8893 CurBBTI,
X.Var->getName() +
".atomic.exit");
8913 Value *CapturedValue =
8921 assert(R.Var->getType()->isPointerTy() &&
8922 "r.var must be of pointer type");
8923 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
8926 Value *ResultCast = R.IsSigned
8932 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
8933 "Op should be either max or min at this point");
8934 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
8974 Value *CapturedValue =
nullptr;
8975 if (IsPostfixUpdate) {
8976 CapturedValue = OldValue;
9008 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
9055 bool SubClausesPresent =
9056 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9059 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9060 "if lowerbound is non-null, then upperbound must also be non-null "
9061 "for bounds on num_teams");
9063 if (NumTeamsUpper ==
nullptr)
9066 if (NumTeamsLower ==
nullptr)
9067 NumTeamsLower = NumTeamsUpper;
9071 "argument to if clause must be an integer value");
9076 ConstantInt::get(IfExpr->
getType(), 0));
9085 if (ThreadLimit ==
nullptr)
9091 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9096 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9108 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9110 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9112 auto HostPostOutlineCB = [
this, Ident,
9113 ToBeDeleted](
Function &OutlinedFn)
mutable {
9118 "there must be a single user for the outlined function");
9123 "Outlined function must have two or three arguments only");
9125 bool HasShared = OutlinedFn.
arg_size() == 3;
9133 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9134 "outlined function.");
9141 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9145 I->eraseFromParent();
9160 std::string VarName) {
9169 return MapNamesArrayGlobal;
9174void OpenMPIRBuilder::initializeTypes(
Module &M) {
9177#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
9178#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
9179 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
9180 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
9181#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
9182 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
9183 VarName##Ptr = PointerType::getUnqual(VarName);
9184#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
9185 T = StructType::getTypeByName(Ctx, StructName); \
9187 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
9189 VarName##Ptr = PointerType::getUnqual(T);
9190#include "llvm/Frontend/OpenMP/OMPKinds.def"
9201 while (!Worklist.
empty()) {
9205 if (BlockSet.
insert(SuccBB).second)
9217 "omp_offloading_entries");
9241 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
9260 auto &&GetMDInt = [
this](
unsigned V) {
9268 auto &&TargetRegionMetadataEmitter =
9269 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
9284 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
9285 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
9286 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
9287 GetMDInt(E.getOrder())};
9290 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
9299 auto &&DeviceGlobalVarMetadataEmitter =
9300 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
9310 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
9311 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
9315 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
9322 DeviceGlobalVarMetadataEmitter);
9324 for (
const auto &E : OrderedEntries) {
9325 assert(E.first &&
"All ordered entries must exist!");
9326 if (
const auto *CE =
9327 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
9329 if (!CE->getID() || !CE->getAddress()) {
9341 }
else if (
const auto *CE =
dyn_cast<
9352 if (!CE->getAddress()) {
9357 if (CE->getVarSize() == 0)
9363 "Declaret target link address is set.");
9366 if (!CE->getAddress()) {
9378 if (
auto *
GV = dyn_cast<GlobalValue>(CE->getAddress()))
9379 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
9387 Flags, CE->getLinkage(), CE->getVarName());
9390 Flags, CE->getLinkage());
9411 unsigned FileID,
unsigned Line,
unsigned Count) {
9414 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
9421 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
9424 EntryInfo.
Line, NewCount);
9431 auto FileIDInfo = CallBack();
9434 "getTargetEntryUniqueInfo, error message: " +
9440 std::get<1>(FileIDInfo));
9446 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
9448 !(Remain & 1); Remain = Remain >> 1)
9466 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
9468 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
9475 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
9476 Flags |= MemberOfFlag;
9482 bool IsDeclaration,
bool IsExternallyVisible,
9484 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
9485 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
9486 std::function<
Constant *()> GlobalInitializer,
9502 if (!IsExternallyVisible)
9504 OS <<
"_decl_tgt_ref_ptr";
9513 auto *
GV = cast<GlobalVariable>(
Ptr);
9517 if (GlobalInitializer)
9518 GV->setInitializer(GlobalInitializer());
9524 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
9525 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
9526 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(
Ptr));
9529 return cast<Constant>(
Ptr);
9538 bool IsDeclaration,
bool IsExternallyVisible,
9540 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
9541 std::vector<Triple> TargetTriple,
9542 std::function<
Constant *()> GlobalInitializer,
9559 VarName = MangledName;
9567 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
9583 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
9584 GvAddrRef->setConstant(
true);
9586 GvAddrRef->setInitializer(
Addr);
9587 GeneratedRefs.push_back(GvAddrRef);
9597 VarName = (
Addr) ?
Addr->getName() :
"";
9601 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
9602 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
9603 LlvmPtrTy, GlobalInitializer, VariableLinkage);
9604 VarName = (
Addr) ?
Addr->getName() :
"";
9625 auto &&GetMDInt = [MN](
unsigned Idx) {
9626 auto *V = cast<ConstantAsMetadata>(MN->getOperand(
Idx));
9627 return cast<ConstantInt>(V->getValue())->getZExtValue();
9630 auto &&GetMDString = [MN](
unsigned Idx) {
9631 auto *V = cast<MDString>(MN->getOperand(
Idx));
9632 return V->getString();
9635 switch (GetMDInt(0)) {
9663 if (HostFilePath.
empty())
9667 if (std::error_code Err = Buf.getError()) {
9669 "OpenMPIRBuilder: " +
9677 if (std::error_code Err =
M.getError()) {
9679 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
9691 return OffloadEntriesTargetRegion.empty() &&
9692 OffloadEntriesDeviceGlobalVar.empty();
9695unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
9697 auto It = OffloadEntriesTargetRegionCount.find(
9698 getTargetRegionEntryCountKey(EntryInfo));
9699 if (It == OffloadEntriesTargetRegionCount.end())
9704void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
9706 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
9707 EntryInfo.
Count + 1;
9713 OffloadEntriesTargetRegion[EntryInfo] =
9715 OMPTargetRegionEntryTargetRegion);
9716 ++OffloadingEntriesNum;
9722 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
9725 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
9731 if (!hasTargetRegionEntryInfo(EntryInfo)) {
9734 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
9735 Entry.setAddress(
Addr);
9737 Entry.setFlags(
Flags);
9740 hasTargetRegionEntryInfo(EntryInfo,
true))
9742 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
9743 "Target region entry already registered!");
9745 OffloadEntriesTargetRegion[EntryInfo] = Entry;
9746 ++OffloadingEntriesNum;
9748 incrementTargetRegionEntryInfoCount(EntryInfo);
9755 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
9757 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
9758 if (It == OffloadEntriesTargetRegion.end()) {
9762 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
9770 for (
const auto &It : OffloadEntriesTargetRegion) {
9771 Action(It.first, It.second);
9777 OffloadEntriesDeviceGlobalVar.try_emplace(
Name, Order,
Flags);
9778 ++OffloadingEntriesNum;
9786 if (!hasDeviceGlobalVarEntryInfo(VarName))
9788 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
9789 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
9790 if (Entry.getVarSize() == 0) {
9791 Entry.setVarSize(VarSize);
9792 Entry.setLinkage(Linkage);
9796 Entry.setVarSize(VarSize);
9797 Entry.setLinkage(Linkage);
9798 Entry.setAddress(
Addr);
9800 if (hasDeviceGlobalVarEntryInfo(VarName)) {
9801 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
9802 assert(Entry.isValid() && Entry.getFlags() ==
Flags &&
9803 "Entry not initialized!");
9804 if (Entry.getVarSize() == 0) {
9805 Entry.setVarSize(VarSize);
9806 Entry.setLinkage(Linkage);
9811 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
9815 OffloadEntriesDeviceGlobalVar.try_emplace(
9816 VarName, OffloadingEntriesNum,
Addr, VarSize,
Flags, Linkage,
"");
9817 ++OffloadingEntriesNum;
9824 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
9825 Action(E.getKey(), E.getValue());
9832void CanonicalLoopInfo::collectControlBlocks(
9839 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
9851void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
9855 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
9863void CanonicalLoopInfo::mapIndVar(
9873 for (
Use &U : OldIV->
uses()) {
9874 auto *
User = dyn_cast<Instruction>(U.getUser());
9877 if (
User->getParent() == getCond())
9879 if (
User->getParent() == getLatch())
9885 Value *NewIV = Updater(OldIV);
9888 for (
Use *U : ReplacableUses)
9909 "Preheader must terminate with unconditional branch");
9911 "Preheader must jump to header");
9914 assert(isa<BranchInst>(Header->getTerminator()) &&
9915 "Header must terminate with unconditional branch");
9916 assert(Header->getSingleSuccessor() ==
Cond &&
9917 "Header must jump to exiting block");
9920 assert(
Cond->getSinglePredecessor() == Header &&
9921 "Exiting block only reachable from header");
9923 assert(isa<BranchInst>(
Cond->getTerminator()) &&
9924 "Exiting block must terminate with conditional branch");
9926 "Exiting block must have two successors");
9927 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
9928 "Exiting block's first successor jump to the body");
9929 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
9930 "Exiting block's second successor must exit the loop");
9934 "Body only reachable from exiting block");
9939 "Latch must terminate with unconditional branch");
9947 assert(isa<BranchInst>(Exit->getTerminator()) &&
9948 "Exit block must terminate with unconditional branch");
9950 "Exit block must jump to after block");
9954 "After block only reachable from exit block");
9958 assert(IndVar &&
"Canonical induction variable not found?");
9960 "Induction variable must be an integer");
9962 "Induction variable must be a PHI in the loop header");
9963 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
9965 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
9966 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
9968 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
9970 assert(cast<BinaryOperator>(NextIndVar)->
getOpcode() == BinaryOperator::Add);
9971 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
9972 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
9975 Value *TripCount = getTripCount();
9976 assert(TripCount &&
"Loop trip count not found?");
9978 "Trip count and induction variable must have the same type");
9980 auto *CmpI = cast<CmpInst>(&
Cond->front());
9982 "Exit condition must be a signed less-than comparison");
9984 "Exit condition must compare the induction variable");
9986 "Exit condition must compare with the trip count");
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
BlockVerifier::State From
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI)
Create an entry point for a target task with the following.
static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static MDNode * getNVPTXMDNode(Function &Kernel, StringRef Name)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static llvm::Value * emitImplicitCast(IRBuilder<> &Builder, llvm::Value *XRead, llvm::Value *V)
Emit an implicit cast to convert XRead to type of variable V.
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, SmallVector< llvm::OpenMPIRBuilder::DependData > Dependencies={}, bool HasNoWait=false)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, Type *ParallelTaskPtr, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass execution machinery.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned unsigned DefaultVal
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
A container for analyses that lazily runs them and caches their results.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getFnAttrs() const
The function attributes are returned.
AttributeList addFnAttributes(LLVMContext &C, const AttrBuilder &B) const
Add function attribute to the list.
AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic block Old.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
reverse_iterator rbegin()
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic, or any pseudo operation if SkipPseudoOp is true.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives in, right before MovePos.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Instruction & back() const
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static ConstantInt * getFalse(LLVMContext &Context)
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DISubprogram * getSubprogram() const
Get the subprogram for this scope.
DISPFlags
Debug info subprogram flags.
Type array for a subprogram.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getDefaultGlobalsAddressSpace() const
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
unsigned getAllocaAddrSpace() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Analysis pass which computes a DominatorTree.
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS, const Twine &Name="")
Return the i64 difference between two pointer values, dividing out the size of the pointed-to objects...
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
UnreachableInst * CreateUnreachable()
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
IntegerType * getIndexTy(const DataLayout &DL, unsigned AddrSpace)
Fetch the type of an integer that should be used to index GEP operations within AddressSpace.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name="")
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateFPCast(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt16(uint16_t C)
Get a constant 16-bit value.
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
GlobalVariable * CreateGlobalString(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Make a new global variable with initializer type i8*.
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveBeforePreserving(Instruction *MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
A Module instance is used to store all the information related to an LLVM module.
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
LLVMContext & getContext() const
Get the global data context.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
StringRef getName() const
Get a short "name" for the module.
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
iterator_range< global_iterator > globals()
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (if set to true) in an offloading context.
void setGridValue(omp::GV G)
StringRef separator() const
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
omp::GV getGridValue() const
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
Generate a target-task for the target construct.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
void emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, SmallVector< DependData > Dependencies={}, bool HasNowait=false)
Generator for '#omp target'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsTeamsReduction=false, bool HasDistribute=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
Function * emitUserDefinedMapper(function_ref< MapInfosTy &(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, function_ref< bool(unsigned int, Function **)> CustomMapperCB=nullptr)
Emit the user-defined mapper function.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false)
Generator for '#omp reduction'.
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
BodyGenTy
Type of BodyGen to use for region codegen.
InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
iterator insertAfter(iterator where, pointer New)
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
void emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data, StringRef SectionName)
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
CodeGenOptLevel
Code generation optimization level.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * DynCGGroupMem
The size of the dynamic shared memory.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
unsigned GV_Warp_Size
The default value of maximum number of threads in a worker warp.