63#define DEBUG_TYPE "openmp-ir-builder"
70 cl::desc(
"Use optimistic attributes describing "
71 "'as-if' properties of runtime calls."),
75 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
76 cl::desc(
"Factor for the unroll threshold to account for code "
77 "simplifications still taking place"),
88 if (!IP1.isSet() || !IP2.isSet())
90 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
95 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
96 case OMPScheduleType::UnorderedStaticChunked:
97 case OMPScheduleType::UnorderedStatic:
98 case OMPScheduleType::UnorderedDynamicChunked:
99 case OMPScheduleType::UnorderedGuidedChunked:
100 case OMPScheduleType::UnorderedRuntime:
101 case OMPScheduleType::UnorderedAuto:
102 case OMPScheduleType::UnorderedTrapezoidal:
103 case OMPScheduleType::UnorderedGreedy:
104 case OMPScheduleType::UnorderedBalanced:
105 case OMPScheduleType::UnorderedGuidedIterativeChunked:
106 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
107 case OMPScheduleType::UnorderedSteal:
108 case OMPScheduleType::UnorderedStaticBalancedChunked:
109 case OMPScheduleType::UnorderedGuidedSimd:
110 case OMPScheduleType::UnorderedRuntimeSimd:
111 case OMPScheduleType::OrderedStaticChunked:
112 case OMPScheduleType::OrderedStatic:
113 case OMPScheduleType::OrderedDynamicChunked:
114 case OMPScheduleType::OrderedGuidedChunked:
115 case OMPScheduleType::OrderedRuntime:
116 case OMPScheduleType::OrderedAuto:
117 case OMPScheduleType::OrderdTrapezoidal:
118 case OMPScheduleType::NomergeUnorderedStaticChunked:
119 case OMPScheduleType::NomergeUnorderedStatic:
120 case OMPScheduleType::NomergeUnorderedDynamicChunked:
121 case OMPScheduleType::NomergeUnorderedGuidedChunked:
122 case OMPScheduleType::NomergeUnorderedRuntime:
123 case OMPScheduleType::NomergeUnorderedAuto:
124 case OMPScheduleType::NomergeUnorderedTrapezoidal:
125 case OMPScheduleType::NomergeUnorderedGreedy:
126 case OMPScheduleType::NomergeUnorderedBalanced:
127 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
128 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
129 case OMPScheduleType::NomergeUnorderedSteal:
130 case OMPScheduleType::NomergeOrderedStaticChunked:
131 case OMPScheduleType::NomergeOrderedStatic:
132 case OMPScheduleType::NomergeOrderedDynamicChunked:
133 case OMPScheduleType::NomergeOrderedGuidedChunked:
134 case OMPScheduleType::NomergeOrderedRuntime:
135 case OMPScheduleType::NomergeOrderedAuto:
136 case OMPScheduleType::NomergeOrderedTrapezoidal:
144 SchedType & OMPScheduleType::MonotonicityMask;
145 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
156 if (Features.
count(
"+wavefrontsize64"))
157 return omp::getAMDGPUGridValues<64>();
158 return omp::getAMDGPUGridValues<32>();
169 bool HasSimdModifier) {
171 switch (ClauseKind) {
172 case OMP_SCHEDULE_Default:
173 case OMP_SCHEDULE_Static:
174 return HasChunks ? OMPScheduleType::BaseStaticChunked
175 : OMPScheduleType::BaseStatic;
176 case OMP_SCHEDULE_Dynamic:
177 return OMPScheduleType::BaseDynamicChunked;
178 case OMP_SCHEDULE_Guided:
179 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
180 : OMPScheduleType::BaseGuidedChunked;
181 case OMP_SCHEDULE_Auto:
183 case OMP_SCHEDULE_Runtime:
184 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
185 : OMPScheduleType::BaseRuntime;
193 bool HasOrderedClause) {
194 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
195 OMPScheduleType::None &&
196 "Must not have ordering nor monotonicity flags already set");
199 ? OMPScheduleType::ModifierOrdered
200 : OMPScheduleType::ModifierUnordered;
201 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
204 if (OrderingScheduleType ==
205 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
206 return OMPScheduleType::OrderedGuidedChunked;
207 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
208 OMPScheduleType::ModifierOrdered))
209 return OMPScheduleType::OrderedRuntime;
211 return OrderingScheduleType;
217 bool HasSimdModifier,
bool HasMonotonic,
218 bool HasNonmonotonic,
bool HasOrderedClause) {
219 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
220 OMPScheduleType::None &&
221 "Must not have monotonicity flags already set");
222 assert((!HasMonotonic || !HasNonmonotonic) &&
223 "Monotonic and Nonmonotonic are contradicting each other");
226 return ScheduleType | OMPScheduleType::ModifierMonotonic;
227 }
else if (HasNonmonotonic) {
228 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
238 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
239 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
245 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
253 bool HasSimdModifier,
bool HasMonotonicModifier,
254 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
260 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
261 HasNonmonotonicModifier, HasOrderedClause);
275 auto *Br = cast<BranchInst>(Term);
276 assert(!Br->isConditional() &&
277 "BB's terminator must be an unconditional branch (or degenerate)");
280 Br->setSuccessor(0,
Target);
285 NewBr->setDebugLoc(
DL);
290 assert(New->getFirstInsertionPt() == New->begin() &&
291 "Target BB must not have PHI nodes");
295 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
323 New->replaceSuccessorsPhiUsesWith(Old, New);
368 const Twine &
Name =
"",
bool AsPtr =
true) {
376 FakeVal = FakeValAddr;
404enum OpenMPOffloadingRequiresDirFlags {
406 OMP_REQ_UNDEFINED = 0x000,
408 OMP_REQ_NONE = 0x001,
410 OMP_REQ_REVERSE_OFFLOAD = 0x002,
412 OMP_REQ_UNIFIED_ADDRESS = 0x004,
414 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
416 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
423 : RequiresFlags(OMP_REQ_UNDEFINED) {}
426 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
427 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
428 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
429 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
430 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
431 RequiresFlags(OMP_REQ_UNDEFINED) {
432 if (HasRequiresReverseOffload)
433 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
434 if (HasRequiresUnifiedAddress)
435 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
436 if (HasRequiresUnifiedSharedMemory)
437 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
438 if (HasRequiresDynamicAllocators)
439 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
443 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
447 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
451 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
455 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
460 :
static_cast<int64_t
>(OMP_REQ_NONE);
465 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
467 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
472 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
474 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
479 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
481 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
486 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
488 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
501 constexpr const size_t MaxDim = 3;
509 Value *NumThreads3D =
512 seq<unsigned>(1, std::min(KernelArgs.
NumTeams.size(), MaxDim)))
516 seq<unsigned>(1, std::min(KernelArgs.
NumThreads.size(), MaxDim)))
540 auto FnAttrs = Attrs.getFnAttrs();
541 auto RetAttrs = Attrs.getRetAttrs();
543 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
548 bool Param =
true) ->
void {
549 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
550 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
551 if (HasSignExt || HasZeroExt) {
552 assert(AS.getNumAttributes() == 1 &&
553 "Currently not handling extension attr combined with others.");
555 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
558 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
565#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
566#include "llvm/Frontend/OpenMP/OMPKinds.def"
570#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
572 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
573 addAttrSet(RetAttrs, RetAttrSet, false); \
574 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
575 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
576 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
578#include "llvm/Frontend/OpenMP/OMPKinds.def"
592#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
594 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
596 Fn = M.getFunction(Str); \
598#include "llvm/Frontend/OpenMP/OMPKinds.def"
604#define OMP_RTL(Enum, Str, ...) \
606 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
608#include "llvm/Frontend/OpenMP/OMPKinds.def"
612 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
622 LLVMContext::MD_callback,
624 2, {-1, -1},
true)}));
637 assert(Fn &&
"Failed to create OpenMP runtime function");
644 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
645 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
660 for (
auto Inst =
Block->getReverseIterator()->begin();
661 Inst !=
Block->getReverseIterator()->end();) {
662 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
686 ParallelRegionBlockSet.
clear();
688 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
707 ".omp_par", ArgsInZeroAddressSpace);
711 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
713 "Expected OpenMP outlining to be possible!");
715 for (
auto *V : OI.ExcludeArgsFromAggregate)
722 if (TargetCpuAttr.isStringAttribute())
725 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
726 if (TargetFeaturesAttr.isStringAttribute())
727 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
730 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
732 "OpenMP outlined functions should not return a value!");
744 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
751 "Expected instructions to add in the outlined region entry");
758 if (
I.isTerminator())
761 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
764 OI.EntryBB->moveBefore(&ArtificialEntry);
771 if (OI.PostOutlineCB)
772 OI.PostOutlineCB(*OutlinedFn);
803 errs() <<
"Error of kind: " << Kind
804 <<
" when emitting offload entries and metadata during "
805 "OMPIRBuilder finalization \n";
812 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
814 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
836 unsigned Reserve2Flags) {
838 LocFlags |= OMP_IDENT_FLAG_KMPC;
846 ConstantInt::get(
Int32, Reserve2Flags),
847 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
854 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
855 if (
GV.getInitializer() == Initializer)
860 M, OpenMPIRBuilder::Ident,
875 SrcLocStrSize = LocStr.
size();
884 if (
GV.isConstant() &&
GV.hasInitializer() &&
885 GV.getInitializer() == Initializer)
896 unsigned Line,
unsigned Column,
902 Buffer.
append(FunctionName);
904 Buffer.
append(std::to_string(Line));
906 Buffer.
append(std::to_string(Column));
914 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
925 if (
DIFile *DIF = DIL->getFile())
926 if (std::optional<StringRef> Source = DIF->getSource())
932 DIL->getColumn(), SrcLocStrSize);
944 "omp_global_thread_num");
949 bool ForceSimpleCall,
bool CheckCancelFlag) {
959 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
962 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
965 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
968 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
971 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
984 bool UseCancelBarrier =
989 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
990 : OMPRTL___kmpc_barrier),
993 if (UseCancelBarrier && CheckCancelFlag)
1002 omp::Directive CanceledDirective) {
1014 Value *CancelKind =
nullptr;
1015 switch (CanceledDirective) {
1016#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1017 case DirectiveEnum: \
1018 CancelKind = Builder.getInt32(Value); \
1020#include "llvm/Frontend/OpenMP/OMPKinds.def"
1031 auto ExitCB = [
this, CanceledDirective, Loc](
InsertPointTy IP) {
1032 if (CanceledDirective == OMPD_parallel) {
1036 omp::Directive::OMPD_unknown,
false,
1046 UI->eraseFromParent();
1059 auto *KernelArgsPtr =
1072 NumThreads, HostPtr, KernelArgsPtr};
1100 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1104 Value *Return =
nullptr;
1124 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1125 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1138 emitBlock(OffloadContBlock, CurFn,
true);
1143 omp::Directive CanceledDirective,
1146 "Unexpected cancellation!");
1196 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1199 "Expected at least tid and bounded tid as arguments");
1200 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1203 assert(CI &&
"Expected call instruction to outlined function");
1204 CI->
getParent()->setName(
"omp_parallel");
1207 Type *PtrTy = OMPIRBuilder->VoidPtr;
1211 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1215 Value *Args = ArgsAlloca;
1223 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1235 Value *Parallel51CallArgs[] = {
1239 NumThreads ? NumThreads : Builder.
getInt32(-1),
1242 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1245 Builder.
getInt64(NumCapturedVars)};
1250 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1265 I->eraseFromParent();
1287 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1288 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1296 F->addMetadata(LLVMContext::MD_callback,
1305 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1308 "Expected at least tid and bounded tid as arguments");
1309 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1312 CI->
getParent()->setName(
"omp_parallel");
1316 Value *ForkCallArgs[] = {
1317 Ident, Builder.
getInt32(NumCapturedVars),
1318 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1321 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1330 auto PtrTy = OMPIRBuilder->VoidPtr;
1331 if (IfCondition && NumCapturedVars == 0) {
1335 if (IfCondition && RealArgs.
back()->getType() != PtrTy)
1353 I->eraseFromParent();
1361 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1388 if (ProcBind != OMP_PROC_BIND_default) {
1392 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1420 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1425 "zero.addr.ascast");
1449 if (IP.getBlock()->end() == IP.getPoint()) {
1455 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1456 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1457 "Unexpected insertion point for finalization call!");
1493 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1496 assert(BodyGenCB &&
"Expected body generation callback!");
1498 BodyGenCB(InnerAllocaIP, CodeGenIP);
1500 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1506 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1508 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1509 ThreadID, ToBeDeletedVec);
1514 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1516 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1533 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1534 Blocks.push_back(PRegOutlinedExitBB);
1545 ".omp_par", ArgsInZeroAddressSpace);
1550 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1553 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1558 auto PrivHelper = [&](
Value &V) {
1559 if (&V == TIDAddr || &V == ZeroAddr) {
1565 for (
Use &U : V.uses())
1566 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1567 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1577 if (!V.getType()->isPointerTy()) {
1596 Value *ReplacementValue =
nullptr;
1597 CallInst *CI = dyn_cast<CallInst>(&V);
1599 ReplacementValue = PrivTID;
1602 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue));
1607 assert(ReplacementValue &&
1608 "Expected copy/create callback to set replacement value!");
1609 if (ReplacementValue == &V)
1614 UPtr->set(ReplacementValue);
1631 for (
Value *Input : Inputs) {
1636 for (
Value *Output : Outputs)
1640 "OpenMP outlining should not produce live-out values!");
1642 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1645 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1653 assert(FiniInfo.DK == OMPD_parallel &&
1654 "Unexpected finalization stack state!");
1664 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1665 UI->eraseFromParent();
1731 if (Dependencies.
empty())
1751 Type *DependInfo = OMPBuilder.DependInfo;
1754 Value *DepArray =
nullptr;
1760 DepArray = Builder.
CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1762 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1768 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1773 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1775 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1780 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1783 static_cast<unsigned int>(Dep.DepKind)),
1826 BodyGenCB(TaskAllocaIP, TaskBodyIP);
1836 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1838 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1839 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
1841 assert(OutlinedFn.getNumUses() == 1 &&
1842 "there must be a single user for the outlined function");
1843 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1847 bool HasShareds = StaleCI->
arg_size() > 1;
1886 assert(ArgStructAlloca &&
1887 "Unable to find the alloca instruction corresponding to arguments "
1888 "for extracted function");
1891 assert(ArgStructType &&
"Unable to find struct type corresponding to "
1892 "arguments for extracted function");
1900 TaskAllocFn, {Ident, ThreadID,
Flags,
1901 TaskSize, SharedsSize,
1913 Value *DepArray =
nullptr;
1914 if (Dependencies.
size()) {
1929 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1936 static_cast<unsigned int>(RTLDependInfoFields::Len));
1943 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1946 static_cast<unsigned int>(Dep.DepKind)),
1977 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
1983 if (Dependencies.
size()) {
2007 if (Dependencies.
size()) {
2028 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2032 I->eraseFromParent();
2080 if (IP.getBlock()->end() != IP.getPoint())
2091 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2092 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2093 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2124 unsigned CaseNumber = 0;
2125 for (
auto SectionCB : SectionCBs) {
2141 Value *LB = ConstantInt::get(I32Ty, 0);
2142 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2143 Value *ST = ConstantInt::get(I32Ty, 1);
2145 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2147 applyStaticWorkshareLoop(Loc.
DL,
LoopInfo, AllocaIP, !IsNowait);
2151 assert(FiniInfo.DK == OMPD_sections &&
2152 "Unexpected finalization stack state!");
2158 AfterIP = {FiniBB, FiniBB->
begin()};
2172 if (IP.getBlock()->end() != IP.getPoint())
2191 Directive OMPD = Directive::OMPD_sections;
2194 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2206 std::vector<WeakTrackingVH> &
List) {
2213 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
2217 if (UsedArray.
empty())
2224 GV->setSection(
"llvm.metadata");
2227Value *OpenMPIRBuilder::getGPUThreadID() {
2230 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2234Value *OpenMPIRBuilder::getGPUWarpSize() {
2239Value *OpenMPIRBuilder::getNVPTXWarpID() {
2244Value *OpenMPIRBuilder::getNVPTXLaneID() {
2246 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2247 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2252Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *
From,
2257 assert(FromSize > 0 &&
"From size must be greater than zero");
2258 assert(ToSize > 0 &&
"To size must be greater than zero");
2259 if (FromType == ToType)
2261 if (FromSize == ToSize)
2271 CastItem,
FromType->getPointerTo());
2276Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2281 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2285 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2289 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2290 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2291 Value *WarpSizeCast =
2293 Value *ShuffleCall =
2295 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2298void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2314 Value *ElemPtr = DstAddr;
2316 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2328 if ((
Size / IntSize) > 1) {
2352 Value *Res = createRuntimeShuffleFunction(
2361 Value *LocalElemPtr =
2368 Value *Res = createRuntimeShuffleFunction(
2382void OpenMPIRBuilder::emitReductionListCopy(
2383 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2385 CopyOptionsTy CopyOptions) {
2388 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2392 for (
auto En :
enumerate(ReductionInfos)) {
2393 const ReductionInfo &RI = En.value();
2394 Value *SrcElementAddr =
nullptr;
2395 Value *DestElementAddr =
nullptr;
2396 Value *DestElementPtrAddr =
nullptr;
2398 bool ShuffleInElement =
false;
2401 bool UpdateDestListPtr =
false;
2405 ReductionArrayTy, SrcBase,
2406 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2412 ReductionArrayTy, DestBase,
2413 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2419 ".omp.reduction.element");
2422 DestElementAddr = DestAlloca;
2425 DestElementAddr->
getName() +
".ascast");
2427 ShuffleInElement =
true;
2428 UpdateDestListPtr =
true;
2440 if (ShuffleInElement) {
2441 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2442 RemoteLaneOffset, ReductionArrayTy);
2444 switch (RI.EvaluationKind) {
2453 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2455 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2457 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2459 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2462 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2464 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2486 if (UpdateDestListPtr) {
2489 DestElementAddr->
getName() +
".ascast");
2495Function *OpenMPIRBuilder::emitInterWarpCopyFunction(
2505 "_omp_reduction_inter_warp_copy_func", &
M);
2528 "__openmp_nvptx_data_transfer_temporary_storage";
2532 if (!TransferMedium) {
2541 Value *GPUThreadID = getGPUThreadID();
2543 Value *LaneID = getNVPTXLaneID();
2545 Value *WarpID = getNVPTXWarpID();
2554 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2558 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2561 NumWarpsAlloca->
getName() +
".ascast");
2572 for (
auto En :
enumerate(ReductionInfos)) {
2577 const ReductionInfo &RI = En.value();
2579 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2582 unsigned NumIters = RealTySize / TySize;
2585 Value *Cnt =
nullptr;
2586 Value *CntAddr =
nullptr;
2596 CntAddr->
getName() +
".ascast");
2615 omp::Directive::OMPD_unknown,
2628 auto *RedListArrayTy =
2634 {ConstantInt::get(IndexTy, 0),
2635 ConstantInt::get(IndexTy, En.index())});
2660 omp::Directive::OMPD_unknown,
2669 Value *NumWarpsVal =
2672 Value *IsActiveThread =
2683 Value *TargetElemPtrPtr =
2685 {ConstantInt::get(IndexTy, 0),
2686 ConstantInt::get(IndexTy, En.index())});
2687 Value *TargetElemPtrVal =
2689 Value *TargetElemPtr = TargetElemPtrVal;
2695 Value *SrcMediumValue =
2714 RealTySize %= TySize;
2724Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2730 {Builder.getPtrTy(), Builder.getInt16Ty(),
2731 Builder.getInt16Ty(), Builder.getInt16Ty()},
2735 "_omp_reduction_shuffle_and_reduce_func", &
M);
2756 Type *ReduceListArgType = ReduceListArg->
getType();
2760 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2762 LaneIDArg->
getName() +
".addr");
2764 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2766 AlgoVerArg->
getName() +
".addr");
2773 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2776 ReduceListAlloca, ReduceListArgType,
2777 ReduceListAlloca->
getName() +
".ascast");
2779 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2781 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2782 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2784 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2787 RemoteReductionListAlloca->
getName() +
".ascast");
2796 Value *RemoteLaneOffset =
2805 emitReductionListCopy(
2807 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2838 Value *RemoteOffsetComp =
2855 ->addFnAttr(Attribute::NoUnwind);
2876 ReductionInfos, RemoteListAddrCast, ReduceList);
2889Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
2896 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
2900 "_omp_reduction_list_to_global_copy_func", &
M);
2917 BufferArg->
getName() +
".addr");
2924 BufferArgAlloca->
getName() +
".ascast");
2929 ReduceListArgAlloca->
getName() +
".ascast");
2935 Value *LocalReduceList =
2937 Value *BufferArgVal =
2942 for (
auto En :
enumerate(ReductionInfos)) {
2943 const ReductionInfo &RI = En.value();
2944 auto *RedListArrayTy =
2948 RedListArrayTy, LocalReduceList,
2949 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2957 ReductionsBufferTy, BufferVD, 0, En.index());
2959 switch (RI.EvaluationKind) {
2967 RI.ElementType, ElemPtr, 0, 0,
".realp");
2969 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2971 RI.ElementType, ElemPtr, 0, 1,
".imagp");
2973 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2976 RI.ElementType, GlobVal, 0, 0,
".realp");
2978 RI.ElementType, GlobVal, 0, 1,
".imagp");
2999Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3006 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3010 "_omp_reduction_list_to_global_reduce_func", &
M);
3027 BufferArg->
getName() +
".addr");
3032 auto *RedListArrayTy =
3037 Value *LocalReduceList =
3042 BufferArgAlloca->
getName() +
".ascast");
3047 ReduceListArgAlloca->
getName() +
".ascast");
3050 LocalReduceList->
getName() +
".ascast");
3060 for (
auto En :
enumerate(ReductionInfos)) {
3062 RedListArrayTy, LocalReduceListAddrCast,
3063 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3068 ReductionsBufferTy, BufferVD, 0, En.index());
3076 ->addFnAttr(Attribute::NoUnwind);
3082Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3089 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3093 "_omp_reduction_global_to_list_copy_func", &
M);
3110 BufferArg->
getName() +
".addr");
3117 BufferArgAlloca->
getName() +
".ascast");
3122 ReduceListArgAlloca->
getName() +
".ascast");
3127 Value *LocalReduceList =
3133 for (
auto En :
enumerate(ReductionInfos)) {
3135 auto *RedListArrayTy =
3139 RedListArrayTy, LocalReduceList,
3140 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3147 ReductionsBufferTy, BufferVD, 0, En.index());
3190Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3197 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3201 "_omp_reduction_global_to_list_reduce_func", &
M);
3218 BufferArg->
getName() +
".addr");
3228 Value *LocalReduceList =
3233 BufferArgAlloca->
getName() +
".ascast");
3238 ReduceListArgAlloca->
getName() +
".ascast");
3241 LocalReduceList->
getName() +
".ascast");
3251 for (
auto En :
enumerate(ReductionInfos)) {
3253 RedListArrayTy, ReductionList,
3254 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3259 ReductionsBufferTy, BufferVD, 0, En.index());
3267 ->addFnAttr(Attribute::NoUnwind);
3273std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3274 std::string Suffix =
3276 return (
Name + Suffix).str();
3279Function *OpenMPIRBuilder::createReductionFunction(
3281 ReductionGenCBKind ReductionGenCBKind,
AttributeList FuncAttrs) {
3283 {Builder.getPtrTy(), Builder.getPtrTy()},
3285 std::string
Name = getReductionFuncName(ReducerName);
3297 Value *LHSArrayPtr =
nullptr;
3298 Value *RHSArrayPtr =
nullptr;
3309 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3311 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3321 for (
auto En :
enumerate(ReductionInfos)) {
3322 const ReductionInfo &RI = En.value();
3324 RedArrayTy, RHSArrayPtr,
3325 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3328 RHSI8Ptr, RI.PrivateVariable->getType(),
3329 RHSI8Ptr->
getName() +
".ascast");
3332 RedArrayTy, LHSArrayPtr,
3333 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3336 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3347 return ReductionFunc;
3353 for (
auto En :
enumerate(ReductionInfos)) {
3354 unsigned Index = En.index();
3355 const ReductionInfo &RI = En.value();
3356 Value *LHSFixupPtr, *RHSFixupPtr;
3363 LHSPtrs[
Index], [ReductionFunc](
const Use &U) {
3364 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3368 RHSPtrs[
Index], [ReductionFunc](
const Use &U) {
3369 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3375 return ReductionFunc;
3383 assert(RI.Variable &&
"expected non-null variable");
3384 assert(RI.PrivateVariable &&
"expected non-null private variable");
3385 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3386 "expected non-null reduction generator callback");
3389 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3390 "expected variables and their private equivalents to have the same "
3393 assert(RI.Variable->getType()->isPointerTy() &&
3394 "expected variables to be pointers");
3401 bool IsNoWait,
bool IsTeamsReduction,
bool HasDistribute,
3403 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3417 if (ReductionInfos.
size() == 0)
3436 if (GridValue.has_value())
3454 Value *ReductionListAlloca =
3457 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3461 for (
auto En :
enumerate(ReductionInfos)) {
3464 RedArrayTy, ReductionList,
3465 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3472 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3473 Function *WcFunc = emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
3478 unsigned MaxDataSize = 0;
3480 for (
auto En :
enumerate(ReductionInfos)) {
3482 if (
Size > MaxDataSize)
3484 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3486 Value *ReductionDataSize =
3488 if (!IsTeamsReduction) {
3489 Value *SarFuncCast =
3493 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3496 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3501 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3503 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3504 Function *LtGCFunc = emitListToGlobalCopyFunction(
3505 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3506 Function *LtGRFunc = emitListToGlobalReduceFunction(
3507 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3508 Function *GtLCFunc = emitGlobalToListCopyFunction(
3509 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3510 Function *GtLRFunc = emitGlobalToListReduceFunction(
3511 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3515 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3517 Value *Args3[] = {SrcLocInfo,
3518 KernelTeamsReductionPtr,
3530 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3547 for (
auto En :
enumerate(ReductionInfos)) {
3554 Value *LHSPtr, *RHSPtr;
3556 &LHSPtr, &RHSPtr, CurFunc));
3561 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3565 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3569 assert(
false &&
"Unhandled ReductionGenCBKind");
3585 ".omp.reduction.func", &M);
3596 assert(RI.Variable &&
"expected non-null variable");
3597 assert(RI.PrivateVariable &&
"expected non-null private variable");
3598 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
3599 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
3600 "expected variables and their private equivalents to have the same "
3602 assert(RI.Variable->getType()->isPointerTy() &&
3603 "expected variables to be pointers");
3616 unsigned NumReductions = ReductionInfos.
size();
3623 for (
auto En :
enumerate(ReductionInfos)) {
3624 unsigned Index = En.index();
3642 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3647 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3650 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3652 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3653 : RuntimeFunction::OMPRTL___kmpc_reduce);
3656 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3657 ReductionFunc, Lock},
3676 for (
auto En :
enumerate(ReductionInfos)) {
3681 Value *RedValue =
nullptr;
3682 if (!IsByRef[En.index()]) {
3684 "red.value." +
Twine(En.index()));
3686 Value *PrivateRedValue =
3688 "red.private.value." +
Twine(En.index()));
3690 if (IsByRef[En.index()]) {
3692 PrivateRedValue, Reduced));
3695 PrivateRedValue, Reduced));
3700 if (!IsByRef[En.index()])
3704 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3705 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3713 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3734 for (
auto En :
enumerate(ReductionInfos)) {
3737 RedArrayTy, LHSArrayPtr, 0, En.index());
3742 RedArrayTy, RHSArrayPtr, 0, En.index());
3752 if (!IsByRef[En.index()])
3769 Directive OMPD = Directive::OMPD_master;
3774 Value *Args[] = {Ident, ThreadId};
3782 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3793 Directive OMPD = Directive::OMPD_masked;
3799 Value *ArgsEnd[] = {Ident, ThreadId};
3807 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3842 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
3855 "omp_" +
Name +
".next",
true);
3866 CL->Header = Header;
3885 NextBB, NextBB,
Name);
3909 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
3919 auto *IndVarTy = cast<IntegerType>(Start->getType());
3920 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
3921 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
3927 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
3955 Value *CountIfLooping;
3956 if (InclusiveStop) {
3966 "omp_" +
Name +
".tripcount");
3987 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
3990 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
3996 InsertPointTy AllocaIP,
3997 bool NeedsBarrier) {
3998 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4000 "Require dedicated allocate IP");
4012 Type *IVTy =
IV->getType();
4032 Constant *One = ConstantInt::get(IVTy, 1);
4040 Constant *SchedulingType = ConstantInt::get(
4041 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStatic));
4046 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4047 PUpperBound, PStride, One,
Zero});
4052 CLI->setTripCount(TripCount);
4073 omp::Directive::OMPD_for,
false,
4084 bool NeedsBarrier,
Value *ChunkSize) {
4085 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4086 assert(ChunkSize &&
"Chunk size is required");
4091 Type *IVTy =
IV->getType();
4093 "Max supported tripcount bitwidth is 64 bits");
4095 :
Type::getInt64Ty(Ctx);
4098 Constant *One = ConstantInt::get(InternalIVTy, 1);
4110 Value *PLowerBound =
4112 Value *PUpperBound =
4121 Value *CastedChunkSize =
4123 Value *CastedTripCount =
4126 Constant *SchedulingType = ConstantInt::get(
4127 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4141 SchedulingType, PLastIter,
4142 PLowerBound, PUpperBound,
4147 Value *FirstChunkStart =
4149 Value *FirstChunkStop =
4154 Value *NextChunkStride =
4159 Value *DispatchCounter;
4163 FirstChunkStart, CastedTripCount, NextChunkStride,
4187 Value *IsLastChunk =
4189 Value *CountUntilOrigTripCount =
4192 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4193 Value *BackcastedChunkTC =
4195 CLI->setTripCount(BackcastedChunkTC);
4200 Value *BackcastedDispatchCounter =
4234 case WorksharingLoopType::ForStaticLoop:
4237 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4240 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4242 case WorksharingLoopType::DistributeStaticLoop:
4245 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4248 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4250 case WorksharingLoopType::DistributeForStaticLoop:
4253 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4256 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4259 if (Bitwidth != 32 && Bitwidth != 64) {
4281 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4282 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4287 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4288 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4293 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4294 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4295 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4331 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
4339 "Expected unique undroppable user of outlined function");
4340 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
4341 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
4343 "Expected outlined function call to be located in loop preheader");
4345 if (OutlinedFnCallInstruction->
arg_size() > 1)
4352 LoopBodyArg, ParallelTaskPtr, TripCount,
4355 for (
auto &ToBeDeletedItem : ToBeDeleted)
4356 ToBeDeletedItem->eraseFromParent();
4362 InsertPointTy AllocaIP,
4375 OI.OuterAllocaBB = AllocaIP.getBlock();
4380 "omp.prelatch",
true);
4400 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
4402 ParallelRegionBlockSet.
end());
4422 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
4431 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
4432 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
4438 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
4445 OI.PostOutlineCB = [=, ToBeDeletedVec =
4446 std::move(ToBeDeleted)](
Function &OutlinedFn) {
4448 ToBeDeletedVec, LoopType);
4456 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
4457 bool HasSimdModifier,
bool HasMonotonicModifier,
4458 bool HasNonmonotonicModifier,
bool HasOrderedClause,
4461 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
4463 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
4464 HasNonmonotonicModifier, HasOrderedClause);
4466 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
4467 OMPScheduleType::ModifierOrdered;
4468 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
4469 case OMPScheduleType::BaseStatic:
4470 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
4472 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4473 NeedsBarrier, ChunkSize);
4475 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier);
4477 case OMPScheduleType::BaseStaticChunked:
4479 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4480 NeedsBarrier, ChunkSize);
4482 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
4485 case OMPScheduleType::BaseRuntime:
4486 case OMPScheduleType::BaseAuto:
4487 case OMPScheduleType::BaseGreedy:
4488 case OMPScheduleType::BaseBalanced:
4489 case OMPScheduleType::BaseSteal:
4490 case OMPScheduleType::BaseGuidedSimd:
4491 case OMPScheduleType::BaseRuntimeSimd:
4493 "schedule type does not support user-defined chunk sizes");
4495 case OMPScheduleType::BaseDynamicChunked:
4496 case OMPScheduleType::BaseGuidedChunked:
4497 case OMPScheduleType::BaseGuidedIterativeChunked:
4498 case OMPScheduleType::BaseGuidedAnalyticalChunked:
4499 case OMPScheduleType::BaseStaticBalancedChunked:
4500 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4501 NeedsBarrier, ChunkSize);
4517 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
4520 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
4533 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
4536 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
4548 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
4551 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
4558 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4560 "Require dedicated allocate IP");
4562 "Require valid schedule type");
4564 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
4565 OMPScheduleType::ModifierOrdered;
4576 Type *IVTy =
IV->getType();
4594 Constant *One = ConstantInt::get(IVTy, 1);
4615 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4619 {SrcLoc, ThreadNum, SchedulingType, One,
4620 UpperBound, One, Chunk});
4630 PLowerBound, PUpperBound, PStride});
4631 Constant *Zero32 = ConstantInt::get(I32Type, 0);
4640 auto *PI = cast<PHINode>(Phi);
4641 PI->setIncomingBlock(0, OuterCond);
4642 PI->setIncomingValue(0, LowerBound);
4646 auto *Br = cast<BranchInst>(Term);
4647 Br->setSuccessor(0, OuterCond);
4655 auto *CI = cast<CmpInst>(Comp);
4656 CI->setOperand(1, UpperBound);
4659 auto *BI = cast<BranchInst>(Branch);
4660 assert(BI->getSuccessor(1) == Exit);
4661 BI->setSuccessor(1, OuterCond);
4674 omp::Directive::OMPD_for,
false,
4694 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
4695 for (
Use &U : BB->uses()) {
4696 auto *UseInst = dyn_cast<Instruction>(U.getUser());
4699 if (BBsToErase.count(UseInst->getParent()))
4706 while (BBsToErase.remove_if(HasRemainingUses)) {
4717 assert(
Loops.size() >= 1 &&
"At least one loop required");
4718 size_t NumLoops =
Loops.size();
4722 return Loops.front();
4734 Loop->collectControlBlocks(OldControlBBs);
4738 if (ComputeIP.
isSet())
4745 Value *CollapsedTripCount =
nullptr;
4748 "All loops to collapse must be valid canonical loops");
4749 Value *OrigTripCount = L->getTripCount();
4750 if (!CollapsedTripCount) {
4751 CollapsedTripCount = OrigTripCount;
4763 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
4771 Value *Leftover = Result->getIndVar();
4773 NewIndVars.
resize(NumLoops);
4774 for (
int i = NumLoops - 1; i >= 1; --i) {
4775 Value *OrigTripCount =
Loops[i]->getTripCount();
4778 NewIndVars[i] = NewIndVar;
4783 NewIndVars[0] = Leftover;
4792 BasicBlock *ContinueBlock = Result->getBody();
4794 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
4801 ContinueBlock =
nullptr;
4802 ContinuePred = NextSrc;
4809 for (
size_t i = 0; i < NumLoops - 1; ++i)
4810 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
4816 for (
size_t i = NumLoops - 1; i > 0; --i)
4817 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
4820 ContinueWith(Result->getLatch(),
nullptr);
4827 for (
size_t i = 0; i < NumLoops; ++i)
4828 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
4842std::vector<CanonicalLoopInfo *>
4846 "Must pass as many tile sizes as there are loops");
4847 int NumLoops =
Loops.size();
4848 assert(NumLoops >= 1 &&
"At least one loop to tile required");
4860 Loop->collectControlBlocks(OldControlBBs);
4868 assert(L->isValid() &&
"All input loops must be valid canonical loops");
4869 OrigTripCounts.
push_back(L->getTripCount());
4880 for (
int i = 0; i < NumLoops - 1; ++i) {
4893 for (
int i = 0; i < NumLoops; ++i) {
4895 Value *OrigTripCount = OrigTripCounts[i];
4908 Value *FloorTripOverflow =
4914 "omp_floor" +
Twine(i) +
".tripcount",
true);
4922 std::vector<CanonicalLoopInfo *> Result;
4923 Result.reserve(NumLoops * 2);
4936 auto EmbeddNewLoop =
4937 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
4940 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
4945 Enter = EmbeddedLoop->
getBody();
4947 OutroInsertBefore = EmbeddedLoop->
getLatch();
4948 return EmbeddedLoop;
4952 const Twine &NameBase) {
4955 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
4956 Result.push_back(EmbeddedLoop);
4960 EmbeddNewLoops(FloorCount,
"floor");
4966 for (
int i = 0; i < NumLoops; ++i) {
4970 Value *FloorIsEpilogue =
4972 Value *TileTripCount =
4979 EmbeddNewLoops(TileCounts,
"tile");
4984 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
4993 BodyEnter =
nullptr;
4994 BodyEntered = ExitBB;
5007 for (
int i = 0; i < NumLoops; ++i) {
5010 Value *OrigIndVar = OrigIndVars[i];
5038 if (Properties.
empty())
5061 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5065 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5073 if (
I.mayReadOrWriteMemory()) {
5077 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5099 const Twine &NamePrefix) {
5105 SplitBefore = dyn_cast<Instruction>(IfCond);
5151 VMap[
Block] = NewBB;
5161 if (TargetTriple.
isX86()) {
5162 if (Features.
lookup(
"avx512f"))
5164 else if (Features.
lookup(
"avx"))
5168 if (TargetTriple.
isPPC())
5170 if (TargetTriple.
isWasm())
5177 Value *IfCond, OrderKind Order,
5196 if (AlignedVars.
size()) {
5199 for (
auto &AlignedItem : AlignedVars) {
5200 Value *AlignedPtr = AlignedItem.first;
5201 Value *Alignment = AlignedItem.second;
5203 AlignedPtr, Alignment);
5210 createIfVersion(CanonicalLoop, IfCond, VMap,
"simd");
5214 "Cannot find value which corresponds to original loop latch");
5215 assert(isa<BasicBlock>(MappedLatch) &&
5216 "Cannot cast mapped latch block value to BasicBlock");
5217 BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
5246 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5254 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5262 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
5264 if (Simdlen || Safelen) {
5268 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
5294static std::unique_ptr<TargetMachine>
5298 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
5299 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
5300 const std::string &
Triple = M->getTargetTriple();
5310 std::nullopt, OptLevel));
5334 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
5349 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
5354 nullptr, ORE,
static_cast<int>(OptLevel),
5375 <<
" Threshold=" << UP.
Threshold <<
"\n"
5378 <<
" PartialOptSizeThreshold="
5397 if (
auto *Load = dyn_cast<LoadInst>(&
I)) {
5398 Ptr = Load->getPointerOperand();
5399 }
else if (
auto *Store = dyn_cast<StoreInst>(&
I)) {
5400 Ptr = Store->getPointerOperand();
5404 Ptr =
Ptr->stripPointerCasts();
5406 if (
auto *Alloca = dyn_cast<AllocaInst>(
Ptr)) {
5407 if (Alloca->getParent() == &
F->getEntryBlock())
5427 int MaxTripCount = 0;
5428 bool MaxOrZero =
false;
5429 unsigned TripMultiple = 0;
5431 bool UseUpperBound =
false;
5433 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
5435 unsigned Factor = UP.
Count;
5436 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
5447 assert(Factor >= 0 &&
"Unroll factor must not be negative");
5463 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
5476 *UnrolledCLI =
Loop;
5481 "unrolling only makes sense with a factor of 2 or larger");
5483 Type *IndVarTy =
Loop->getIndVarType();
5490 std::vector<CanonicalLoopInfo *>
LoopNest =
5505 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
5508 (*UnrolledCLI)->assertOK();
5526 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
5545 if (!CPVars.
empty()) {
5550 Directive OMPD = Directive::OMPD_single;
5555 Value *Args[] = {Ident, ThreadId};
5581 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
5586 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
5589 ConstantInt::get(
Int64, 0), CPVars[
I],
5592 }
else if (!IsNowait)
5594 omp::Directive::OMPD_unknown,
false,
5606 Directive OMPD = Directive::OMPD_critical;
5611 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
5612 Value *Args[] = {Ident, ThreadId, LockVar};
5629 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5637 const Twine &
Name,
bool IsDependSource) {
5640 [](
Value *SV) {
return SV->
getType()->isIntegerTy(64); }) &&
5641 "OpenMP runtime requires depend vec with i64 type");
5654 for (
unsigned I = 0;
I < NumLoops; ++
I) {
5668 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
5686 Directive OMPD = Directive::OMPD_ordered;
5695 Value *Args[] = {Ident, ThreadId};
5705 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5711 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
5712 bool HasFinalize,
bool IsCancellable) {
5721 if (!isa_and_nonnull<BranchInst>(SplitPos))
5728 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
5738 "Unexpected control flow graph state!!");
5739 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
5741 "Unexpected Control Flow State!");
5747 "Unexpected Insertion point location!");
5750 auto InsertBB = merged ? ExitPredBB : ExitBB;
5751 if (!isa_and_nonnull<BranchInst>(SplitPos))
5761 if (!Conditional || !EntryCall)
5781 UI->eraseFromParent();
5789 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
5797 "Unexpected finalization stack state!");
5800 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
5850 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
5852 "copyin.not.master.end");
5907 Value *DependenceAddress,
bool HaveNowaitClause) {
5915 if (Device ==
nullptr)
5917 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
5918 if (NumDependences ==
nullptr) {
5919 NumDependences = ConstantInt::get(
Int32, 0);
5923 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
5925 Ident, ThreadId, InteropVar, InteropTypeVal,
5926 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
5935 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
5943 if (Device ==
nullptr)
5945 if (NumDependences ==
nullptr) {
5946 NumDependences = ConstantInt::get(
Int32, 0);
5950 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
5952 Ident, ThreadId, InteropVar, Device,
5953 NumDependences, DependenceAddress, HaveNowaitClauseVal};
5962 Value *NumDependences,
5963 Value *DependenceAddress,
5964 bool HaveNowaitClause) {
5971 if (Device ==
nullptr)
5973 if (NumDependences ==
nullptr) {
5974 NumDependences = ConstantInt::get(
Int32, 0);
5978 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
5980 Ident, ThreadId, InteropVar, Device,
5981 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6010 int32_t MinThreadsVal, int32_t MaxThreadsVal,
6011 int32_t MinTeamsVal, int32_t MaxTeamsVal) {
6029 const std::string DebugPrefix =
"_debug__";
6030 if (KernelName.
ends_with(DebugPrefix)) {
6031 KernelName = KernelName.
drop_back(DebugPrefix.length());
6038 if (MinTeamsVal > 1 || MaxTeamsVal > 0)
6042 if (MaxThreadsVal < 0)
6043 MaxThreadsVal = std::max(
6046 if (MaxThreadsVal > 0)
6057 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6060 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6061 Constant *DynamicEnvironmentInitializer =
6065 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6067 DL.getDefaultGlobalsAddressSpace());
6071 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6072 ? DynamicEnvironmentGV
6074 DynamicEnvironmentPtr);
6077 ConfigurationEnvironment, {
6078 UseGenericStateMachineVal,
6079 MayUseNestedParallelismVal,
6086 ReductionBufferLength,
6089 KernelEnvironment, {
6090 ConfigurationEnvironmentInitializer,
6094 std::string KernelEnvironmentName =
6095 (KernelName +
"_kernel_environment").str();
6098 KernelEnvironmentInitializer, KernelEnvironmentName,
6100 DL.getDefaultGlobalsAddressSpace());
6104 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6105 ? KernelEnvironmentGV
6107 KernelEnvironmentPtr);
6108 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6136 UI->eraseFromParent();
6144 int32_t TeamsReductionDataSize,
6145 int32_t TeamsReductionBufferLength) {
6150 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6154 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6160 const std::string DebugPrefix =
"_debug__";
6162 KernelName = KernelName.
drop_back(DebugPrefix.length());
6163 auto *KernelEnvironmentGV =
6165 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6166 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6168 KernelEnvironmentInitializer,
6169 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6171 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6173 KernelEnvironmentGV->setInitializer(NewInitializer);
6178 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6182 auto *KernelOp = dyn_cast<ConstantAsMetadata>(
Op->getOperand(0));
6183 if (!KernelOp || KernelOp->getValue() != &
Kernel)
6185 auto *Prop = dyn_cast<MDString>(
Op->getOperand(1));
6186 if (!Prop || Prop->getString() !=
Name)
6198 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->
getOperand(2));
6199 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6202 OldVal->getValue()->getType(),
6203 Min ? std::min(OldLimit,
Value) : std::max(OldLimit,
Value))));
6212 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6217std::pair<int32_t, int32_t>
6219 int32_t ThreadLimit =
6224 if (!Attr.isValid() || !Attr.isStringAttribute())
6225 return {0, ThreadLimit};
6228 if (!llvm::to_integer(UBStr, UB, 10))
6229 return {0, ThreadLimit};
6230 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6231 if (!llvm::to_integer(LBStr, LB, 10))
6237 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
6238 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6239 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6241 return {0, ThreadLimit};
6251 llvm::utostr(LB) +
"," + llvm::utostr(UB));
6258std::pair<int32_t, int32_t>
6265 int32_t LB, int32_t UB) {
6270 Kernel.
addFnAttr(
"amdgpu-max-num-workgroups", llvm::utostr(LB) +
",1,1");
6275void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
6290 assert(OutlinedFn &&
"The outlined function must exist if embedded");
6299Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
6305 "Named kernel already exists?");
6320 ? GenerateFunctionCallback(EntryFnName)
6326 if (!IsOffloadEntry)
6329 std::string EntryFnIDName =
6331 ? std::string(EntryFnName)
6335 EntryFnName, EntryFnIDName);
6342 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
6343 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
6344 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
6346 EntryInfo, EntryAddr, OutlinedFnID,
6348 return OutlinedFnID;
6371 bool IsStandAlone = !BodyGenCB;
6395 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6402 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
6407 omp::OMPRTL___tgt_target_data_begin_mapper);
6411 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
6412 if (isa<AllocaInst>(DeviceMap.second.second)) {
6449 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6466 emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
6476 emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
6482 emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
6493 bool IsGPUDistribute) {
6494 assert((IVSize == 32 || IVSize == 64) &&
6495 "IV size is not compatible with the omp runtime");
6497 if (IsGPUDistribute)
6499 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
6500 : omp::OMPRTL___kmpc_distribute_static_init_4u)
6501 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
6502 : omp::OMPRTL___kmpc_distribute_static_init_8u);
6504 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
6505 : omp::OMPRTL___kmpc_for_static_init_4u)
6506 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
6507 : omp::OMPRTL___kmpc_for_static_init_8u);
6514 assert((IVSize == 32 || IVSize == 64) &&
6515 "IV size is not compatible with the omp runtime");
6517 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
6518 : omp::OMPRTL___kmpc_dispatch_init_4u)
6519 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
6520 : omp::OMPRTL___kmpc_dispatch_init_8u);
6527 assert((IVSize == 32 || IVSize == 64) &&
6528 "IV size is not compatible with the omp runtime");
6530 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
6531 : omp::OMPRTL___kmpc_dispatch_next_4u)
6532 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
6533 : omp::OMPRTL___kmpc_dispatch_next_8u);
6540 assert((IVSize == 32 || IVSize == 64) &&
6541 "IV size is not compatible with the omp runtime");
6543 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
6544 : omp::OMPRTL___kmpc_dispatch_fini_4u)
6545 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
6546 : omp::OMPRTL___kmpc_dispatch_fini_8u);
6569 for (
auto &Arg : Inputs)
6570 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
6574 for (
auto &Arg : Inputs)
6575 ParameterTypes.
push_back(Arg->getType());
6584 auto OldInsertPoint = Builder.
saveIP();
6612 auto AllocaIP = Builder.
saveIP();
6617 const auto &ArgRange =
6619 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
6640 if (
auto *Const = dyn_cast<Constant>(Input))
6645 if (
auto *Instr = dyn_cast<Instruction>(
User))
6646 if (Instr->getFunction() == Func)
6647 Instr->replaceUsesOfWith(Input, InputCopy);
6653 for (
auto InArg :
zip(Inputs, ArgRange)) {
6654 Value *Input = std::get<0>(InArg);
6655 Argument &Arg = std::get<1>(InArg);
6656 Value *InputCopy =
nullptr;
6659 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.
saveIP()));
6677 if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
6678 llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
6679 llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
6680 DeferredReplacement.
push_back(std::make_pair(Input, InputCopy));
6684 ReplaceValue(Input, InputCopy, Func);
6688 for (
auto Deferred : DeferredReplacement)
6689 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
6732 Type *TaskPtrTy = OMPBuilder.TaskPtr;
6733 Type *TaskTy = OMPBuilder.Task;
6738 ".omp_target_task_proxy_func",
6740 ProxyFn->getArg(0)->setName(
"thread.id");
6741 ProxyFn->getArg(1)->setName(
"task");
6747 bool HasShareds = StaleCI->
arg_size() > 1;
6753 "StaleCI with shareds should have exactly two arguments.");
6755 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
6756 assert(ArgStructAlloca &&
6757 "Unable to find the alloca instruction corresponding to arguments "
6758 "for extracted function");
6759 auto *ArgStructType =
6760 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
6763 Builder.
CreateAlloca(ArgStructType,
nullptr,
"structArg");
6764 Value *TaskT = ProxyFn->getArg(1);
6765 Value *ThreadId = ProxyFn->getArg(0);
6766 Value *SharedsSize =
6767 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
6774 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
6777 Builder.
CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
6790 [&OMPBuilder, &Builder, &Inputs, &CBFunc,
6791 &ArgAccessorFuncCB](
StringRef EntryFnName) {
6793 CBFunc, ArgAccessorFuncCB);
6797 IsOffloadEntry, OutlinedFn, OutlinedFnID);
6899 TargetTaskAllocaBB->
begin());
6903 OI.
EntryBB = TargetTaskAllocaBB;
6909 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
6922 EmitTargetCallFallbackCB, Args, DeviceID,
6923 RTLoc, TargetTaskAllocaIP));
6932 HasNoWait](
Function &OutlinedFn)
mutable {
6934 "there must be a single user for the outlined function");
6937 bool HasShareds = StaleCI->
arg_size() > 1;
6941 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
6972 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
6973 assert(ArgStructAlloca &&
6974 "Unable to find the alloca instruction corresponding to arguments "
6975 "for extracted function");
6976 auto *ArgStructType =
6977 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
6978 assert(ArgStructType &&
"Unable to find struct type corresponding to "
6979 "arguments for extracted function");
6996 TaskAllocFn, {Ident, ThreadID,
Flags,
6997 TaskSize, SharedsSize,
7040 }
else if (DepArray) {
7059 I->eraseFromParent();
7077 DeviceAddrCB, CustomMapperCB);
7091 auto &&EmitTargetCallFallbackCB =
7098 bool HasNoWait =
false;
7099 bool HasDependencies = Dependencies.size() > 0;
7100 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7104 if (!OutlinedFnID) {
7105 if (RequiresOuterTargetTask) {
7111 OutlinedFn,
nullptr, EmitTargetCallFallbackCB, KArgs,
7112 nullptr,
nullptr, AllocaIP, Dependencies,
7133 for (
auto V : NumTeams)
7135 for (
auto V : NumThreads)
7138 unsigned NumTargetItems =
Info.NumberOfPtrs;
7151 NumTeamsC, NumThreadsC, DynCGGroupMem,
7156 if (RequiresOuterTargetTask) {
7158 OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID,
7159 RTLoc, AllocaIP, Dependencies, HasNoWait));
7162 Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
7163 DeviceID, RTLoc, AllocaIP));
7187 OutlinedFn, OutlinedFnID, Args, CBFunc,
7195 NumThreads, Args, GenMapInfoCB, Dependencies);
7209 return OS.str().str();
7223 assert(Elem.second->getValueType() == Ty &&
7224 "OMP internal variable has different type than requested");
7240 GV->setAlignment(std::max(TypeAlign, PtrAlign));
7247Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
7248 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
7249 std::string
Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
7260 return SizePtrToInt;
7265 std::string VarName) {
7273 return MaptypesArrayGlobal;
7278 unsigned NumOperands,
7287 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
7291 ArrI64Ty,
nullptr,
".offload_sizes");
7302 int64_t DeviceID,
unsigned NumOperands) {
7308 Value *ArgsBaseGEP =
7310 {Builder.getInt32(0), Builder.getInt32(0)});
7313 {Builder.getInt32(0), Builder.getInt32(0)});
7314 Value *ArgSizesGEP =
7316 {Builder.getInt32(0), Builder.getInt32(0)});
7322 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
7329 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
7330 "expected region end call to runtime only when end call is separate");
7332 auto VoidPtrTy = UnqualPtrTy;
7333 auto VoidPtrPtrTy = UnqualPtrTy;
7335 auto Int64PtrTy = UnqualPtrTy;
7337 if (!
Info.NumberOfPtrs) {
7349 Info.RTArgs.BasePointersArray,
7360 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
7361 :
Info.RTArgs.MapTypesArray,
7367 if (!
Info.EmitDebug)
7376 if (!
Info.HasMapper)
7401 "struct.descriptor_dim");
7403 enum { OffsetFD = 0, CountFD, StrideFD };
7407 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
7410 if (NonContigInfo.
Dims[
I] == 1)
7417 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
7418 unsigned RevIdx = EE -
II - 1;
7421 {Builder.getInt64(0), Builder.getInt64(II)});
7425 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
7430 NonContigInfo.
Counts[L][RevIdx], CountLVal,
7435 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
7444 Info.RTArgs.PointersArray, 0,
I);
7458 Info.clearArrayInfo();
7461 if (
Info.NumberOfPtrs == 0)
7471 PointerArrayType,
nullptr,
".offload_baseptrs");
7474 PointerArrayType,
nullptr,
".offload_ptrs");
7476 PointerArrayType,
nullptr,
".offload_mappers");
7477 Info.RTArgs.MappersArray = MappersArray;
7484 ConstantInt::get(Int64Ty, 0));
7486 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
7487 if (
auto *CI = dyn_cast<Constant>(CombinedInfo.
Sizes[
I])) {
7488 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
7489 if (IsNonContiguous &&
7490 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7492 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
7500 RuntimeSizes.
set(
I);
7503 if (RuntimeSizes.
all()) {
7506 SizeArrayType,
nullptr,
".offload_sizes");
7512 auto *SizesArrayGbl =
7517 if (!RuntimeSizes.
any()) {
7518 Info.RTArgs.SizesArray = SizesArrayGbl;
7524 SizeArrayType,
nullptr,
".offload_sizes");
7529 SizesArrayGbl, OffloadSizeAlign,
7534 Info.RTArgs.SizesArray = Buffer;
7542 for (
auto mapFlag : CombinedInfo.
Types)
7544 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7548 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
7553 auto *MapNamesArrayGbl =
7555 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
7556 Info.EmitDebug =
true;
7558 Info.RTArgs.MapNamesArray =
7560 Info.EmitDebug =
false;
7565 if (
Info.separateBeginEndCalls()) {
7566 bool EndMapTypesDiffer =
false;
7568 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7569 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
7570 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7571 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
7572 EndMapTypesDiffer =
true;
7575 if (EndMapTypesDiffer) {
7577 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
7582 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
7590 if (
Info.requiresDevicePointerInfo()) {
7597 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
7599 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
7601 DeviceAddrCB(
I, BP);
7613 if (RuntimeSizes.
test(
I)) {
7627 if (
Value *CustomMFunc = CustomMapperCB(
I))
7631 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
7637 Info.NumberOfPtrs == 0)
7682 if (
auto *CI = dyn_cast<ConstantInt>(
Cond)) {
7683 auto CondConstant = CI->getSExtValue();
7713bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
7717 "Unexpected Atomic Ordering.");
7781 assert(
X.Var->getType()->isPointerTy() &&
7782 "OMP Atomic expects a pointer to target memory");
7783 Type *XElemTy =
X.ElemTy;
7786 "OMP atomic read expected a scalar type");
7788 Value *XRead =
nullptr;
7794 XRead = cast<Value>(XLD);
7808 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
7820 assert(
X.Var->getType()->isPointerTy() &&
7821 "OMP Atomic expects a pointer to target memory");
7822 Type *XElemTy =
X.ElemTy;
7825 "OMP atomic write expected a scalar type");
7840 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
7853 Type *XTy =
X.Var->getType();
7855 "OMP Atomic expects a pointer to target memory");
7856 Type *XElemTy =
X.ElemTy;
7859 "OMP atomic update expected a scalar type");
7862 "OpenMP atomic does not support LT or GT operations");
7865 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
7866 X.IsVolatile, IsXBinopExpr);
7867 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
7872Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
7904std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
7907 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr) {
7910 bool emitRMWOp =
false;
7918 emitRMWOp = XElemTy;
7921 emitRMWOp = (IsXBinopExpr && XElemTy);
7928 std::pair<Value *, Value *> Res;
7935 Res.second = Res.first;
7937 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
7955 X->getName() +
".atomic.cont");
7959 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
7962 PHI->addIncoming(OldVal, CurBB);
7968 X->getName() +
".atomic.fltCast");
7971 X->getName() +
".atomic.ptrCast");
7982 Result->setVolatile(VolatileX);
7988 Res.first = OldExprVal;
8008 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr) {
8013 Type *XTy =
X.Var->getType();
8015 "OMP Atomic expects a pointer to target memory");
8016 Type *XElemTy =
X.ElemTy;
8019 "OMP atomic capture expected a scalar type");
8021 "OpenMP atomic does not support LT or GT operations");
8027 std::pair<Value *, Value *> Result =
8028 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
8029 X.IsVolatile, IsXBinopExpr);
8031 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
8034 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
8046 IsPostfixUpdate, IsFailOnly, Failure);
8058 assert(
X.Var->getType()->isPointerTy() &&
8059 "OMP atomic expects a pointer to target memory");
8062 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
8063 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
8068 if (
Op == OMPAtomicCompareOp::EQ) {
8087 "OldValue and V must be of same type");
8088 if (IsPostfixUpdate) {
8106 CurBBTI,
X.Var->getName() +
".atomic.exit");
8126 Value *CapturedValue =
8134 assert(R.Var->getType()->isPointerTy() &&
8135 "r.var must be of pointer type");
8136 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
8139 Value *ResultCast = R.IsSigned
8145 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
8146 "Op should be either max or min at this point");
8147 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
8187 Value *CapturedValue =
nullptr;
8188 if (IsPostfixUpdate) {
8189 CapturedValue = OldValue;
8221 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
8268 bool SubClausesPresent =
8269 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
8272 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
8273 "if lowerbound is non-null, then upperbound must also be non-null "
8274 "for bounds on num_teams");
8276 if (NumTeamsUpper ==
nullptr)
8279 if (NumTeamsLower ==
nullptr)
8280 NumTeamsLower = NumTeamsUpper;
8284 "argument to if clause must be an integer value");
8289 ConstantInt::get(IfExpr->
getType(), 0));
8298 if (ThreadLimit ==
nullptr)
8304 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
8309 BodyGenCB(AllocaIP, CodeGenIP);
8320 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
8322 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
8324 auto HostPostOutlineCB = [
this, Ident,
8325 ToBeDeleted](
Function &OutlinedFn)
mutable {
8330 "there must be a single user for the outlined function");
8335 "Outlined function must have two or three arguments only");
8337 bool HasShared = OutlinedFn.
arg_size() == 3;
8345 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
8346 "outlined function.");
8353 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
8357 I->eraseFromParent();
8372 std::string VarName) {
8381 return MapNamesArrayGlobal;
8386void OpenMPIRBuilder::initializeTypes(
Module &M) {
8389#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
8390#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
8391 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
8392 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
8393#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
8394 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
8395 VarName##Ptr = PointerType::getUnqual(VarName);
8396#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
8397 T = StructType::getTypeByName(Ctx, StructName); \
8399 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
8401 VarName##Ptr = PointerType::getUnqual(T);
8402#include "llvm/Frontend/OpenMP/OMPKinds.def"
8413 while (!Worklist.
empty()) {
8417 if (BlockSet.
insert(SuccBB).second)
8429 "omp_offloading_entries");
8453 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
8472 auto &&GetMDInt = [
this](
unsigned V) {
8480 auto &&TargetRegionMetadataEmitter =
8481 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
8496 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
8497 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
8498 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
8499 GetMDInt(E.getOrder())};
8502 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
8511 auto &&DeviceGlobalVarMetadataEmitter =
8512 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
8522 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
8523 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
8527 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
8534 DeviceGlobalVarMetadataEmitter);
8536 for (
const auto &E : OrderedEntries) {
8537 assert(E.first &&
"All ordered entries must exist!");
8538 if (
const auto *CE =
8539 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
8541 if (!CE->getID() || !CE->getAddress()) {
8553 }
else if (
const auto *CE =
dyn_cast<
8564 if (!CE->getAddress()) {
8569 if (CE->getVarSize() == 0)
8575 "Declaret target link address is set.");
8578 if (!CE->getAddress()) {
8590 if (
auto *
GV = dyn_cast<GlobalValue>(CE->getAddress()))
8591 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
8599 Flags, CE->getLinkage(), CE->getVarName());
8602 Flags, CE->getLinkage());
8623 unsigned FileID,
unsigned Line,
unsigned Count) {
8626 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
8633 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
8636 EntryInfo.
Line, NewCount);
8643 auto FileIDInfo = CallBack();
8646 "getTargetEntryUniqueInfo, error message: " +
8652 std::get<1>(FileIDInfo));
8658 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
8660 !(Remain & 1); Remain = Remain >> 1)
8678 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
8680 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
8687 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8688 Flags |= MemberOfFlag;
8694 bool IsDeclaration,
bool IsExternallyVisible,
8696 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
8697 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
8698 std::function<
Constant *()> GlobalInitializer,
8714 if (!IsExternallyVisible)
8716 OS <<
"_decl_tgt_ref_ptr";
8725 auto *
GV = cast<GlobalVariable>(
Ptr);
8729 if (GlobalInitializer)
8730 GV->setInitializer(GlobalInitializer());
8736 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
8737 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
8738 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(
Ptr));
8741 return cast<Constant>(
Ptr);
8750 bool IsDeclaration,
bool IsExternallyVisible,
8752 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
8753 std::vector<Triple> TargetTriple,
8754 std::function<
Constant *()> GlobalInitializer,
8771 VarName = MangledName;
8779 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
8795 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
8796 GvAddrRef->setConstant(
true);
8798 GvAddrRef->setInitializer(
Addr);
8799 GeneratedRefs.push_back(GvAddrRef);
8809 VarName = (
Addr) ?
Addr->getName() :
"";
8813 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
8814 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
8815 LlvmPtrTy, GlobalInitializer, VariableLinkage);
8816 VarName = (
Addr) ?
Addr->getName() :
"";
8837 auto &&GetMDInt = [MN](
unsigned Idx) {
8838 auto *V = cast<ConstantAsMetadata>(MN->getOperand(
Idx));
8839 return cast<ConstantInt>(V->getValue())->getZExtValue();
8842 auto &&GetMDString = [MN](
unsigned Idx) {
8843 auto *V = cast<MDString>(MN->getOperand(
Idx));
8844 return V->getString();
8847 switch (GetMDInt(0)) {
8875 if (HostFilePath.
empty())
8879 if (std::error_code Err = Buf.getError()) {
8881 "OpenMPIRBuilder: " +
8889 if (std::error_code Err =
M.getError()) {
8891 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
8903 return OffloadEntriesTargetRegion.empty() &&
8904 OffloadEntriesDeviceGlobalVar.empty();
8907unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
8909 auto It = OffloadEntriesTargetRegionCount.find(
8910 getTargetRegionEntryCountKey(EntryInfo));
8911 if (It == OffloadEntriesTargetRegionCount.end())
8916void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
8918 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
8919 EntryInfo.
Count + 1;
8925 OffloadEntriesTargetRegion[EntryInfo] =
8927 OMPTargetRegionEntryTargetRegion);
8928 ++OffloadingEntriesNum;
8934 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
8937 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
8943 if (!hasTargetRegionEntryInfo(EntryInfo)) {
8946 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
8947 Entry.setAddress(
Addr);
8949 Entry.setFlags(
Flags);
8952 hasTargetRegionEntryInfo(EntryInfo,
true))
8954 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
8955 "Target region entry already registered!");
8957 OffloadEntriesTargetRegion[EntryInfo] = Entry;
8958 ++OffloadingEntriesNum;
8960 incrementTargetRegionEntryInfoCount(EntryInfo);
8967 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
8969 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
8970 if (It == OffloadEntriesTargetRegion.end()) {
8974 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
8982 for (
const auto &It : OffloadEntriesTargetRegion) {
8983 Action(It.first, It.second);
8989 OffloadEntriesDeviceGlobalVar.try_emplace(
Name, Order,
Flags);
8990 ++OffloadingEntriesNum;
8998 if (!hasDeviceGlobalVarEntryInfo(VarName))
9000 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
9001 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
9002 if (Entry.getVarSize() == 0) {
9003 Entry.setVarSize(VarSize);
9004 Entry.setLinkage(Linkage);
9008 Entry.setVarSize(VarSize);
9009 Entry.setLinkage(Linkage);
9010 Entry.setAddress(
Addr);
9012 if (hasDeviceGlobalVarEntryInfo(VarName)) {
9013 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
9014 assert(Entry.isValid() && Entry.getFlags() ==
Flags &&
9015 "Entry not initialized!");
9016 if (Entry.getVarSize() == 0) {
9017 Entry.setVarSize(VarSize);
9018 Entry.setLinkage(Linkage);
9023 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
9027 OffloadEntriesDeviceGlobalVar.try_emplace(
9028 VarName, OffloadingEntriesNum,
Addr, VarSize,
Flags, Linkage,
"");
9029 ++OffloadingEntriesNum;
9036 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
9037 Action(E.getKey(), E.getValue());
9044void CanonicalLoopInfo::collectControlBlocks(
9051 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
9063void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
9067 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
9075void CanonicalLoopInfo::mapIndVar(
9085 for (
Use &U : OldIV->
uses()) {
9086 auto *
User = dyn_cast<Instruction>(U.getUser());
9089 if (
User->getParent() == getCond())
9091 if (
User->getParent() == getLatch())
9097 Value *NewIV = Updater(OldIV);
9100 for (
Use *U : ReplacableUses)
9121 "Preheader must terminate with unconditional branch");
9123 "Preheader must jump to header");
9126 assert(isa<BranchInst>(Header->getTerminator()) &&
9127 "Header must terminate with unconditional branch");
9128 assert(Header->getSingleSuccessor() ==
Cond &&
9129 "Header must jump to exiting block");
9132 assert(
Cond->getSinglePredecessor() == Header &&
9133 "Exiting block only reachable from header");
9135 assert(isa<BranchInst>(
Cond->getTerminator()) &&
9136 "Exiting block must terminate with conditional branch");
9138 "Exiting block must have two successors");
9139 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
9140 "Exiting block's first successor jump to the body");
9141 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
9142 "Exiting block's second successor must exit the loop");
9146 "Body only reachable from exiting block");
9151 "Latch must terminate with unconditional branch");
9159 assert(isa<BranchInst>(Exit->getTerminator()) &&
9160 "Exit block must terminate with unconditional branch");
9162 "Exit block must jump to after block");
9166 "After block only reachable from exit block");
9170 assert(IndVar &&
"Canonical induction variable not found?");
9172 "Induction variable must be an integer");
9174 "Induction variable must be a PHI in the loop header");
9175 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
9177 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
9178 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
9180 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
9182 assert(cast<BinaryOperator>(NextIndVar)->
getOpcode() == BinaryOperator::Add);
9183 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
9184 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
9187 Value *TripCount = getTripCount();
9188 assert(TripCount &&
"Loop trip count not found?");
9190 "Trip count and induction variable must have the same type");
9192 auto *CmpI = cast<CmpInst>(&
Cond->front());
9194 "Exit condition must be a signed less-than comparison");
9196 "Exit condition must compare the induction variable");
9198 "Exit condition must compare with the trip count");
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
BlockVerifier::State From
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Function * createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI)
Create an entry point for a target task with the following.
static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static MDNode * getNVPTXMDNode(Function &Kernel, StringRef Name)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static void emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, Type *ParallelTaskPtr, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, Constant *OutlinedFnID, ArrayRef< int32_t > NumTeams, ArrayRef< int32_t > NumThreads, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, SmallVector< llvm::OpenMPIRBuilder::DependData > Dependencies={})
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
A container for analyses that lazily runs them and caches their results.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getFnAttrs() const
The function attributes are returned.
AttributeList addFnAttributes(LLVMContext &C, const AttrBuilder &B) const
Add function attribute to the list.
AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Instruction & back() const
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static ConstantInt * getFalse(LLVMContext &Context)
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getDefaultGlobalsAddressSpace() const
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
unsigned getAllocaAddrSpace() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Analysis pass which computes a DominatorTree.
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
Value * CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS, const Twine &Name="")
Return the i64 difference between two pointer values, dividing out the size of the pointed-to objects...
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
UnreachableInst * CreateUnreachable()
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
IntegerType * getIndexTy(const DataLayout &DL, unsigned AddrSpace)
Fetch the type of an integer that should be used to index GEP operations within AddressSpace.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name="")
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
InsertPoint saveIP() const
Returns the current insert point.
Constant * CreateGlobalStringPtr(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Same as CreateGlobalString, but return a pointer with "i8*" type instead of a pointer to array of i8.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt16(uint16_t C)
Get a constant 16-bit value.
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveBeforePreserving(Instruction *MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static bool classof(const Value *V)
Methods for support type inquiry through isa, cast, and dyn_cast:
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
LLVMContext & getContext() const
Get the global data context.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
StringRef getName() const
Get a short "name" for the module.
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
iterator_range< global_iterator > globals()
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a 'declare target link' entry.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'declare target to' entry.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
void setGridValue(omp::GV G)
StringRef separator() const
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
omp::GV getGridValue() const
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
ReductionGenCBKind
Enum class for the ReductionGen callback type to be used.
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
InsertPointTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsTeamsReduction=false, bool HasDistribute=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
void emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
InsertPointTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, ArrayRef< int32_t > NumTeams, ArrayRef< int32_t > NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, SmallVector< DependData > Dependencies={})
Generator for '#omp target'.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for '#omp teams'.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false)
Generator for '#omp reduction'.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
InsertPointTy emitTargetTask(Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP, SmallVector< OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
Generate a target-task for the target construct.
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
iterator insertAfter(iterator where, pointer New)
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
void emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data, StringRef SectionName)
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_GENERIC
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
CodeGenOptLevel
Code generation optimization level.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * DynCGGroupMem
The size of the dynamic shared memory.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
Data structure to contain the information needed to uniquely identify a target entry.
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
unsigned GV_Warp_Size
The default value of maximum number of threads in a worker warp.