#define DEBUG_TYPE "openmp-opt"

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> EnableParallelRegionMerging(
    "openmp-opt-enable-merging", cl::ZeroOrMore,
    cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);

static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                        cl::init(false), cl::Hidden);

static cl::opt<bool> HideMemoryTransferLatency(
    "openmp-hide-memory-transfer-latency",
    cl::desc("[WIP] Tries to hide the latency of host to device memory"
             " transfers"),
    cl::Hidden, cl::init(false));

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
          "Number of OpenMP target region entry points (=kernels) identified");
STATISTIC(NumOpenMPParallelRegionsReplacedInGPUStateMachine,
          "Number of OpenMP parallel regions replaced with ID in GPU state machines");
STATISTIC(NumOpenMPParallelRegionsMerged,
          "Number of OpenMP parallel regions merged");

static constexpr auto TAG = "[" DEBUG_TYPE "]";
/// OpenMP-specific information cache, shared by all OpenMPOpt instances.
struct OMPInformationCache : public InformationCache {
  OMPInformationCache(Module &M, AnalysisGetter &AG,
                      BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
                      SmallPtrSetImpl<Kernel> &Kernels)
      : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
        Kernels(Kernels) {
    OMPBuilder.initialize();
    initializeRuntimeFunctions();
    initializeInternalControlVars();
  }

  /// Description of an OpenMP internal control variable (ICV) and its
  /// associated getter/setter runtime functions.
  struct InternalControlVarInfo {
    // ... (Kind, Name, EnvVarName, InitKind, InitValue, Setter, Getter)
  };

  /// Description of a known OpenMP runtime function and the uses thereof.
  struct RuntimeFunctionInfo {
    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;
    /// The name of the runtime function.
    StringRef Name;
    /// Flag to indicate a variadic function.
    bool IsVarArg;
    /// The return type of the function.
    Type *ReturnType;
    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;
    /// The declaration if available.
    Function *Declaration = nullptr;

    /// Clear the maps of uses.
    void clearUsesMap() { UsesMap.clear(); }

    /// Boolean conversion that is true if the runtime function was found.
    operator bool() const { return Declaration; }

    /// Return the vector of uses in function \p F, creating it if necessary.
    UseVector &getOrCreateUseVector(Function *F) {
      std::shared_ptr<UseVector> &UV = UsesMap[F];
      if (!UV)
        UV = std::make_shared<UseVector>();
      return *UV;
    }

    /// Return the vector of uses in function \p F or `nullptr` if there are
    /// none.
    const UseVector *getUseVector(Function &F) const {
      auto I = UsesMap.find(&F);
      if (I != UsesMap.end())
        return I->second.get();
      return nullptr;
    }

    /// Return how many functions contain uses of this runtime function.
    size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(SmallVectorImpl<Function *> &SCC,
                    function_ref<bool(Use &, Function &)> CB) {
      for (Function *F : SCC)
        foreachUse(CB, F);
    }

    /// Run the callback \p CB on each use within function \p F and forget
    /// the uses for which the callback returned true.
    void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
      SmallVector<unsigned, 8> ToBeDeleted;

      unsigned Idx = 0;
      UseVector &UV = getOrCreateUseVector(F);
      for (Use *U : UV) {
        if (CB(*U, *F))
          ToBeDeleted.push_back(Idx);
        ++Idx;
      }

      // Remove the to-be-deleted indices in reverse order as prior
      // modifications will not modify the smaller indices.
      while (!ToBeDeleted.empty()) {
        unsigned Idx = ToBeDeleted.pop_back_val();
        UV[Idx] = UV.back();
        UV.pop_back();
      }
    }

    /// Map from functions to all uses of this runtime function contained in
    /// them.
    DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
  };

  /// The runtime function information, indexed by runtime function kind.
  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                  RuntimeFunction::OMPRTL___last>
      RFIs;

  /// The ICV information, indexed by ICV kind.
  EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVs;

  /// Run the function \p CB on all uses of the function \p F.
  static void foreachUse(Function &F, function_ref<void(Use &)> CB) {
    for (Use &U : F.uses())
      CB(U);
  }
  /// Initialize all internal control variables based on their corresponding
  /// entries in OMPKinds.def.
  void initializeInternalControlVars() {
#define ICV_RT_SET(_Name, RTL)                                                 \
  {                                                                            \
    auto &ICV = ICVs[_Name];                                                   \
    ICV.Setter = RTL;                                                          \
  }
#define ICV_RT_GET(Name, RTL)                                                  \
  {                                                                            \
    auto &ICV = ICVs[Name];                                                    \
    ICV.Getter = RTL;                                                          \
  }
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
  {                                                                            \
    auto &ICV = ICVs[Enum];                                                    \
    ICV.Name = _Name;                                                          \
    ICV.Kind = Enum;                                                           \
    ICV.InitKind = Init;                                                       \
    ICV.EnvVarName = _EnvVarName;                                              \
    switch (ICV.InitKind) {                                                    \
    case ICV_IMPLEMENTATION_DEFINED:                                           \
      ICV.InitValue = nullptr;                                                 \
      break;                                                                   \
    case ICV_ZERO:                                                             \
      ICV.InitValue = ConstantInt::get(                                        \
          Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
      break;                                                                   \
    case ICV_FALSE:                                                            \
      ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
      break;                                                                   \
    case ICV_LAST:                                                             \
      break;                                                                   \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType and argument types
  /// \p RTFArgTypes.
  static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                  SmallVector<Type *, 8> &RTFArgTypes) {
    if (!F)
      return false;
    if (F->getReturnType() != RTFRetType)
      return false;
    if (F->arg_size() != RTFArgTypes.size())
      return false;

    auto RTFTyIt = RTFArgTypes.begin();
    for (Argument &Arg : F->args()) {
      if (Arg.getType() != *RTFTyIt)
        return false;
      ++RTFTyIt;
    }

    return true;
  }
  /// Collect uses of runtime function \p RFI in the module slice. If
  /// \p CollectStats is set, update the usage statistics.
  unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
    unsigned NumUses = 0;
    if (!RFI.Declaration)
      return NumUses;

    if (CollectStats) {
      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
    }

    for (Use &U : RFI.Declaration->uses()) {
      if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
        if (ModuleSlice.count(UserI->getFunction())) {
          RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
          ++NumUses;
        }
      } else {
        // Uses outside of instructions are tracked under the nullptr key.
        RFI.getOrCreateUseVector(nullptr).push_back(&U);
        ++NumUses;
      }
    }
    return NumUses;
  }

  /// Recollect uses of the runtime function \p RTF.
  void recollectUsesForFunction(RuntimeFunction RTF) {
    auto &RFI = RFIs[RTF];
    RFI.clearUsesMap();
    collectUses(RFI, /*CollectStats=*/false);
  }

  /// Recollect uses of all tracked runtime functions.
  void recollectUses() {
    for (int Idx = 0; Idx < RFIs.size(); ++Idx)
      recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
  }
  /// Helper to initialize all runtime function information for those defined
  /// in OpenMPKinds.def.
  void initializeRuntimeFunctions() {
    Module &M = *((*ModuleSlice.begin())->getParent());

    // Helper macros for handling __VA_ARGS__ in OMP_RTL
#define OMP_TYPE(VarName, ...)                                                 \
  Type *VarName = OMPBuilder.VarName;                                          \
  (void)VarName;

#define OMP_ARRAY_TYPE(VarName, ...)                                           \
  ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
  (void)VarName##Ty;                                                           \
  PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
  (void)VarName##PtrTy;

#define OMP_FUNCTION_TYPE(VarName, ...)                                        \
  FunctionType *VarName = OMPBuilder.VarName;                                  \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_STRUCT_TYPE(VarName, ...)                                          \
  StructType *VarName = OMPBuilder.VarName;                                    \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = OMPBuilder._ReturnType;                                 \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = collectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  /// Collection of known kernels (\see Kernel) in the module.
  SmallPtrSetImpl<Kernel> &Kernels;

  /// An OpenMP-IR-Builder instance used to build and modify OpenMP code.
  OpenMPIRBuilder OMPBuilder;
};
/// Used to map the values physically (in the IR) stored in an offload
/// array, to a vector in memory.
struct OffloadArray {
  /// Physical array (in the IR).
  AllocaInst *Array = nullptr;
  /// Mapped values.
  SmallVector<Value *, 8> StoredValues;
  /// Last stores made in the offload array.
  SmallVector<StoreInst *, 8> LastAccesses;

  OffloadArray() = default;

  /// Initializes the OffloadArray with the values stored in \p Array before
  /// instruction \p Before is reached. Returns false if the initialization
  /// fails. This MUST be used immediately before the call to the tgt
  /// function.
  bool initialize(AllocaInst &Array, Instruction &Before) {
    if (!Array.getAllocatedType()->isArrayTy())
      return false;

    if (!getValues(Array, Before))
      return false;

    this->Array = &Array;
    return true;
  }

  /// Positions of the relevant arguments in the runtime call.
  static const unsigned DeviceIDArgNum = 1;
  static const unsigned BasePtrsArgNum = 3;
  static const unsigned PtrsArgNum = 4;
  static const unsigned SizesArgNum = 5;
private:
  /// Traverses the BasicBlock where \p Array is, collecting the stores made
  /// to it, until instruction \p Before is reached.
  bool getValues(AllocaInst &Array, Instruction &Before) {
    // Initialize the containers.
    const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
    StoredValues.assign(NumValues, nullptr);
    LastAccesses.assign(NumValues, nullptr);

    // TODO: This assumes the instruction \p Before is in the same
    //  BasicBlock as \p Array. Make it general, for any control flow graph.
    BasicBlock *BB = Array.getParent();
    if (BB != Before.getParent())
      return false;

    const DataLayout &DL = Array.getModule()->getDataLayout();
    const unsigned int PointerSize = DL.getPointerSize();

    for (Instruction &I : *BB) {
      if (&I == &Before)
        break;

      if (!isa<StoreInst>(&I))
        continue;

      auto *S = cast<StoreInst>(&I);
      int64_t Offset = -1;
      auto *Dst =
          GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
      if (Dst == &Array) {
        int64_t Idx = Offset / PointerSize;
        StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
        LastAccesses[Idx] = S;
      }
    }

    return isFilled();
  }

  /// Returns true if all values in StoredValues and LastAccesses are not
  /// nullptrs.
  bool isFilled() {
    const unsigned NumValues = StoredValues.size();
    for (unsigned I = 0; I < NumValues; ++I) {
      if (!StoredValues[I] || !LastAccesses[I])
        return false;
    }

    return true;
  }
};
struct OpenMPOpt {

  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache, Attributor &A)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
  /// Check if any remarks are enabled for openmp-opt.
  bool remarksEnabled() {
    auto &Ctx = M.getContext();
    return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
  }

  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
  bool run() {
    bool Changed = false;

    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                      << " functions in a slice with "
                      << OMPInfoCache.ModuleSlice.size() << " functions\n");

    if (PrintICVValues)
      printICVs();
    if (PrintOpenMPKernels)
      printKernels();

    Changed |= rewriteDeviceCodeStateMachine();

    Changed |= runAttributor();

    // Recollect uses, in case Attributor deleted any.
    OMPInfoCache.recollectUses();

    Changed |= deleteParallelRegions();
    if (HideMemoryTransferLatency)
      Changed |= hideMemTransfersLatency();
    if (remarksEnabled())
      analysisGlobalization();
    Changed |= deduplicateRuntimeCalls();
    if (EnableParallelRegionMerging) {
      if (mergeParallelRegions()) {
        deduplicateRuntimeCalls();
        Changed = true;
      }
    }

    return Changed;
  }
  /// Print initial ICV values for testing.
  /// FIXME: This should be done from the Attributor once it is added.
  void printICVs() const {
    InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
                                 ICV_proc_bind};

    for (Function *F : OMPInfoCache.ModuleSlice) {
      for (auto ICV : ICVs) {
        auto ICVInfo = OMPInfoCache.ICVs[ICV];
        auto Remark = [&](OptimizationRemark OR) {
          return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
                    << " Value: "
                    << (ICVInfo.InitValue
                            ? ICVInfo.InitValue->getValue().toString(10, true)
                            : "IMPLEMENTATION_DEFINED");
        };

        emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
      }
    }
  }
  /// Print OpenMP GPU kernels for testing.
  void printKernels() const {
    for (Function *F : SCC) {
      if (!OMPInfoCache.Kernels.count(F))
        continue;

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP GPU kernel "
                  << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
      };

      emitRemarkOnFunction(F, "OpenMPGPU", Remark);
    }
  }
  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given it has to be the callee of the regular call.
  static CallInst *getCallIfRegularCall(
      Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(U.getUser());
    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

  /// Return the call if \p V is a regular call. If \p RFI is given it has to
  /// be the callee of the regular call.
  static CallInst *getCallIfRegularCall(
      Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }
  /// Merge parallel regions when it is safe.
  bool mergeParallelRegions() {
    const unsigned CallbackCalleeOperand = 2;
    const unsigned CallbackFirstArgOperand = 3;
    using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

    // Check if there are any __kmpc_fork_call calls to merge.
    OMPInformationCache::RuntimeFunctionInfo &RFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

    if (!RFI.Declaration)
      return false;

    // Unmergable calls that prevent merging a parallel region.
    OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
        OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
        OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
    };

    bool Changed = false;
    LoopInfo *LI = nullptr;
    DominatorTree *DT = nullptr;

    DenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;

    BasicBlock *StartBB = nullptr, *EndBB = nullptr;
    auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                         BasicBlock &ContinuationIP) {
      BasicBlock *CGStartBB = CodeGenIP.getBlock();
      BasicBlock *CGEndBB =
          SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
      assert(StartBB != nullptr && "StartBB should not be null");
      CGStartBB->getTerminator()->setSuccessor(0, StartBB);
      assert(EndBB != nullptr && "EndBB should not be null");
      EndBB->getTerminator()->setSuccessor(0, CGEndBB);
    };

    auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
                      Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
      ReplacementValue = &Inner;
      return CodeGenIP;
    };

    auto FiniCB = [&](InsertPointTy CodeGenIP) {};

    /// Create a sequential execution region within a merged parallel region,
    /// encapsulated in a master construct with a barrier for synchronization.
    auto CreateSequentialRegion = [&](Function *OuterFn,
                                      BasicBlock *OuterPredBB,
                                      Instruction *SeqStartI,
                                      Instruction *SeqEndI) {
      // Isolate the instructions of the sequential region to a separate block.
      BasicBlock *ParentBB = SeqStartI->getParent();
      BasicBlock *SeqEndBB =
          SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
      BasicBlock *SeqAfterBB =
          SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
      BasicBlock *SeqStartBB =
          SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");

      assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
             "Expected a different CFG");
      const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
      ParentBB->getTerminator()->eraseFromParent();

      auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                           BasicBlock &ContinuationIP) {
        BasicBlock *CGStartBB = CodeGenIP.getBlock();
        BasicBlock *CGEndBB =
            SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
        assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
        CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
        assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
        SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
      };
      auto FiniCB = [&](InsertPointTy CodeGenIP) {};

      // Find outputs from the sequential region to outside users and
      // broadcast their values to them.
      for (Instruction &I : *SeqStartBB) {
        SmallPtrSet<Instruction *, 4> OutsideUsers;
        for (User *Usr : I.users()) {
          Instruction &UsrI = *cast<Instruction>(Usr);
          // Ignore outputs to LT intrinsics, code extraction for the merged
          // parallel region will fix them.
          if (UsrI.isLifetimeStartOrEnd())
            continue;

          if (UsrI.getParent() != SeqStartBB)
            OutsideUsers.insert(&UsrI);
        }

        if (OutsideUsers.empty())
          continue;

        // Emit an alloca in the outer region to store the broadcasted value.
        const DataLayout &DL = M.getDataLayout();
        AllocaInst *AllocaI = new AllocaInst(
            I.getType(), DL.getAllocaAddrSpace(), nullptr,
            I.getName() + ".seq.output.alloc", &OuterFn->front().front());

        // Emit a store instruction in the sequential BB to update the value.
        new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());

        // Emit a load instruction and replace the use of the output value
        // with it.
        for (Instruction *UsrI : OutsideUsers) {
          LoadInst *LoadI = new LoadInst(
              I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
          UsrI->replaceUsesOfWith(&I, LoadI);
        }
      }

      OpenMPIRBuilder::LocationDescription Loc(
          InsertPointTy(ParentBB, ParentBB->end()), DL);
      InsertPointTy SeqAfterIP =
          OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);

      OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);

      BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());

      LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
                        << "\n");
    };

    // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
    // contained in BB and only separated by instructions that can be
    // redundantly executed in parallel. The block BB is split before the
    // first call (in MergableCIs) and after the last so the entire region we
    // merge into a single parallel region is contained in a single basic
    // block. We use the OpenMPIRBuilder to outline that block and call the
    // resulting function via __kmpc_fork_call.
    auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
      // TODO: Change the interface to allow single CIs expanded, e.g, to
      // include an outer loop.
      assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");

      auto Remark = [&](OptimizationRemark OR) {
        OR << "Parallel region at "
           << ore::NV("OpenMPParallelMergeFront",
                      MergableCIs.front()->getDebugLoc())
           << " merged with parallel regions at ";
        for (auto *CI : llvm::drop_begin(MergableCIs)) {
          OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
          if (CI != MergableCIs.back())
            OR << ", ";
        }
        return OR;
      };

      emitRemark<OptimizationRemark>(MergableCIs.front(),
                                     "OpenMPParallelRegionMerging", Remark);

      Function *OriginalFn = BB->getParent();
      LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
                        << " parallel regions in " << OriginalFn->getName()
                        << "\n");

      // Isolate the calls to merge in a separate block.
      EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
      BasicBlock *AfterBB =
          SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
      StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
                           "omp.par.merged");

      assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
      const DebugLoc DL = BB->getTerminator()->getDebugLoc();
      BB->getTerminator()->eraseFromParent();

      // Create sequential regions for sequential instructions that are
      // in-between mergable parallel regions.
      for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
           It != End; ++It) {
        Instruction *ForkCI = *It;
        Instruction *NextForkCI = *(It + 1);

        // Continue if there are no in-between instructions.
        if (ForkCI->getNextNode() == NextForkCI)
          continue;

        CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
                               NextForkCI->getPrevNode());
      }

      OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
                                               DL);
      IRBuilder<>::InsertPoint AllocaIP(
          &OriginalFn->getEntryBlock(),
          OriginalFn->getEntryBlock().getFirstInsertionPt());
      // Create the merged parallel region with default proc binding, to
      // avoid overriding binding settings, and without explicit cancellation.
      InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
          Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
          OMP_PROC_BIND_default, /* IsCancellable */ false);
      BranchInst::Create(AfterBB, AfterIP.getBlock());

      // Perform the actual outlining.
      OMPInfoCache.OMPBuilder.finalize(/* AllowExtractorSinking */ true);

      Function *OutlinedFn = MergableCIs.front()->getCaller();

      // Replace the __kmpc_fork_call calls with direct calls to the outlined
      // callbacks.
      SmallVector<Value *, 8> Args;
      for (auto *CI : MergableCIs) {
        Value *Callee =
            CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
        FunctionType *FT =
            cast<FunctionType>(Callee->getType()->getPointerElementType());
        Args.clear();
        Args.push_back(OutlinedFn->getArg(0));
        Args.push_back(OutlinedFn->getArg(1));
        for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
             U < E; ++U)
          Args.push_back(CI->getArgOperand(U));

        CallInst *NewCI = CallInst::Create(FT, Callee, Args, "",
                                           CI->getParent()->getTerminator());
        if (CI->getDebugLoc())
          NewCI->setDebugLoc(CI->getDebugLoc());

        // Forward parameter attributes from the callback to the callee.
        for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
             U < E; ++U)
          for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
            NewCI->addParamAttr(
                U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);

        // Emit an explicit barrier to replace the implicit fork-join barrier.
        if (CI != MergableCIs.back()) {
          // TODO: Remove barrier if the merged parallel region includes the
          // 'nowait' clause.
          OMPInfoCache.OMPBuilder.createBarrier(
              InsertPointTy(NewCI->getParent(),
                            NewCI->getNextNode()->getIterator()),
              OMPD_parallel);
        }

        auto Remark = [&](OptimizationRemark OR) {
          return OR << "Parallel region at "
                    << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
                    << " merged with "
                    << ore::NV("OpenMPParallelMergeFront",
                               MergableCIs.front()->getDebugLoc());
        };
        if (CI != MergableCIs.front())
          emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
                                         Remark);

        CI->eraseFromParent();
      }

      assert(OutlinedFn != OriginalFn && "Outlining failed");
      CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
      CGUpdater.reanalyzeFunction(*OriginalFn);

      NumOpenMPParallelRegionsMerged += MergableCIs.size();

      return true;
    };

    // Helper function that identifies sequences of __kmpc_fork_call uses in a
    // basic block.
    auto DetectPRsCB = [&](Use &U, Function &F) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      BB2PRMap[CI->getParent()].insert(CI);

      return false;
    };

    BB2PRMap.clear();
    RFI.foreachUse(SCC, DetectPRsCB);
    SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
    // Find mergable parallel regions within a basic block that are safe to
    // merge, that is any in-between instructions can safely execute in
    // parallel after merging.
    // TODO: support merging across basic-blocks.
    for (auto &It : BB2PRMap) {
      auto &CIs = It.getSecond();
      if (CIs.size() < 2)
        continue;

      BasicBlock *BB = It.getFirst();
      SmallVector<CallInst *, 4> MergableCIs;

      /// Returns true if the instruction is mergable, false otherwise.
      /// A terminator instruction is unmergable by definition since merging
      /// works within a BB. Instructions before the mergable region are
      /// mergable if they are not calls to OpenMP runtime functions that may
      /// set different execution parameters for subsequent parallel regions.
      /// Instructions in-between parallel regions are mergable if they are
      /// not calls to any non-intrinsic function since that may call a
      /// function that uses OpenMP runtime calls that we do not want to merge.
      auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
        // We do not merge across BBs, hence return false (unmergable) if the
        // instruction is a terminator.
        if (I.isTerminator())
          return false;

        if (!isa<CallInst>(&I))
          return true;

        CallInst *CI = cast<CallInst>(&I);
        if (IsBeforeMergableRegion) {
          Function *CalledFunction = CI->getCalledFunction();
          if (!CalledFunction)
            return false;
          // Return false (unmergable) if the call before the parallel region
          // sets an explicit affinity (proc_bind) or number of threads
          // (num_threads). Those settings may be incompatible with following
          // parallel regions.
          // TODO: ICV tracking to detect compatibility.
          for (const auto &RFI : UnmergableCallsInfo) {
            if (CalledFunction == RFI.Declaration)
              return false;
          }
        } else {
          // Return false (unmergable) if there is a non-intrinsic call
          // instruction in-between parallel regions. It may call an
          // unmergable OpenMP runtime function in its callpath.
          // TODO: Keep track of possible OpenMP calls in the callpath.
          if (!isa<IntrinsicInst>(CI))
            return false;
        }

        return true;
      };

      // Find the maximal number of parallel region CIs that are safe to merge.
      for (auto It = BB->begin(), End = BB->end(); It != End;) {
        Instruction &I = *It;
        ++It;

        if (CIs.count(&I)) {
          MergableCIs.push_back(cast<CallInst>(&I));
          continue;
        }

        // Continue expanding if the instruction is mergable.
        if (IsMergable(I, MergableCIs.empty()))
          continue;

        // Forward the instruction iterator to skip the next parallel region
        // since there is an unmergable instruction which can affect it.
        for (; It != End; ++It) {
          Instruction &SkipI = *It;
          if (CIs.count(&SkipI)) {
            LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
                              << " due to " << I << "\n");
            ++It;
            break;
          }
        }

        // Store the mergable regions found.
        if (MergableCIs.size() > 1) {
          MergableCIsVector.push_back(MergableCIs);
          LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
                            << " parallel regions in block " << BB->getName()
                            << " of function " << BB->getParent()->getName()
                            << "\n");
        }

        MergableCIs.clear();
      }

      if (!MergableCIsVector.empty()) {
        Changed = true;

        for (auto &MergableCIs : MergableCIsVector)
          Merge(MergableCIs, BB);
        MergableCIsVector.clear();
      }
    }

    if (Changed) {
      // Re-collect uses for fork calls, emitted barrier calls, and any
      // emitted master/end_master calls.
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
    }

    return Changed;
  }
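// At the source level, the merging above corresponds roughly to the following
// rewrite (a sketch, not from the original file; the exact barrier and master
// placement is produced by the OpenMPIRBuilder callbacks above):
//
//   #pragma omp parallel        |   #pragma omp parallel
//   { work1(); }                |   {
//   seq();                      |     work1();
//   #pragma omp parallel        |     #pragma omp barrier
//   { work2(); }                |     #pragma omp master
//                               |     { seq(); }
//                               |     #pragma omp barrier
//                               |     work2();
//                               |   }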
  /// Try to delete parallel regions if possible.
  bool deleteParallelRegions() {
    const unsigned CallbackCalleeOperand = 2;

    OMPInformationCache::RuntimeFunctionInfo &RFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

    if (!RFI.Declaration)
      return false;

    bool Changed = false;
    auto DeleteCallCB = [&](Use &U, Function &) {
      CallInst *CI = getCallIfRegularCall(U);
      if (!CI)
        return false;
      auto *Fn = dyn_cast<Function>(
          CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
      if (!Fn)
        return false;
      if (!Fn->onlyReadsMemory())
        return false;
      if (!Fn->hasFnAttribute(Attribute::WillReturn))
        return false;

      LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
                        << CI->getCaller()->getName() << "\n");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Parallel region in "
                  << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
                  << " deleted";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
                                     Remark);

      CGUpdater.removeCallSite(*CI);
      CI->eraseFromParent();
      Changed = true;
      ++NumOpenMPParallelRegionsDeleted;
      return true;
    };

    RFI.foreachUse(SCC, DeleteCallCB);

    return Changed;
  }
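// Illustration (not part of the original source): a parallel region is
// deletable when its outlined callback neither writes memory nor may fail to
// return, e.g.
//
//   define internal void @.omp_outlined.(i32* %tid, i32* %bound_tid)
//       readonly willreturn { ... }
//   call void @__kmpc_fork_call(%struct.ident_t* @loc, i32 0,
//       void (i32*, i32*, ...)* @.omp_outlined.)
//
// Executing such a region has no observable effect, so the fork call is
// erased and the statistic above is bumped.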
  /// Try to eliminate runtime calls by reusing existing ones.
  bool deduplicateRuntimeCalls() {
    bool Changed = false;

    RuntimeFunction DeduplicableRuntimeCallIDs[] = {
        OMPRTL_omp_get_num_threads,
        OMPRTL_omp_in_parallel,
        OMPRTL_omp_get_cancellation,
        OMPRTL_omp_get_thread_limit,
        OMPRTL_omp_get_supported_active_levels,
        OMPRTL_omp_get_level,
        OMPRTL_omp_get_ancestor_thread_num,
        OMPRTL_omp_get_team_size,
        OMPRTL_omp_get_active_level,
        OMPRTL_omp_in_final,
        OMPRTL_omp_get_proc_bind,
        OMPRTL_omp_get_num_places,
        OMPRTL_omp_get_num_procs,
        OMPRTL_omp_get_place_num,
        OMPRTL_omp_get_partition_num_places,
        OMPRTL_omp_get_partition_place_nums};

    // Global-tid is handled separately.
    SmallSetVector<Value *, 16> GTIdArgs;
    collectGlobalThreadIdArguments(GTIdArgs);
    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                      << " global thread ID arguments\n");

    for (Function *F : SCC) {
      for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
        Changed |= deduplicateRuntimeCalls(
            *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);

      // __kmpc_global_thread_num is special as we can replace it with an
      // argument in enough cases to make it worth trying.
      Value *GTIdArg = nullptr;
      for (Argument &Arg : F->args())
        if (GTIdArgs.count(&Arg)) {
          GTIdArg = &Arg;
          break;
        }
      Changed |= deduplicateRuntimeCalls(
          *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
    }

    return Changed;
  }
  /// Tries to hide the latency of runtime calls that involve host to device
  /// memory transfers by splitting them into their "issue" and "wait"
  /// versions. The "issue" starts the transfer asynchronously, returning a
  /// handle; the "wait" waits on that handle for the transfer to finish.
  bool hideMemTransfersLatency() {
    auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
    bool Changed = false;
    auto SplitMemTransfers = [&](Use &U, Function &Decl) {
      auto *RTCall = getCallIfRegularCall(U, &RFI);
      if (!RTCall)
        return false;

      OffloadArray OffloadArrays[3];
      if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
        return false;

      LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));

      // TODO: Check if the call can also be moved upwards.
      bool WasSplit = false;
      Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
      if (WaitMovementPoint)
        WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);

      Changed |= WasSplit;
      return WasSplit;
    };
    RFI.foreachUse(SCC, SplitMemTransfers);

    return Changed;
  }
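// Intended rewrite (a sketch, not from the original file; the *_issue and
// *_wait entry points are the ones used in splitTargetDataBeginRTC below):
//
//   call void @__tgt_target_data_begin_mapper(...)
//   ... code independent of the mapped memory ...
//
// becomes
//
//   %handle = alloca %struct.__tgt_async_info
//   call void @__tgt_target_data_begin_mapper_issue(..., %handle)
//   ... independent code now overlaps with the transfer ...
//   call void @__tgt_target_data_begin_mapper_wait(i64 %device_id, %handle)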
  void analysisGlobalization() {
    RuntimeFunction GlobalizationRuntimeIDs[] = {
        OMPRTL___kmpc_data_sharing_coalesced_push_stack,
        OMPRTL___kmpc_data_sharing_push_stack};

    for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) {
      auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID];

      auto CheckGlobalization = [&](Use &U, Function &Decl) {
        if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
          auto Remark = [&](OptimizationRemarkAnalysis ORA) {
            return ORA
                   << "Found thread data sharing on the GPU. "
                   << "Expect degraded performance due to data globalization.";
          };
          emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
                                                 Remark);
        }

        return false;
      };

      RFI.foreachUse(SCC, CheckGlobalization);
    }
  }
  /// Maps the values stored in the offload arrays passed as arguments to
  /// \p RuntimeCall into the offload arrays in \p OAs.
  bool getValuesInOffloadArrays(CallInst &RuntimeCall,
                                MutableArrayRef<OffloadArray> OAs) {
    assert(OAs.size() == 3 && "Need space for three offload arrays!");

    // The pattern looks like:
    //   call void @__tgt_target_data_begin_mapper(..., i8** %offload_baseptrs,
    //       i8** %offload_ptrs, i64* %offload_sizes, ...)
    // so we access the allocas that back these three arrays.
    Value *BasePtrsArg =
        RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
    Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
    Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);

    // Get values stored in **offload_baseptrs.
    Value *V = getUnderlyingObject(BasePtrsArg);
    if (!isa<AllocaInst>(V))
      return false;
    auto *BasePtrsArray = cast<AllocaInst>(V);
    if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
      return false;

    // Get values stored in **offload_ptrs.
    V = getUnderlyingObject(PtrsArg);
    if (!isa<AllocaInst>(V))
      return false;
    auto *PtrsArray = cast<AllocaInst>(V);
    if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
      return false;

    // Get values stored in **offload_sizes.
    V = getUnderlyingObject(SizesArg);
    // If it's a [constant] global array don't analyze it.
    if (isa<GlobalValue>(V))
      return isa<Constant>(V);
    if (!isa<AllocaInst>(V))
      return false;
    auto *SizesArray = cast<AllocaInst>(V);
    if (!OAs[2].initialize(*SizesArray, RuntimeCall))
      return false;

    return true;
  }

  /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
  /// For now this is a way to test that the function getValuesInOffloadArrays
  /// is working properly.
  void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
    assert(OAs.size() == 3 && "There are three offload arrays to debug!");

    LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
    std::string ValuesStr;
    raw_string_ostream Printer(ValuesStr);
    std::string Separator = " --- ";

    for (auto *BP : OAs[0].StoredValues) {
      BP->print(Printer);
      Printer << Separator;
    }
    LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
    ValuesStr.clear();

    for (auto *P : OAs[1].StoredValues) {
      P->print(Printer);
      Printer << Separator;
    }
    LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
    ValuesStr.clear();

    for (auto *S : OAs[2].StoredValues) {
      S->print(Printer);
      Printer << Separator;
    }
    LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
  }
  /// Returns the instruction where the "wait" counterpart of \p RuntimeCall
  /// can be moved. Returns nullptr if the movement is not possible, or not
  /// worth it.
  Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
    // FIXME: This traverses only the BasicBlock where RuntimeCall is.
    //  Make it traverse the CFG.

    Instruction *CurrentI = &RuntimeCall;
    bool IsWorthIt = false;
    while ((CurrentI = CurrentI->getNextNode())) {
      // TODO: Once we detect the regions to be offloaded we should use the
      //  alias analysis manager to check if CurrentI may modify one of
      //  the offloaded regions.
      if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
        if (IsWorthIt)
          return CurrentI;

        return nullptr;
      }

      // FIXME: For now, moving it over anything without side effects is
      //  considered worth it.
      IsWorthIt = true;
    }

    // Return the end of the BasicBlock.
    return RuntimeCall.getParent()->getTerminator();
  }
  /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
  bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
                               Instruction &WaitMovementPoint) {
    // Create a stack allocated handle (__tgt_async_info) at the beginning of
    // the function. It stores information about the async transfer so we can
    // wait on it later.
    auto &IRBuilder = OMPInfoCache.OMPBuilder;
    auto *F = RuntimeCall.getCaller();
    Instruction *FirstInst = &(F->getEntryBlock().front());
    AllocaInst *Handle = new AllocaInst(
        IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);

    // Add the "issue" runtime call declaration.
    FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
        M, OMPRTL___tgt_target_data_begin_mapper_issue);

    // Change the RuntimeCall call site for its asynchronous version.
    SmallVector<Value *, 16> Args;
    for (auto &Arg : RuntimeCall.args())
      Args.push_back(Arg.get());
    Args.push_back(Handle);

    CallInst *IssueCallsite =
        CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
    RuntimeCall.eraseFromParent();

    // Add the "wait" runtime call declaration.
    FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
        M, OMPRTL___tgt_target_data_begin_mapper_wait);

    Value *WaitParams[2] = {
        IssueCallsite->getArgOperand(
            OffloadArray::DeviceIDArgNum), // device_id.
        Handle                             // handle to wait on.
    };
    CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);

    return true;
  }
  static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                    bool GlobalOnly, bool &SingleChoice) {
    if (CurrentIdent == NextIdent)
      return CurrentIdent;

    // TODO: Figure out how to actually combine multiple debug locations. For
    //       now we just keep an existing one if there is a single choice.
    if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
      SingleChoice = !CurrentIdent;
      return NextIdent;
    }
    return nullptr;
  }

  /// Return a `struct ident_t*` value that represents the ones used in the
  /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, potentially
  /// local values are rejected.
  Value *
  getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
                                 Function &F, bool GlobalOnly) {
    bool SingleChoice = true;
    Value *Ident = nullptr;
    auto CombineIdentStruct = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || &F != &Caller)
        return false;
      Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
                                  /*GlobalOnly=*/true, SingleChoice);
      return false;
    };
    RFI.foreachUse(SCC, CombineIdentStruct);

    if (!Ident || !SingleChoice) {
      // The IRBuilder uses the insertion point to determine where the module
      // is, so set it lazily if needed.
      if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
        OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
            &F.getEntryBlock(), F.getEntryBlock().begin()));
      // Create a fallback location if none was found.
      // TODO: Use the debug locations of the calls instead.
      Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
      Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
    }
    return Ident;
  }
  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
  /// \p ReplVal if given.
  bool deduplicateRuntimeCalls(Function &F,
                               OMPInformationCache::RuntimeFunctionInfo &RFI,
                               Value *ReplVal = nullptr) {
    auto *UV = RFI.getUseVector(F);
    if (!UV || UV->size() + (ReplVal != nullptr) < 2)
      return false;

    LLVM_DEBUG(
        dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
               << (ReplVal ? " with an existing value\n" : "\n") << "\n");

    assert((!ReplVal || (isa<Argument>(ReplVal) &&
                         cast<Argument>(ReplVal)->getParent() == &F)) &&
           "Unexpected replacement value!");

    // TODO: Use dominance to find a good position instead.
    auto CanBeMoved = [this](CallBase &CB) {
      unsigned NumArgs = CB.getNumArgOperands();
      if (NumArgs == 0)
        return true;
      if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
        return false;
      for (unsigned u = 1; u < NumArgs; ++u)
        if (isa<Instruction>(CB.getArgOperand(u)))
          return false;
      return true;
    };

    if (!ReplVal) {
      for (Use *U : *UV)
        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
          if (!CanBeMoved(*CI))
            continue;

          auto Remark = [&](OptimizationRemark OR) {
            auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
            return OR << "OpenMP runtime call "
                      << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
                      << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
          };
          emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);

          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
          ReplVal = CI;
          break;
        }
      if (!ReplVal)
        return false;
    }

    // If we use a call as a replacement value we need to make sure the ident
    // is valid at the new location. For now we just pick a global one, either
    // existing and used by one of the calls, or created from scratch.
    if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
      if (CI->getNumArgOperands() > 0 &&
          CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
        Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                      /*GlobalOnly=*/true);
        CI->setArgOperand(0, Ident);
      }
    }

    bool Changed = false;
    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || CI == ReplVal || &F != &Caller)
        return false;
      assert(CI->getCaller() == &F && "Unexpected call!");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP runtime call "
                  << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);

      CGUpdater.removeCallSite(*CI);
      CI->replaceAllUsesWith(ReplVal);
      CI->eraseFromParent();
      ++NumOpenMPRuntimeCallsDeduplicated;
      Changed = true;
      return true;
    };
    RFI.foreachUse(SCC, ReplaceAndDeleteCB);

    return Changed;
  }
  /// Collect arguments that represent the global thread id in \p GTIdArgs.
  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
    //       initialization. We could define an AbstractAttribute instead and
    //       run the Attributor here once it can be run as an SCC pass.

    // Helper to check the argument \p ArgNo at all call sites of \p F for
    // a GTId.
    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
      if (!F.hasLocalLinkage())
        return false;
      for (Use &U : F.uses()) {
        if (CallInst *CI = getCallIfRegularCall(U)) {
          Value *ArgOp = CI->getArgOperand(ArgNo);
          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
              getCallIfRegularCall(
                  *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
            continue;
        }
        return false;
      }
      return true;
    };

    // Helper to identify uses of a GTId as GTId arguments.
    auto AddUserArgs = [&](Value &GTId) {
      for (Use &U : GTId.uses())
        if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
          if (CI->isArgOperand(&U))
            if (Function *Callee = CI->getCalledFunction())
              if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
                GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
    };

    // The argument users of __kmpc_global_thread_num calls are GTIds.
    OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];

    GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
      if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
        AddUserArgs(*CI);
      return false;
    });

    // Transitively search for more arguments by looking at the users of the
    // ones we know already.
    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
      AddUserArgs(*GTIdArgs[u]);
  }
  /// Return the unique kernel for \p I if \p I is only reachable from a
  /// single kernel, nullptr otherwise.
  Kernel getUniqueKernelFor(Instruction &I) {
    return getUniqueKernelFor(*I.getFunction());
  }

  /// Return the unique kernel for \p F if \p F is only reachable from a
  /// single kernel, nullptr otherwise; defined after the class.
  Kernel getUniqueKernelFor(Function &F);

  /// Cache of the unique kernel (or nullptr) that can reach a function.
  DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;

  /// Replace parallel-region function pointers in GPU state machines with
  /// identifier globals; defined after the class.
  bool rewriteDeviceCodeStateMachine();
  /// Emit a remark generically.
  template <typename RemarkKind,
            typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
  void emitRemark(Instruction *Inst, StringRef RemarkName,
                  RemarkCallBack &&RemarkCB) const {
    Function *F = Inst->getParent()->getParent();
    auto &ORE = OREGetter(F);

    ORE.emit(
        [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
  }

  /// Emit a remark on a function. Since only OptimizationRemark is supported,
  /// this can't be made generic.
  void
  emitRemarkOnFunction(Function *F, StringRef RemarkName,
                       function_ref<OptimizationRemark(OptimizationRemark &&)>
                           &&RemarkCB) const {
    auto &ORE = OREGetter(F);

    ORE.emit([&]() {
      return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
    });
  }
  /// The underlying module.
  Module &M;

  /// The SCC we are operating on.
  SmallVectorImpl<Function *> &SCC;

  /// Callback to update the call graph.
  CallGraphUpdater &CGUpdater;

  /// Callback to get an OptimizationRemarkEmitter from a Function *.
  OptimizationRemarkGetter OREGetter;

  /// OpenMP-specific information cache. Also required for Attributor runs.
  OMPInformationCache &OMPInfoCache;

  /// Attributor instance.
  Attributor &A;

  /// Helper function to run Attributor on the SCC.
  bool runAttributor() {
    if (SCC.empty())
      return false;

    registerAAs();

    ChangeStatus Changed = A.run();

    LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
                      << " functions, result: " << Changed << ".\n");

    return Changed == ChangeStatus::CHANGED;
  }
  /// Populate the Attributor with abstract attribute opportunities in the
  /// functions of the SCC.
  void registerAAs() {
    for (Function *F : SCC) {
      if (F->isDeclaration())
        continue;

      A.getOrCreateAAFor<AAICVTracker>(IRPosition::function(*F));
    }

    // Create call-site AAs for all getters of the trackable ICVs.
    for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
      auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];

      auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];

      auto CreateAA = [&](Use &U, Function &Caller) {
        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
        if (!CI)
          return false;

        auto &CB = cast<CallBase>(*CI);

        IRPosition CBPos = IRPosition::callsite_function(CB);
        A.getOrCreateAAFor<AAICVTracker>(CBPos);
        return false;
      };

      GetterRFI.foreachUse(SCC, CreateAA);
    }
  }
};
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
  if (!OMPInfoCache.ModuleSlice.count(&F))
    return nullptr;

  // Use a scope to keep the lifetime of the CachedKernel short.
  {
    Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
    if (CachedKernel)
      return *CachedKernel;

    // TODO: We should use an AA to create an (optimistic and callback
    //       call-aware) call graph. For now we stick to simple patterns that
    //       are less powerful but safe.
    if (OMPInfoCache.Kernels.count(&F)) {
      CachedKernel = Kernel(&F);
      return *CachedKernel;
    }

    CachedKernel = nullptr;
    if (!F.hasLocalLinkage()) {
      // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
      auto Remark = [&](OptimizationRemark OR) {
        return OR << "[OMP100] Potentially unknown OpenMP target region caller";
      };
      emitRemarkOnFunction(&F, "OMP100", Remark);

      return nullptr;
    }
  }

  auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
    if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
      // Allow use in equality comparisons.
      if (Cmp->isEquality())
        return getUniqueKernelFor(*Cmp);
      return nullptr;
    }
    if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
      // Allow direct calls.
      if (CB->isCallee(&U))
        return getUniqueKernelFor(*CB);
      // Allow the use in __kmpc_kernel_prepare_parallel calls.
      if (Function *Callee = CB->getCalledFunction())
        if (Callee->getName() == "__kmpc_kernel_prepare_parallel")
          return getUniqueKernelFor(*CB);
      return nullptr;
    }
    // Disallow every other use.
    return nullptr;
  };

  // TODO: In the future we want to track more than just a unique kernel.
  SmallPtrSet<Kernel, 2> PotentialKernels;
  OMPInformationCache::foreachUse(F, [&](const Use &U) {
    PotentialKernels.insert(GetUniqueKernelForUse(U));
  });

  Kernel K = nullptr;
  if (PotentialKernels.size() == 1)
    K = *PotentialKernels.begin();

  // Cache the result.
  UniqueKernelMap[&F] = K;

  return K;
}
bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
  OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel];

  bool Changed = false;
  if (!KernelPrepareParallelRFI)
    return Changed;

  for (Function *F : SCC) {

    // Check if the function is used in a __kmpc_kernel_prepare_parallel call
    // at all.
    bool UnknownUse = false;
    bool KernelPrepareUse = false;
    unsigned NumDirectCalls = 0;

    SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
    OMPInformationCache::foreachUse(*F, [&](Use &U) {
      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
        if (CB->isCallee(&U)) {
          ++NumDirectCalls;
          return;
        }

      if (isa<ICmpInst>(U.getUser())) {
        ToBeReplacedStateMachineUses.push_back(&U);
        return;
      }
      if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall(
                                   *U.getUser(), &KernelPrepareParallelRFI)) {
        KernelPrepareUse = true;
        ToBeReplacedStateMachineUses.push_back(&U);
        return;
      }
      UnknownUse = true;
    });

    // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel
    // use.
    if (!KernelPrepareUse)
      continue;

    {
      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Found a parallel region that is called in a target "
                     "region but not part of a combined target construct nor "
                     "nested inside a target construct without intermediate "
                     "code. This can lead to excessive register usage for "
                     "unrelated target regions in the same translation unit "
                     "due to spurious call edges assumed by ptxas.";
      };
      emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
    }

    // If this ever hits, we should investigate.
    // TODO: Checking the number of uses is not a necessary restriction and
    // should be lifted.
    if (UnknownUse || NumDirectCalls != 1 ||
        ToBeReplacedStateMachineUses.size() != 2) {
      {
        auto Remark = [&](OptimizationRemark OR) {
          return OR << "Parallel region is used in "
                    << (UnknownUse ? "unknown" : "unexpected")
                    << " ways; will not attempt to rewrite the state machine.";
        };
        emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
      }
      continue;
    }

    // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
    // up if the function is not called from a unique kernel.
    Kernel K = getUniqueKernelFor(*F);
    if (!K) {
      {
        auto Remark = [&](OptimizationRemark OR) {
          return OR << "Parallel region is not known to be called from a "
                       "unique single target region, maybe the surrounding "
                       "function has external linkage?; will not attempt to "
                       "rewrite the state machine use.";
        };
        emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
                             Remark);
      }
      continue;
    }

    // We now know F is a parallel body function called only from the kernel
    // K. We also identified the state machine uses in which we replace the
    // function pointer by a new global symbol for identification purposes.

    {
      auto RemarkParalleRegion = [&](OptimizationRemark OR) {
        return OR << "Specialize parallel region that is only reached from a "
                     "single target region to avoid spurious call edges and "
                     "excessive register usage in other target regions. "
                     "(parallel region ID: "
                  << ore::NV("OpenMPParallelRegion", F->getName())
                  << ", kernel ID: "
                  << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
      };
      emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
                           RemarkParalleRegion);
      auto RemarkKernel = [&](OptimizationRemark OR) {
        return OR << "Target region containing the parallel region that is "
                     "specialized. (parallel region ID: "
                  << ore::NV("OpenMPParallelRegion", F->getName())
                  << ", kernel ID: "
                  << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
      };
      emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
    }

    Module &M = *F->getParent();
    Type *Int8Ty = Type::getInt8Ty(M.getContext());

    auto *ID = new GlobalVariable(
        M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
        UndefValue::get(Int8Ty), F->getName() + ".ID");

    for (Use *U : ToBeReplacedStateMachineUses)
      U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));

    ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;

    Changed = true;
  }

  return Changed;
}
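// Net effect (a sketch, not part of the original source): instead of the
// parallel-region function pointer flowing into the GPU state machine,
//
//   call void @__kmpc_kernel_prepare_parallel(
//       i8* bitcast (void ()* @__omp_outlined_wrapper to i8*))
//   %eq = icmp eq i8* %work_fn,
//       bitcast (void ()* @__omp_outlined_wrapper to i8*)
//
// both uses now reference the private global @__omp_outlined_wrapper.ID, so
// the only remaining uses of the wrapper are direct calls and ptxas no longer
// has to assume spurious indirect call edges between target regions.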
/// Abstract Attribute for tracking ICV values.
struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    if (!F || !A.isFunctionIPOAmendable(*F))
      indicatePessimisticFixpoint();
  }

  /// Returns true if the value is assumed to be tracked.
  bool isAssumedTracked() const { return getAssumed(); }

  /// Returns true if the value is known to be tracked.
  bool isKnownTracked() const { return getAssumed(); }

  /// Create an abstract attribute for the position \p IRP.
  static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);

  /// Return the value with which \p I can be replaced for the specific ICV.
  virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                                const Instruction *I,
                                                Attributor &A) const {
    return None;
  }

  /// Return an assumed unique ICV value if a single candidate is found. If
  /// there cannot be one, return nullptr. If it is not clear yet, return the
  /// Optional::NoneType.
  virtual Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const = 0;

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAICVTracker"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Return true if \p AA is of type AAICVTracker.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address).
  static const char ID;
};
struct AAICVTrackerFunction : public AAICVTracker {
  AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with a better string.
  const std::string getAsStr() const override { return "ICVTrackerFunction"; }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  /// Map of ICVs to their values at specific program points.
  EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus HasChanged = ChangeStatus::UNCHANGED;

    Function *F = getAnchorScope();

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    for (InternalControlVar ICV : TrackableICVs) {
      auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];

      auto &ValuesMap = ICVReplacementValuesMap[ICV];
      auto TrackValues = [&](Use &U, Function &) {
        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
        if (!CI)
          return false;

        // FIXME: handle setters with more than 1 argument.
        /// Track new value.
        if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
          HasChanged = ChangeStatus::CHANGED;

        return false;
      };

      auto CallCheck = [&](Instruction &I) {
        Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
        if (ReplVal.hasValue() &&
            ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
          HasChanged = ChangeStatus::CHANGED;

        return true;
      };

      // Track all changes of an ICV.
      SetterRFI.foreachUse(TrackValues, F);

      A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
                                /* CheckBBLivenessOnly */ true);

      /// TODO: Figure out a way to avoid adding an entry in
      /// ICVReplacementValuesMap.
      Instruction *Entry = &F->getEntryBlock().front();
      if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
        ValuesMap.insert(std::make_pair(Entry, nullptr));
    }

    return HasChanged;
  }

  /// Helper to check if \p I is a call and get the value for it if it is
  /// unique.
  Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
                                    InternalControlVar &ICV) const {
    const auto *CB = dyn_cast<CallBase>(I);
    if (!CB || CB->hasFnAttr("no_openmp") ||
        CB->hasFnAttr("no_openmp_routines"))
      return None;

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
    auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
    Function *CalledFunction = CB->getCalledFunction();

    // Indirect call, assume the ICV changes.
    if (CalledFunction == nullptr)
      return nullptr;
    if (CalledFunction == GetterRFI.Declaration)
      return None;
    if (CalledFunction == SetterRFI.Declaration) {
      if (ICVReplacementValuesMap[ICV].count(I))
        return ICVReplacementValuesMap[ICV].lookup(I);

      return nullptr;
    }

    // Since we don't know, assume it changes the ICV.
    if (CalledFunction->isDeclaration())
      return nullptr;

    const auto &ICVTrackingAA =
        A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));

    if (ICVTrackingAA.isAssumedTracked())
      return ICVTrackingAA.getUniqueReplacementValue(ICV);

    // If we don't know, assume it changes.
    return nullptr;
  }

  // We don't check for a unique value for a function, so return None.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return None;
  }

  /// Return the value with which \p I can be replaced for the specific ICV.
  Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                        const Instruction *I,
                                        Attributor &A) const override {
    const auto &ValuesMap = ICVReplacementValuesMap[ICV];
    if (ValuesMap.count(I))
      return ValuesMap.lookup(I);

    SmallVector<const Instruction *, 16> Worklist;
    SmallPtrSet<const Instruction *, 16> Visited;
    Worklist.push_back(I);

    Optional<Value *> ReplVal;

    while (!Worklist.empty()) {
      const Instruction *CurrInst = Worklist.pop_back_val();
      if (!Visited.insert(CurrInst).second)
        continue;

      const BasicBlock *CurrBB = CurrInst->getParent();

      // Go up and look for all potential setters/calls that might change the
      // ICV.
      while ((CurrInst = CurrInst->getPrevNode())) {
        if (ValuesMap.count(CurrInst)) {
          Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
          // Unknown value, track new.
          if (!ReplVal.hasValue()) {
            ReplVal = NewReplVal;
            break;
          }

          // If we found a new value, we can't know the ICV value anymore.
          if (ReplVal != NewReplVal)
            return nullptr;

          break;
        }

        Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
        if (!NewReplVal.hasValue())
          continue;

        // Unknown value, track new.
        if (!ReplVal.hasValue()) {
          ReplVal = NewReplVal;
          break;
        }

        // We found a new value, we can't know the ICV value anymore.
        if (ReplVal != NewReplVal)
          return nullptr;
      }

      // If we are in the same BB and we have a value, we are done.
      if (CurrBB == I->getParent() && ReplVal.hasValue())
        return ReplVal;

      // Go through all predecessors and add terminators for analysis.
      for (const BasicBlock *Pred : predecessors(CurrBB))
        if (const Instruction *Terminator = Pred->getTerminator())
          Worklist.push_back(Terminator);
    }

    return ReplVal;
  }
};
struct AAICVTrackerFunctionReturned : AAICVTracker {
  AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with a better string.
  const std::string getAsStr() const override {
    return "ICVTrackerFunctionReturned";
  }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  /// Map of ICVs to their unique returned replacement values.
  EnumeratedArray<Optional<Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  /// Return the unique replacement value for the specific ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ICVReplacementValuesMap[ICV];
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::function(*getAnchorScope()));

    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    for (InternalControlVar ICV : TrackableICVs) {
      Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
      Optional<Value *> UniqueICVValue;

      auto CheckReturnInst = [&](Instruction &I) {
        Optional<Value *> NewReplVal =
            ICVTrackingAA.getReplacementValue(ICV, &I, A);

        // If we found a second ICV value there is no unique returned value.
        if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
          return false;

        UniqueICVValue = NewReplVal;

        return true;
      };

      if (!A.checkForAllInstructions(CheckReturnInst, *this,
                                     {Instruction::Ret},
                                     /* CheckBBLivenessOnly */ true))
        UniqueICVValue = nullptr;

      if (UniqueICVValue == ReplVal)
        continue;

      ReplVal = UniqueICVValue;
      Changed = ChangeStatus::CHANGED;
    }

    return Changed;
  }
};
struct AAICVTrackerCallSite : AAICVTracker {
  AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    if (!F || !A.isFunctionIPOAmendable(*F))
      indicatePessimisticFixpoint();

    // We only initialize this AA for getters, so we need to know which ICV it
    // gets.
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    for (InternalControlVar ICV : TrackableICVs) {
      auto ICVInfo = OMPInfoCache.ICVs[ICV];
      auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
      if (Getter.Declaration == getAssociatedFunction()) {
        AssociatedICV = ICVInfo.Kind;
        return;
      }
    }

    /// Unknown ICV.
    indicatePessimisticFixpoint();
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!ReplVal.hasValue() || !ReplVal.getValue())
      return ChangeStatus::UNCHANGED;

    A.changeValueAfterManifest(*getCtxI(), **ReplVal);
    A.deleteAfterManifest(*getCtxI());

    return ChangeStatus::CHANGED;
  }

  // FIXME: come up with a better string.
  const std::string getAsStr() const override { return "ICVTrackerCallSite"; }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  InternalControlVar AssociatedICV;
  Optional<Value *> ReplVal;

  ChangeStatus updateImpl(Attributor &A) override {
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::function(*getAnchorScope()));

    // We don't have any information, so we assume it changes the ICV.
    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    Optional<Value *> NewReplVal =
        ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);

    if (ReplVal == NewReplVal)
      return ChangeStatus::UNCHANGED;

    ReplVal = NewReplVal;
    return ChangeStatus::CHANGED;
  }

  /// Return the unique replacement value for the associated ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ReplVal;
  }
};
struct AAICVTrackerCallSiteReturned : AAICVTracker {
  AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with a better string.
  const std::string getAsStr() const override {
    return "ICVTrackerCallSiteReturned";
  }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  /// Map of ICVs to their unique replacement values at this call site return.
  EnumeratedArray<Optional<Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  /// Return the unique replacement value for the specific ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ICVReplacementValuesMap[ICV];
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::returned(*getAssociatedFunction()));

    // We don't have any information, so we assume it changes the ICV.
    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    for (InternalControlVar ICV : TrackableICVs) {
      Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
      Optional<Value *> NewReplVal =
          ICVTrackingAA.getUniqueReplacementValue(ICV);

      if (ReplVal == NewReplVal)
        continue;

      ReplVal = NewReplVal;
      Changed = ChangeStatus::CHANGED;
    }
    return Changed;
  }
};
AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
                                              Attributor &A) {
  AAICVTracker *AA = nullptr;
  switch (IRP.getPositionKind()) {
  case IRPosition::IRP_INVALID:
  case IRPosition::IRP_FLOAT:
  case IRPosition::IRP_ARGUMENT:
  case IRPosition::IRP_CALL_SITE_ARGUMENT:
    llvm_unreachable("ICVTracker can only be created for function position!");
  case IRPosition::IRP_RETURNED:
    AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
    break;
  case IRPosition::IRP_CALL_SITE_RETURNED:
    AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
    break;
  case IRPosition::IRP_CALL_SITE:
    AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
    break;
  case IRPosition::IRP_FUNCTION:
    AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
    break;
  }

  return *AA;
}
const char AAICVTracker::ID = 0;

PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
    return PreservedAnalyses::all();

  if (DisableOpenMPOptimizations)
    return PreservedAnalyses::all();

  SmallVector<Function *, 16> SCC;
  // If there are kernels in the module, we have to run on all SCC's.
  bool SCCIsInteresting = !OMPInModule.getKernels().empty();
  for (LazyCallGraph::Node &N : C) {
    Function *Fn = &N.getFunction();
    SCC.push_back(Fn);

    // Do we already know that the SCC contains kernels, or that OpenMP
    // functions are called from this SCC?
    if (SCCIsInteresting)
      continue;
    // If not, let's check that.
    SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
  }

  if (!SCCIsInteresting || SCC.empty())
    return PreservedAnalyses::all();

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  AnalysisGetter AG(FAM);

  auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };

  CallGraphUpdater CGUpdater;
  CGUpdater.initialize(CG, C, AM, UR);

  SetVector<Function *> Functions(SCC.begin(), SCC.end());
  BumpPtrAllocator Allocator;
  OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                                /*CGSCC*/ Functions, OMPInModule.getKernels());

  Attributor A(Functions, InfoCache, CGUpdater);

  OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
  bool Changed = OMPOpt.run();
  if (Changed)
    return PreservedAnalyses::none();

  return PreservedAnalyses::all();
}
struct OpenMPOptLegacyPass : public CallGraphSCCPass {
  CallGraphUpdater CGUpdater;
  OpenMPInModule OMPInModule;
  static char ID;

  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool doInitialization(CallGraph &CG) override {
    // Disable the pass if there is no OpenMP (runtime call) in the module.
    containsOpenMP(CG.getModule(), OMPInModule);
    return false;
  }

  bool runOnSCC(CallGraphSCC &CGSCC) override {
    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
      return false;
    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
      return false;

    SmallVector<Function *, 16> SCC;
    // If there are kernels in the module, we have to run on all SCC's.
    bool SCCIsInteresting = !OMPInModule.getKernels().empty();
    for (CallGraphNode *CGN : CGSCC) {
      Function *Fn = CGN->getFunction();
      if (!Fn || Fn->isDeclaration())
        continue;
      SCC.push_back(Fn);

      // Do we already know that the SCC contains kernels, or that OpenMP
      // functions are called from this SCC?
      if (SCCIsInteresting)
        continue;
      // If not, let's check that.
      SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
    }

    if (!SCCIsInteresting || SCC.empty())
      return false;

    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    CGUpdater.initialize(CG, CGSCC);

    // Maintain a map of functions to avoid rebuilding the ORE.
    DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
    auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
      std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
      if (!ORE)
        ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    AnalysisGetter AG;
    SetVector<Function *> Functions(SCC.begin(), SCC.end());
    BumpPtrAllocator Allocator;
    OMPInformationCache InfoCache(
        *(Functions.back()->getParent()), AG, Allocator,
        /*CGSCC*/ Functions, OMPInModule.getKernels());

    Attributor A(Functions, InfoCache, CGUpdater);

    OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
    return OMPOpt.run();
  }

  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};
void OpenMPInModule::identifyKernels(Module &M) {
  NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
  if (!MD)
    return;

  for (auto *Op : MD->operands()) {
    if (Op->getNumOperands() < 2)
      continue;
    MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
    if (!KindID || KindID->getString() != "kernel")
      continue;

    Function *KernelFn =
        mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
    if (!KernelFn)
      continue;

    ++NumOpenMPTargetRegionKernels;

    Kernels.insert(KernelFn);
  }
}
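// The metadata consumed above has this shape (a sketch of the typical
// nvvm.annotations emitted for OpenMP target regions):
//
//   !nvvm.annotations = !{!0}
//   !0 = !{void ()* @__omp_offloading_<id>_main_l10, !"kernel", i32 1}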
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
  if (OMPInModule.isKnown())
    return OMPInModule;

  auto RecordFunctionsContainingUsesOf = [&](Function *F) {
    for (User *U : F->users())
      if (auto *I = dyn_cast<Instruction>(U))
        OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
  };

#define OMP_RTL(_Enum, _Name, ...)                                             \
  if (Function *F = M.getFunction(_Name)) {                                    \
    RecordFunctionsContainingUsesOf(F);                                        \
    OMPInModule = true;                                                        \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Identify kernels once. TODO: We should split the OMPInformationCache into
  // a module and an SCC part. The kernel information, among other things,
  // could go into the module part.
  if (OMPInModule.isKnown() && OMPInModule) {
    OMPInModule.identifyKernels(M);
    return true;
  }

  return OMPInModule = false;
}
2454 "OpenMP specific optimizations",
false,
false)