20 #include "llvm/ADT/SmallPtrSet.h" 22 using namespace clang;
23 using namespace CodeGen;
/// Enumeration of the NVPTX-specific OpenMP runtime library entry points
/// that this codegen class may emit calls to.  Each enumerator is mapped to
/// a concrete llvm::FunctionCallee declaration in
/// createNVPTXRuntimeFunction().
enum OpenMPRTLFunctionNVPTX {
  /// Kernel initialization for the generic (non-SPMD) execution mode.
  OMPRTL_NVPTX__kmpc_kernel_init,
  /// Kernel finalization for the generic execution mode.
  OMPRTL_NVPTX__kmpc_kernel_deinit,
  /// Kernel initialization for the SPMD execution mode.
  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
  /// Kernel finalization for the SPMD execution mode (v2 interface).
  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
  /// Prepare an outlined parallel region for execution by the workers.
  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
  /// Worker-side query for work; result tells whether the worker is active.
  OMPRTL_NVPTX__kmpc_kernel_parallel,
  /// Worker-side notification that a parallel region has finished.
  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
  /// Begin a serialized (sequentialized) parallel region.
  OMPRTL_NVPTX__kmpc_serialized_parallel,
  /// End a serialized parallel region.
  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
  /// 32-bit warp shuffle used by the reduction codegen.
  OMPRTL_NVPTX__kmpc_shuffle_int32,
  /// 64-bit warp shuffle used by the reduction codegen.
  OMPRTL_NVPTX__kmpc_shuffle_int64,
  /// Parallel reduction entry point (nowait, v2 interface).
  OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
  /// Teams reduction entry point (nowait, v2 interface).
  OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
  /// Finish a nowait reduction.
  OMPRTL_NVPTX__kmpc_end_reduce_nowait,
  /// Initialize the data-sharing stack (generic mode).
  OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
  /// Initialize the data-sharing stack (SPMD mode).
  OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
  /// Push a coalesced frame onto the data-sharing stack.
  OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
  /// Pop a frame from the data-sharing stack.
  OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
  /// Start sharing variables with the worker threads.
  OMPRTL_NVPTX__kmpc_begin_sharing_variables,
  /// Stop sharing variables with the worker threads.
  OMPRTL_NVPTX__kmpc_end_sharing_variables,
  /// Retrieve the list of shared variables on the worker side.
  OMPRTL_NVPTX__kmpc_get_shared_variables,
  /// Query the current parallel nesting level.
  OMPRTL_NVPTX__kmpc_parallel_level,
  /// Query whether the kernel is executing in SPMD mode.
  OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
  /// Obtain the statically allocated team memory buffer.
  OMPRTL_NVPTX__kmpc_get_team_static_memory,
  /// Release the statically allocated team memory buffer.
  OMPRTL_NVPTX__kmpc_restore_team_static_memory,
  /// Call to the full-runtime barrier __kmpc_barrier.
  OMPRTL__kmpc_barrier,
  /// Call to the lightweight SPMD barrier __kmpc_barrier_simple_spmd.
  OMPRTL__kmpc_barrier_simple_spmd,
};
114 llvm::FunctionCallee EnterCallee =
nullptr;
116 llvm::FunctionCallee ExitCallee =
nullptr;
119 llvm::BasicBlock *ContBlock =
nullptr;
122 NVPTXActionTy(llvm::FunctionCallee EnterCallee,
124 llvm::FunctionCallee ExitCallee,
126 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
135 CGF.
Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
153 class ExecutionRuntimeModesRAII {
158 bool SavedRuntimeMode =
false;
159 bool *RuntimeMode =
nullptr;
164 : ExecMode(ExecMode) {
165 SavedExecMode = ExecMode;
170 bool &RuntimeMode,
bool FullRuntimeMode)
171 : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
172 SavedExecMode = ExecMode;
173 SavedRuntimeMode = RuntimeMode;
175 RuntimeMode = FullRuntimeMode;
177 ~ExecutionRuntimeModesRAII() {
178 ExecMode = SavedExecMode;
180 *RuntimeMode = SavedRuntimeMode;
/// GPU configuration constants.  These could be read from CUDA special
/// registers at runtime, but compile-time constants generate more efficient
/// code and hold for all known NVPTX architectures.
enum MachineConfiguration : unsigned {
  WarpSize = 32,
  /// Number of bits required to represent a lane identifier:
  /// log_2(WarpSize).
  LaneIDBits = 5,
  /// Mask extracting the lane id from a thread id (WarpSize is a power of 2).
  LaneIDMask = WarpSize - 1,

  /// Global memory alignment used for globalized variables, chosen for
  /// performance.
  GlobalMemoryAlignment = 128,

  /// Maximal size of the statically allocated shared memory buffer.
  SharedMemorySize = 128,
};
204 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
205 const Expr *
Base = ASE->getBase()->IgnoreParenImpCasts();
206 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
209 }
else if (
auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
210 const Expr *
Base = OASE->getBase()->IgnoreParenImpCasts();
211 while (
const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
213 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
218 if (
const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
219 return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
220 const auto *ME = cast<MemberExpr>(RefExpr);
221 return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
225 static RecordDecl *buildRecordForGlobalizedVars(
228 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
229 &MappedDeclsFields,
int BufSize) {
231 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
234 for (
const ValueDecl *D : EscapedDecls)
235 GlobalizedVars.emplace_back(
240 for (
const ValueDecl *D : EscapedDeclsForTeams)
242 llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
243 return L.first > R.first;
253 llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
254 EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
255 for (
const auto &Pair : GlobalizedVars) {
256 const ValueDecl *VD = Pair.second;
264 if (SingleEscaped.count(VD)) {
278 llvm::APInt ArraySize(32, BufSize);
288 GlobalMemoryAlignment)));
289 Field->
addAttr(AlignedAttr::CreateImplicit(
290 C, AlignedAttr::GNU_aligned,
true,
295 GlobalizedRD->addDecl(Field);
296 MappedDeclsFields.try_emplace(VD, Field);
298 GlobalizedRD->completeDefinition();
303 class CheckVarsEscapingDeclContext final
306 llvm::SetVector<const ValueDecl *> EscapedDecls;
307 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
308 llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
310 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
311 bool AllEscaped =
false;
312 bool IsForCombinedParallelRegion =
false;
314 void markAsEscaped(
const ValueDecl *VD) {
316 if (!isa<VarDecl>(VD) ||
317 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
324 if (
auto *CSI = CGF.CapturedStmtInfo) {
325 if (
const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
328 if (!IsForCombinedParallelRegion) {
331 const auto *
Attr = FD->getAttr<OMPCaptureKindAttr>();
334 if (((
Attr->getCaptureKind() != OMPC_map) &&
336 static_cast<OpenMPClauseKind>(
Attr->getCaptureKind()))) ||
337 ((
Attr->getCaptureKind() == OMPC_map) &&
338 !FD->getType()->isAnyPointerType()))
341 if (!FD->getType()->isReferenceType()) {
343 "Parameter captured by value with variably modified type");
344 EscapedParameters.insert(VD);
345 }
else if (!IsForCombinedParallelRegion) {
350 if ((!CGF.CapturedStmtInfo ||
351 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
356 EscapedVariableLengthDecls.insert(VD);
358 EscapedDecls.insert(VD);
361 void VisitValueDecl(
const ValueDecl *VD) {
364 if (
const auto *VarD = dyn_cast<VarDecl>(VD)) {
365 if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
366 const bool SavedAllEscaped = AllEscaped;
368 Visit(VarD->getInit());
369 AllEscaped = SavedAllEscaped;
375 bool IsCombinedParallelRegion) {
379 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
380 const ValueDecl *VD = C.getCapturedVar();
381 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
382 if (IsCombinedParallelRegion) {
386 IsForCombinedParallelRegion =
false;
389 C->getClauseKind() == OMPC_reduction ||
390 C->getClauseKind() == OMPC_linear ||
391 C->getClauseKind() == OMPC_private)
394 if (
const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
395 Vars = PC->getVarRefs();
396 else if (
const auto *PC = dyn_cast<OMPLastprivateClause>(C))
397 Vars = PC->getVarRefs();
399 llvm_unreachable(
"Unexpected clause.");
400 for (
const auto *E : Vars) {
404 IsForCombinedParallelRegion =
true;
408 if (IsForCombinedParallelRegion)
413 if (isa<OMPCapturedExprDecl>(VD))
415 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
420 void buildRecordForGlobalizedVars(
bool IsInTTDRegion) {
421 assert(!GlobalizedRD &&
422 "Record for globalized variables is built already.");
425 EscapedDeclsForTeams = EscapedDecls.getArrayRef();
427 EscapedDeclsForParallel = EscapedDecls.getArrayRef();
428 GlobalizedRD = ::buildRecordForGlobalizedVars(
429 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
430 MappedDeclsFields, WarpSize);
436 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
438 virtual ~CheckVarsEscapingDeclContext() =
default;
439 void VisitDeclStmt(
const DeclStmt *S) {
443 if (
const auto *VD = dyn_cast_or_null<ValueDecl>(D))
457 if (CaptureRegions.size() == 1 && CaptureRegions.back() ==
OMPD_unknown) {
458 VisitStmt(S->getCapturedStmt());
461 VisitOpenMPCapturedStmt(
463 CaptureRegions.back() == OMPD_parallel &&
471 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
472 const ValueDecl *VD = C.getCapturedVar();
474 if (isa<OMPCapturedExprDecl>(VD))
483 if (C.capturesVariable()) {
485 const ValueDecl *VD = C.getCapturedVar();
493 void VisitBlockExpr(
const BlockExpr *E) {
498 const VarDecl *VD = C.getVariable();
505 void VisitCallExpr(
const CallExpr *E) {
511 if (Arg->isLValue()) {
512 const bool SavedAllEscaped = AllEscaped;
515 AllEscaped = SavedAllEscaped;
528 if (isa<OMPCapturedExprDecl>(VD))
530 else if (
const auto *VarD = dyn_cast<VarDecl>(VD))
531 if (VarD->isInitCapture())
538 const bool SavedAllEscaped = AllEscaped;
541 AllEscaped = SavedAllEscaped;
550 const bool SavedAllEscaped = AllEscaped;
553 AllEscaped = SavedAllEscaped;
558 void VisitExpr(
const Expr *E) {
561 bool SavedAllEscaped = AllEscaped;
567 AllEscaped = SavedAllEscaped;
569 void VisitStmt(
const Stmt *S) {
579 const RecordDecl *getGlobalizedRecord(
bool IsInTTDRegion) {
581 buildRecordForGlobalizedVars(IsInTTDRegion);
587 assert(GlobalizedRD &&
588 "Record for globalized variables must be generated already.");
589 auto I = MappedDeclsFields.find(VD);
590 if (I == MappedDeclsFields.end())
592 return I->getSecond();
597 return EscapedDecls.getArrayRef();
602 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()
const {
603 return EscapedParameters;
609 return EscapedVariableLengthDecls.getArrayRef();
617 llvm::Intrinsic::getDeclaration(
618 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
625 llvm::Intrinsic::getDeclaration(
626 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
650 llvm::Intrinsic::getDeclaration(
651 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
652 "nvptx_num_threads");
661 bool IsInSPMDExecutionMode =
false) {
663 return IsInSPMDExecutionMode
683 return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
684 Bld.CreateNot(Mask),
"master_tid");
687 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
691 createWorkerFunction(CGM);
694 void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
702 WorkerFn->setDoesNotRecurse();
706 CGOpenMPRuntimeNVPTX::getExecutionMode()
const {
707 return CurrentExecutionMode;
724 if (
const auto *NestedDir =
725 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
731 if (DKind == OMPD_teams) {
732 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
737 if (
const auto *NND =
738 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
739 DKind = NND->getDirectiveKind();
745 case OMPD_target_teams:
747 case OMPD_target_simd:
748 case OMPD_target_parallel:
749 case OMPD_target_parallel_for:
750 case OMPD_target_parallel_for_simd:
751 case OMPD_target_teams_distribute:
752 case OMPD_target_teams_distribute_simd:
753 case OMPD_target_teams_distribute_parallel_for:
754 case OMPD_target_teams_distribute_parallel_for_simd:
757 case OMPD_parallel_for:
758 case OMPD_parallel_sections:
760 case OMPD_parallel_for_simd:
762 case OMPD_cancellation_point:
764 case OMPD_threadprivate:
780 case OMPD_target_data:
781 case OMPD_target_exit_data:
782 case OMPD_target_enter_data:
783 case OMPD_distribute:
784 case OMPD_distribute_simd:
785 case OMPD_distribute_parallel_for:
786 case OMPD_distribute_parallel_for_simd:
787 case OMPD_teams_distribute:
788 case OMPD_teams_distribute_simd:
789 case OMPD_teams_distribute_parallel_for:
790 case OMPD_teams_distribute_parallel_for_simd:
791 case OMPD_target_update:
792 case OMPD_declare_simd:
793 case OMPD_declare_target:
794 case OMPD_end_declare_target:
795 case OMPD_declare_reduction:
796 case OMPD_declare_mapper:
798 case OMPD_taskloop_simd:
801 llvm_unreachable(
"Unexpected directive.");
811 switch (DirectiveKind) {
813 case OMPD_target_teams:
815 case OMPD_target_parallel:
816 case OMPD_target_parallel_for:
817 case OMPD_target_parallel_for_simd:
818 case OMPD_target_teams_distribute_parallel_for:
819 case OMPD_target_teams_distribute_parallel_for_simd:
820 case OMPD_target_simd:
821 case OMPD_target_teams_distribute_simd:
823 case OMPD_target_teams_distribute:
827 case OMPD_parallel_for:
828 case OMPD_parallel_sections:
830 case OMPD_parallel_for_simd:
832 case OMPD_cancellation_point:
834 case OMPD_threadprivate:
850 case OMPD_target_data:
851 case OMPD_target_exit_data:
852 case OMPD_target_enter_data:
853 case OMPD_distribute:
854 case OMPD_distribute_simd:
855 case OMPD_distribute_parallel_for:
856 case OMPD_distribute_parallel_for_simd:
857 case OMPD_teams_distribute:
858 case OMPD_teams_distribute_simd:
859 case OMPD_teams_distribute_parallel_for:
860 case OMPD_teams_distribute_parallel_for_simd:
861 case OMPD_target_update:
862 case OMPD_declare_simd:
863 case OMPD_declare_target:
864 case OMPD_end_declare_target:
865 case OMPD_declare_reduction:
866 case OMPD_declare_mapper:
868 case OMPD_taskloop_simd:
874 "Unknown programming model for OpenMP directive on NVPTX target.");
882 "Expected loop-based directive.");
887 return C->getScheduleKind() == OMPC_SCHEDULE_static;
900 if (
const auto *NestedDir =
901 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
909 if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
911 if (DKind == OMPD_parallel) {
912 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
917 if (
const auto *NND =
918 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
919 DKind = NND->getDirectiveKind();
924 }
else if (DKind == OMPD_teams) {
925 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
930 if (
const auto *NND =
931 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
932 DKind = NND->getDirectiveKind();
937 if (DKind == OMPD_parallel) {
938 Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
943 if (
const auto *NND =
944 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
945 DKind = NND->getDirectiveKind();
954 case OMPD_target_teams:
959 if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
961 if (DKind == OMPD_parallel) {
962 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
967 if (
const auto *NND =
968 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
969 DKind = NND->getDirectiveKind();
976 case OMPD_target_parallel:
977 if (DKind == OMPD_simd)
981 case OMPD_target_teams_distribute:
982 case OMPD_target_simd:
983 case OMPD_target_parallel_for:
984 case OMPD_target_parallel_for_simd:
985 case OMPD_target_teams_distribute_simd:
986 case OMPD_target_teams_distribute_parallel_for:
987 case OMPD_target_teams_distribute_parallel_for_simd:
990 case OMPD_parallel_for:
991 case OMPD_parallel_sections:
993 case OMPD_parallel_for_simd:
995 case OMPD_cancellation_point:
997 case OMPD_threadprivate:
1006 case OMPD_taskyield:
1009 case OMPD_taskgroup:
1013 case OMPD_target_data:
1014 case OMPD_target_exit_data:
1015 case OMPD_target_enter_data:
1016 case OMPD_distribute:
1017 case OMPD_distribute_simd:
1018 case OMPD_distribute_parallel_for:
1019 case OMPD_distribute_parallel_for_simd:
1020 case OMPD_teams_distribute:
1021 case OMPD_teams_distribute_simd:
1022 case OMPD_teams_distribute_parallel_for:
1023 case OMPD_teams_distribute_parallel_for_simd:
1024 case OMPD_target_update:
1025 case OMPD_declare_simd:
1026 case OMPD_declare_target:
1027 case OMPD_end_declare_target:
1028 case OMPD_declare_reduction:
1029 case OMPD_declare_mapper:
1031 case OMPD_taskloop_simd:
1034 llvm_unreachable(
"Unexpected directive.");
1048 switch (DirectiveKind) {
1050 case OMPD_target_teams:
1051 case OMPD_target_parallel:
1053 case OMPD_target_parallel_for:
1054 case OMPD_target_parallel_for_simd:
1055 case OMPD_target_teams_distribute_parallel_for:
1056 case OMPD_target_teams_distribute_parallel_for_simd:
1059 case OMPD_target_simd:
1060 case OMPD_target_teams_distribute_simd:
1062 case OMPD_target_teams_distribute:
1066 case OMPD_parallel_for:
1067 case OMPD_parallel_sections:
1069 case OMPD_parallel_for_simd:
1071 case OMPD_cancellation_point:
1073 case OMPD_threadprivate:
1082 case OMPD_taskyield:
1085 case OMPD_taskgroup:
1089 case OMPD_target_data:
1090 case OMPD_target_exit_data:
1091 case OMPD_target_enter_data:
1092 case OMPD_distribute:
1093 case OMPD_distribute_simd:
1094 case OMPD_distribute_parallel_for:
1095 case OMPD_distribute_parallel_for_simd:
1096 case OMPD_teams_distribute:
1097 case OMPD_teams_distribute_simd:
1098 case OMPD_teams_distribute_parallel_for:
1099 case OMPD_teams_distribute_parallel_for_simd:
1100 case OMPD_target_update:
1101 case OMPD_declare_simd:
1102 case OMPD_declare_target:
1103 case OMPD_end_declare_target:
1104 case OMPD_declare_reduction:
1105 case OMPD_declare_mapper:
1107 case OMPD_taskloop_simd:
1113 "Unknown programming model for OpenMP directive on NVPTX target.");
1117 StringRef ParentName,
1118 llvm::Function *&OutlinedFn,
1119 llvm::Constant *&OutlinedFnID,
1120 bool IsOffloadEntry,
1122 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
1123 EntryFunctionState EST;
1126 WrapperFunctionsMap.clear();
1130 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
1131 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
1134 NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
1135 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
1136 : EST(EST), WST(WST) {}
1140 RT.emitNonSPMDEntryHeader(CGF, EST, WST);
1142 RT.setLocThreadIdInsertPt(CGF,
true);
1148 RT.emitNonSPMDEntryFooter(CGF, EST);
1152 IsInTTDRegion =
true;
1154 GlobalizedRecords.emplace_back();
1155 if (!KernelStaticGlobalized) {
1156 KernelStaticGlobalized =
new llvm::GlobalVariable(
1159 llvm::ConstantPointerNull::get(CGM.
VoidPtrTy),
1160 "_openmp_kernel_static_glob_rd$ptr",
nullptr,
1161 llvm::GlobalValue::NotThreadLocal,
1164 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1165 IsOffloadEntry, CodeGen);
1166 IsInTTDRegion =
false;
1170 WST.WorkerFn->setName(Twine(OutlinedFn->getName(),
"_worker"));
1173 emitWorkerFunction(WST);
1177 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(
CodeGenFunction &CGF,
1178 EntryFunctionState &EST,
1179 WorkerFunctionState &WST) {
1189 Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
1192 emitCall(CGF, WST.Loc, WST.WorkerFn);
1198 Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
1201 IsInTargetMasterThreadRegion =
true;
1209 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
1213 createNVPTXRuntimeFunction(
1214 OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
1216 emitGenericVarsProlog(CGF, WST.Loc);
1219 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(
CodeGenFunction &CGF,
1220 EntryFunctionState &EST) {
1221 IsInTargetMasterThreadRegion =
false;
1225 emitGenericVarsEpilog(CGF);
1230 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".termination.notifier");
1238 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
1240 syncCTAThreads(CGF);
1245 EST.ExitBB =
nullptr;
1249 StringRef ParentName,
1250 llvm::Function *&OutlinedFn,
1251 llvm::Constant *&OutlinedFnID,
1252 bool IsOffloadEntry,
1254 ExecutionRuntimeModesRAII ModeRAII(
1255 CurrentExecutionMode, RequiresFullRuntime,
1258 EntryFunctionState EST;
1263 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
1268 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
1270 : RT(RT), EST(EST), D(D) {}
1272 RT.emitSPMDEntryHeader(CGF, EST, D);
1278 RT.emitSPMDEntryFooter(CGF, EST);
1280 } Action(*
this, EST, D);
1282 IsInTTDRegion =
true;
1284 GlobalizedRecords.emplace_back();
1285 if (!KernelStaticGlobalized) {
1286 KernelStaticGlobalized =
new llvm::GlobalVariable(
1289 llvm::ConstantPointerNull::get(CGM.
VoidPtrTy),
1290 "_openmp_kernel_static_glob_rd$ptr",
nullptr,
1291 llvm::GlobalValue::NotThreadLocal,
1294 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1295 IsOffloadEntry, CodeGen);
1296 IsInTTDRegion =
false;
1299 void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
1310 Bld.getInt16(RequiresFullRuntime ? 1 : 0),
1313 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
1315 if (RequiresFullRuntime) {
1318 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
1325 IsInTargetMasterThreadRegion =
true;
1329 EntryFunctionState &EST) {
1330 IsInTargetMasterThreadRegion =
false;
1343 CGF.
Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
1345 createNVPTXRuntimeFunction(
1346 OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
1350 EST.ExitBB =
nullptr;
1363 llvm::GlobalValue::WeakAnyLinkage,
1364 llvm::ConstantInt::get(CGM.
Int8Ty, Mode ? 0 : 1),
1365 Twine(Name,
"_exec_mode"));
1369 void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
1375 emitWorkerLoop(CGF, WST);
1380 WorkerFunctionState &WST) {
1393 llvm::BasicBlock *SelectWorkersBB = CGF.
createBasicBlock(
".select.workers");
1395 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".terminate.parallel");
1404 syncCTAThreads(CGF);
1417 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
1422 llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID,
"should_terminate");
1423 Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
1428 Bld.CreateIsNotNull(Bld.
CreateLoad(ExecStatus),
"is_active");
1429 Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
1434 setLocThreadIdInsertPt(CGF,
true);
1437 for (llvm::Function *W : Work) {
1442 Bld.CreateICmpEQ(Bld.
CreateLoad(WorkFn),
ID,
"work_match");
1446 Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
1455 emitCall(CGF, WST.Loc, W,
1456 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1466 auto *ParallelFnTy =
1467 llvm::FunctionType::get(CGM.
VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
1475 emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast},
1476 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1483 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
1490 syncCTAThreads(CGF);
1496 clearLocThreadIdInsertPt(CGF);
1503 llvm::FunctionCallee
1505 llvm::FunctionCallee RTLFn =
nullptr;
1506 switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
1507 case OMPRTL_NVPTX__kmpc_kernel_init: {
1512 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1516 case OMPRTL_NVPTX__kmpc_kernel_deinit: {
1520 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1524 case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
1529 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1533 case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
1537 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1541 case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
1546 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1550 case OMPRTL_NVPTX__kmpc_kernel_parallel: {
1556 llvm::FunctionType::get(RetTy, TypeParams,
false);
1560 case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
1567 case OMPRTL_NVPTX__kmpc_serialized_parallel: {
1572 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1576 case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
1581 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1585 case OMPRTL_NVPTX__kmpc_shuffle_int32: {
1590 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1594 case OMPRTL_NVPTX__kmpc_shuffle_int64: {
1599 llvm::FunctionType::get(CGM.
Int64Ty, TypeParams,
false);
1603 case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
1611 auto *ShuffleReduceFnTy =
1612 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1615 auto *InterWarpCopyFnTy =
1616 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1618 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1623 ShuffleReduceFnTy->getPointerTo(),
1624 InterWarpCopyFnTy->getPointerTo()};
1626 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1628 FnTy,
"__kmpc_nvptx_parallel_reduce_nowait_v2");
1631 case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
1635 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1637 FnTy,
"__kmpc_nvptx_end_reduce_nowait");
1640 case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
1654 auto *ShuffleReduceFnTy =
1655 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1658 auto *InterWarpCopyFnTy =
1659 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1663 auto *GlobalListFnTy =
1664 llvm::FunctionType::get(CGM.
VoidTy, GlobalListTypeParams,
1666 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1671 ShuffleReduceFnTy->getPointerTo(),
1672 InterWarpCopyFnTy->getPointerTo(),
1673 GlobalListFnTy->getPointerTo(),
1674 GlobalListFnTy->getPointerTo(),
1675 GlobalListFnTy->getPointerTo(),
1676 GlobalListFnTy->getPointerTo()};
1678 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1680 FnTy,
"__kmpc_nvptx_teams_reduce_nowait_v2");
1683 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
1690 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
1698 case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
1703 llvm::FunctionType::get(CGM.
VoidPtrTy, TypeParams,
false);
1705 FnTy,
"__kmpc_data_sharing_coalesced_push_stack");
1708 case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
1712 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1714 "__kmpc_data_sharing_pop_stack");
1717 case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
1722 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1726 case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
1733 case OMPRTL_NVPTX__kmpc_get_shared_variables: {
1737 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1741 case OMPRTL_NVPTX__kmpc_parallel_level: {
1745 llvm::FunctionType::get(CGM.
Int16Ty, TypeParams,
false);
1749 case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
1751 auto *FnTy = llvm::FunctionType::get(CGM.
Int8Ty,
false);
1755 case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
1761 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1765 case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
1770 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1779 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1781 cast<llvm::Function>(RTLFn.getCallee())
1782 ->addFnAttr(llvm::Attribute::Convergent);
1785 case OMPRTL__kmpc_barrier_simple_spmd: {
1790 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1793 cast<llvm::Function>(RTLFn.getCallee())
1794 ->addFnAttr(llvm::Attribute::Convergent);
1801 void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *
ID,
1802 llvm::Constant *Addr,
1803 uint64_t Size, int32_t,
1804 llvm::GlobalValue::LinkageTypes) {
1807 if (!isa<llvm::Function>(Addr))
1813 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
1815 llvm::Metadata *MDVals[] = {
1816 llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx,
"kernel"),
1817 llvm::ConstantAsMetadata::get(
1818 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
1820 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
1823 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
1825 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
1827 if (!IsOffloadEntry)
1830 assert(!ParentName.empty() &&
"Invalid target region parent name!");
1834 emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1837 emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1848 KMP_IDENT_SPMD_MODE = 0x01,
1850 KMP_IDENT_SIMPLE_RT_MODE = 0x02,
1851 LLVM_MARK_AS_BITMASK_ENUM(KMP_IDENT_SIMPLE_RT_MODE)
1856 (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
1860 switch (getExecutionMode()) {
1862 if (requiresFullRuntime())
1863 return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
1864 return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
1866 assert(requiresFullRuntime() &&
"Expected full runtime.");
1867 return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
1869 return UndefinedMode;
1871 llvm_unreachable(
"Unknown flags are requested.");
1877 llvm_unreachable(
"OpenMP NVPTX can only handle device code.");
1901 const Expr *NumTeams,
1902 const Expr *ThreadLimit,
1910 bool &IsInParallelRegion;
1911 bool PrevIsInParallelRegion;
1914 NVPTXPrePostActionTy(
bool &IsInParallelRegion)
1915 : IsInParallelRegion(IsInParallelRegion) {}
1917 PrevIsInParallelRegion = IsInParallelRegion;
1918 IsInParallelRegion =
true;
1921 IsInParallelRegion = PrevIsInParallelRegion;
1923 } Action(IsInParallelRegion);
1925 bool PrevIsInTTDRegion = IsInTTDRegion;
1926 IsInTTDRegion =
false;
1927 bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
1928 IsInTargetMasterThreadRegion =
false;
1931 D, ThreadIDVar, InnermostKind, CodeGen));
1933 OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
1934 OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
1935 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
1937 IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
1938 IsInTTDRegion = PrevIsInTTDRegion;
1940 !IsInParallelRegion) {
1941 llvm::Function *WrapperFun =
1942 createParallelDataSharingWrapper(OutlinedFun, D);
1943 WrapperFunctionsMap[OutlinedFun] = WrapperFun;
1955 "expected teams directive.");
1962 Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
1970 for (
const Expr *E : C->getVarRefs())
1980 "expected teams directive.");
1982 for (
const Expr *E : C->privates())
1994 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
2000 if (!LastPrivatesReductions.empty()) {
2001 GlobalizedRD = ::buildRecordForGlobalizedVars(
2003 MappedDeclsFields, WarpSize);
2005 }
else if (!LastPrivatesReductions.empty()) {
2006 assert(!TeamAndReductions.first &&
2007 "Previous team declaration is not expected.");
2009 std::swap(TeamAndReductions.second, LastPrivatesReductions);
2016 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
2020 NVPTXPrePostActionTy(
2022 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
2024 : Loc(Loc), GlobalizedRD(GlobalizedRD),
2025 MappedDeclsFields(MappedDeclsFields) {}
2030 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
2031 I->getSecond().GlobalRecord = GlobalizedRD;
2032 I->getSecond().MappedParams =
2033 llvm::make_unique<CodeGenFunction::OMPMapVars>();
2034 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
2035 for (
const auto &Pair : MappedDeclsFields) {
2036 assert(Pair.getFirst()->isCanonicalDecl() &&
2037 "Expected canonical declaration");
2038 Data.insert(std::make_pair(Pair.getFirst(),
2039 MappedVarData(Pair.getSecond(),
2043 Rt.emitGenericVarsProlog(CGF, Loc);
2047 .emitGenericVarsEpilog(CGF);
2049 } Action(Loc, GlobalizedRD, MappedDeclsFields);
2052 D, ThreadIDVar, InnermostKind, CodeGen);
2054 OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
2055 OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
2056 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
2062 void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(
CodeGenFunction &CGF,
2064 bool WithSPMDCheck) {
2071 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2072 if (I == FunctionGlobalizedDecls.end())
2074 if (
const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
2082 unsigned Alignment =
2084 unsigned GlobalRecordSize =
2086 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
2088 llvm::PointerType *GlobalRecPtrTy =
2092 if (!IsInTTDRegion &&
2098 if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
2104 IsTTD = Bld.CreateIsNull(PL);
2108 Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
2112 Address RecPtr =
Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
2119 if (
const RecordDecl *SecGlobalizedVarsRecord =
2120 I->getSecond().SecondaryGlobalRecord.getValueOr(
nullptr)) {
2128 unsigned Alignment =
2130 unsigned GlobalRecordSize =
2132 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
2133 Size = Bld.CreateSelect(
2134 IsTTD, llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize), Size);
2139 Size, CGF.
Builder.getInt16(0)};
2142 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2143 GlobalRecordSizeArg);
2145 GlobalRecValue, GlobalRecPtrTy);
2147 auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
2148 2,
"_select_stack");
2149 Phi->addIncoming(RecPtr.
getPointer(), SPMDBB);
2150 Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
2151 GlobalRecCastAddr = Phi;
2152 I->getSecond().GlobalRecordAddr = Phi;
2153 I->getSecond().IsInSPMDModeFlag = IsSPMD;
2154 }
else if (IsInTTDRegion) {
2155 assert(GlobalizedRecords.back().Records.size() < 2 &&
2156 "Expected less than 2 globalized records: one for target and one " 2159 for (
const RecordDecl *RD : GlobalizedRecords.back().Records) {
2161 unsigned Alignment =
2165 llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
2167 unsigned Alignment =
2169 Offset = llvm::alignTo(Offset, Alignment);
2170 GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
2171 ++GlobalizedRecords.back().RegionCounter;
2172 if (GlobalizedRecords.back().Records.size() == 1) {
2173 assert(KernelStaticGlobalized &&
2174 "Kernel static pointer must be initialized already.");
2175 auto *UseSharedMemory =
new llvm::GlobalVariable(
2178 "_openmp_static_kernel$is_shared");
2179 UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2185 false, Int16Ty, Loc);
2186 auto *StaticGlobalized =
new llvm::GlobalVariable(
2188 llvm::GlobalValue::CommonLinkage,
nullptr);
2189 auto *RecSize =
new llvm::GlobalVariable(
2192 "_openmp_static_kernel$size");
2193 RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2200 llvm::ConstantInt::get(
2203 StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
2205 OMPRTL_NVPTX__kmpc_get_team_static_memory),
2206 GlobalRecordSizeArg);
2207 GlobalizedRecords.back().Buffer = StaticGlobalized;
2208 GlobalizedRecords.back().RecSize = RecSize;
2209 GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
2210 GlobalizedRecords.back().Loc = Loc;
2212 assert(KernelStaticGlobalized &&
"Global address must be set already.");
2217 .castAs<PointerType>());
2220 I->getSecond().GlobalRecordAddr = GlobalRecValue;
2221 I->getSecond().IsInSPMDModeFlag =
nullptr;
2228 llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize),
2232 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2233 GlobalRecordSizeArg);
2235 GlobalRecValue, GlobalRecPtrTy);
2236 I->getSecond().GlobalRecordAddr = GlobalRecValue;
2237 I->getSecond().IsInSPMDModeFlag =
nullptr;
2245 decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
2247 SecIt = I->getSecond().SecondaryLocalVarData->begin();
2248 llvm::PointerType *SecGlobalRecPtrTy =
2252 I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
2255 for (
auto &Rec : I->getSecond().LocalVarData) {
2256 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
2259 const auto *VD = cast<VarDecl>(Rec.first);
2267 if (Rec.second.IsOnePerTeam) {
2268 VarTy = Rec.second.FD->getType();
2279 Rec.second.PrivateAddr = VarAddr.
getAddress();
2280 if (!IsInTTDRegion &&
2283 assert(I->getSecond().IsInSPMDModeFlag &&
2284 "Expected unknown execution mode or required SPMD check.");
2286 assert(SecIt->second.IsOnePerTeam &&
2287 "Secondary glob data must be one per team.");
2293 Rec.second.PrivateAddr = VarAddr.
getAddress();
2295 Address GlobalPtr = Rec.second.PrivateAddr;
2297 Rec.second.PrivateAddr =
Address(
2298 Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
2303 const auto *VD = cast<VarDecl>(Rec.first);
2305 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.
getAddress());
2311 for (
const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
2319 Size = Bld.CreateNUWAdd(
2323 Size = Bld.CreateUDiv(Size, AlignVal);
2324 Size = Bld.CreateNUWMul(Size, AlignVal);
2328 Size, CGF.
Builder.getInt16(0)};
2331 OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
2332 GlobalRecordSizeArg);
2338 I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD),
2340 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
2342 I->getSecond().MappedParams->apply(CGF);
2345 void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(
CodeGenFunction &CGF,
2346 bool WithSPMDCheck) {
2351 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2352 if (I != FunctionGlobalizedDecls.end()) {
2353 I->getSecond().MappedParams->restore(CGF);
2357 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
2362 if (I->getSecond().GlobalRecordAddr) {
2363 if (!IsInTTDRegion &&
2369 Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
2375 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
2378 }
else if (IsInTTDRegion) {
2379 assert(GlobalizedRecords.back().RegionCounter > 0 &&
2380 "region counter must be > 0.");
2381 --GlobalizedRecords.back().RegionCounter;
2383 if (GlobalizedRecords.back().RegionCounter == 0) {
2387 Address(GlobalizedRecords.back().UseSharedMemory,
2389 false, Int16Ty, GlobalizedRecords.back().Loc);
2391 llvm::ConstantInt::get(
2397 OMPRTL_NVPTX__kmpc_restore_team_static_memory),
2402 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
2403 I->getSecond().GlobalRecordAddr);
2412 llvm::Function *OutlinedFn,
2423 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
2424 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2435 emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
2437 emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
2440 void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
2443 llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
2453 Address ThreadIDAddr = ZeroAddr;
2454 auto &&CodeGen = [
this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
2459 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
2460 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2461 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2472 NVPTXActionTy Action(
2484 llvm::Function *WFn = WrapperFunctionsMap[Fn];
2485 assert(WFn &&
"Wrapper function does not exist!");
2490 CGF.EmitRuntimeCall(
2499 if (!CapturedVars.empty()) {
2502 CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
"shared_arg_refs");
2507 llvm::ConstantInt::get(CGM.
SizeTy, CapturedVars.size())};
2509 OMPRTL_NVPTX__kmpc_begin_sharing_variables),
2515 Address SharedArgListAddress = CGF.EmitLoadOfPointer(
2517 .castAs<PointerType>());
2521 if (
V->getType()->isIntegerTy())
2522 PtrV = Bld.CreateIntToPtr(
V, CGF.VoidPtrTy);
2525 CGF.EmitStoreOfScalar(PtrV, Dst,
false,
2533 syncCTAThreads(CGF);
2541 syncCTAThreads(CGF);
2543 if (!CapturedVars.empty())
2544 CGF.EmitRuntimeCall(
2548 Work.emplace_back(WFn);
2551 auto &&LNParallelGen = [
this, Loc, &SeqGen, &L0ParallelGen](
2553 if (IsInParallelRegion) {
2554 SeqGen(CGF, Action);
2555 }
else if (IsInTargetMasterThreadRegion) {
2556 L0ParallelGen(CGF, Action);
2565 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(
".exit");
2566 llvm::BasicBlock *SeqBB = CGF.createBasicBlock(
".sequential");
2567 llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(
".parcheck");
2568 llvm::BasicBlock *MasterBB = CGF.createBasicBlock(
".master");
2569 llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
2571 Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
2574 CGF.EmitBlock(ParallelCheckBB);
2581 Bld.CreateCondBr(Res, SeqBB, MasterBB);
2582 CGF.EmitBlock(SeqBB);
2583 SeqGen(CGF, Action);
2584 CGF.EmitBranch(ExitBB);
2587 CGF.EmitBlock(MasterBB);
2588 L0ParallelGen(CGF, Action);
2589 CGF.EmitBranch(ExitBB);
2593 CGF.EmitBlock(ExitBB,
true);
2606 void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
2619 Address ThreadIDAddr = ZeroAddr;
2620 auto &&CodeGen = [
this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
2626 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
2627 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2628 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2639 NVPTXActionTy Action(
2648 if (IsInTargetMasterThreadRegion) {
2669 llvm::ConstantPointerNull::get(
2671 llvm::ConstantInt::get(CGF.
Int32Ty, 0,
true)};
2718 CGF.
Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
2725 CGF.
Builder.CreateICmpEQ(ThreadID, CounterVal);
2726 CGF.
Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
2757 "Cast type must sized.");
2759 "Val type must sized.");
2761 if (ValTy == CastTy)
2767 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
2790 "Unsupported bitwidth in shuffle instruction.");
2793 ? OMPRTL_NVPTX__kmpc_shuffle_int32
2794 : OMPRTL_NVPTX__kmpc_shuffle_int64;
2829 for (
int IntSize = 8; IntSize >= 1; IntSize /= 2) {
2843 llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
2845 llvm::PHINode *PhiSrc =
2846 Bld.CreatePHI(Ptr.
getType(), 2);
2847 PhiSrc->addIncoming(Ptr.
getPointer(), CurrentBB);
2848 llvm::PHINode *PhiDest =
2849 Bld.CreatePHI(ElemPtr.
getType(), 2);
2850 PhiDest->addIncoming(ElemPtr.
getPointer(), CurrentBB);
2856 Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
2865 PhiSrc->addIncoming(LocalPtr.
getPointer(), ThenBB);
2866 PhiDest->addIncoming(LocalElemPtr.
getPointer(), ThenBB);
2877 Size = Size % IntSize;
2913 llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2914 llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
2915 llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
2920 unsigned Size = Privates.size();
2921 for (
const Expr *Private : Privates) {
2926 bool ShuffleInElement =
false;
2929 bool UpdateDestListPtr =
false;
2932 bool IncrScratchpadSrc =
false;
2933 bool IncrScratchpadDest =
false;
2936 case RemoteLaneToThread: {
2947 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
2948 ShuffleInElement =
true;
2949 UpdateDestListPtr =
true;
2967 case ThreadToScratchpad: {
2978 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
2980 Bld.CreateNUWAdd(DestBase.
getPointer(), CurrentOffset);
2981 ScratchPadElemAbsolutePtrVal =
2982 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
2983 DestElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
2985 IncrScratchpadDest =
true;
2988 case ScratchpadToThread: {
2993 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
2995 Bld.CreateNUWAdd(SrcBase.
getPointer(), CurrentOffset);
2996 ScratchPadElemAbsolutePtrVal =
2997 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
2998 SrcElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
3000 IncrScratchpadSrc =
true;
3006 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
3007 UpdateDestListPtr =
true;
3021 if (ShuffleInElement) {
3023 RemoteLaneOffset, Private->getExprLoc());
3029 Private->
getType(), Private->getExprLoc());
3038 Private->getExprLoc());
3058 if (UpdateDestListPtr) {
3061 DestElementPtrAddr,
false,
3068 if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
3072 ScratchpadBasePtr = Bld.CreateNUWAdd(
3074 Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
3077 ScratchpadBasePtr = Bld.CreateNUWSub(
3078 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
3079 ScratchpadBasePtr = Bld.CreateUDiv(
3081 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
3082 ScratchpadBasePtr = Bld.CreateNUWAdd(
3083 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
3084 ScratchpadBasePtr = Bld.CreateNUWMul(
3086 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
3088 if (IncrScratchpadDest)
3128 Args.push_back(&ReduceListArg);
3129 Args.push_back(&NumWarpsArg);
3135 "_omp_reduction_inter_warp_copy_func", &M);
3137 Fn->setDoesNotRecurse();
3150 StringRef TransferMediumName =
3151 "__openmp_nvptx_data_transfer_temporary_storage";
3152 llvm::GlobalVariable *TransferMedium =
3153 M.getGlobalVariable(TransferMediumName);
3154 if (!TransferMedium) {
3155 auto *Ty = llvm::ArrayType::get(CGM.
Int32Ty, WarpSize);
3157 TransferMedium =
new llvm::GlobalVariable(
3158 M, Ty,
false, llvm::GlobalVariable::CommonLinkage,
3159 llvm::Constant::getNullValue(Ty), TransferMediumName,
3160 nullptr, llvm::GlobalVariable::NotThreadLocal,
3161 SharedAddressSpace);
3181 for (
const Expr *Private : Privates) {
3186 unsigned RealTySize =
3190 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) {
3191 unsigned NumIters = RealTySize / TySize;
3200 llvm::BasicBlock *PrecondBB =
nullptr;
3201 llvm::BasicBlock *ExitBB =
nullptr;
3214 Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.
IntTy, NumIters));
3215 Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
3227 llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID,
"warp_master");
3228 Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3245 llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
3246 TransferMedium, {llvm::Constant::getNullValue(CGM.
Int64Ty), WarpID});
3247 Address MediumPtr(MediumPtrVal, Align);
3259 Bld.CreateBr(MergeBB);
3262 Bld.CreateBr(MergeBB);
3280 AddrNumWarpsArg,
false, C.
IntTy, Loc);
3284 Bld.CreateICmpULT(ThreadID, NumWarpsVal,
"is_active_thread");
3285 Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3290 llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
3292 {llvm::Constant::getNullValue(CGM.
Int64Ty), ThreadID});
3293 Address SrcMediumPtr(SrcMediumPtrVal, Align);
3300 TargetElemPtrPtr,
false, C.
VoidPtrTy, Loc);
3313 Bld.CreateBr(W0MergeBB);
3316 Bld.CreateBr(W0MergeBB);
3321 Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.
IntTy, 1));
3327 RealTySize %= TySize;
3420 Args.push_back(&ReduceListArg);
3421 Args.push_back(&LaneIDArg);
3422 Args.push_back(&RemoteLaneOffsetArg);
3423 Args.push_back(&AlgoVerArg);
3429 "_omp_reduction_shuffle_and_reduce_func", &CGM.
getModule());
3431 Fn->setDoesNotRecurse();
3433 Fn->removeFnAttr(llvm::Attribute::NoInline);
3434 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
3435 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
3466 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.remote_reduce_list");
3472 LocalReduceList, RemoteReduceList,
3473 {RemoteLaneOffsetArgVal,
3498 llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
3500 llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3502 Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
3504 llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
3506 Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
3507 CondAlgo2 = Bld.CreateAnd(
3508 CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
3510 llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
3511 CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
3516 Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
3521 LocalReduceList.getPointer(), CGF.
VoidPtrTy);
3525 CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
3526 Bld.CreateBr(MergeBB);
3529 Bld.CreateBr(MergeBB);
3535 Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3537 Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
3542 Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3546 RemoteReduceList, LocalReduceList);
3547 Bld.CreateBr(CpyMergeBB);
3550 Bld.CreateBr(CpyMergeBB);
3568 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3582 Args.push_back(&BufferArg);
3583 Args.push_back(&IdxArg);
3584 Args.push_back(&ReduceListArg);
3590 "_omp_reduction_list_to_global_copy_func", &CGM.
getModule());
3592 Fn->setDoesNotRecurse();
3611 LLVMReductionsBufferTy->getPointerTo());
3617 for (
const Expr *Private : Privates) {
3627 const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
3629 const FieldDecl *FD = VarFieldMap.lookup(VD);
3673 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3675 llvm::Function *ReduceFn) {
3688 Args.push_back(&BufferArg);
3689 Args.push_back(&IdxArg);
3690 Args.push_back(&ReduceListArg);
3696 "_omp_reduction_list_to_global_reduce_func", &CGM.
getModule());
3698 Fn->setDoesNotRecurse();
3710 LLVMReductionsBufferTy->getPointerTo());
3715 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
3716 auto IPriv = Privates.begin();
3722 for (
unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
3725 const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
3726 const FieldDecl *FD = VarFieldMap.lookup(VD);
3732 if ((*IPriv)->getType()->isVariablyModifiedType()) {
3751 AddrReduceListArg,
false, C.
VoidPtrTy, Loc);
3753 CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});
3768 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3782 Args.push_back(&BufferArg);
3783 Args.push_back(&IdxArg);
3784 Args.push_back(&ReduceListArg);
3790 "_omp_reduction_global_to_list_copy_func", &CGM.
getModule());
3792 Fn->setDoesNotRecurse();
3811 LLVMReductionsBufferTy->getPointerTo());
3818 for (
const Expr *Private : Privates) {
3828 const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
3830 const FieldDecl *FD = VarFieldMap.lookup(VD);
3873 const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
3875 llvm::Function *ReduceFn) {
3888 Args.push_back(&BufferArg);
3889 Args.push_back(&IdxArg);
3890 Args.push_back(&ReduceListArg);
3896 "_omp_reduction_global_to_list_reduce_func", &CGM.
getModule());
3898 Fn->setDoesNotRecurse();
3910 LLVMReductionsBufferTy->getPointerTo());
3915 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
3916 auto IPriv = Privates.begin();
3922 for (
unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
3925 const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
3926 const FieldDecl *FD = VarFieldMap.lookup(VD);
3932 if ((*IPriv)->getType()->isVariablyModifiedType()) {
3951 AddrReduceListArg,
false, C.
VoidPtrTy, Loc);
3953 CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList});
4213 assert(!TeamsReduction && !ParallelReduction &&
4214 "Invalid reduction selection in emitReduction.");
4216 ReductionOps, Options);
4220 assert((TeamsReduction || ParallelReduction) &&
4221 "Invalid reduction selection in emitReduction.");
4234 auto Size = RHSExprs.size();
4235 for (
const Expr *E : Privates) {
4236 if (E->getType()->isVariablyModifiedType())
4240 llvm::APInt ArraySize(32, Size);
4245 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
4246 auto IPriv = Privates.begin();
4248 for (
unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4254 if ((*IPriv)->getType()->isVariablyModifiedType()) {
4272 LHSExprs, RHSExprs, ReductionOps);
4275 CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
4279 if (ParallelReduction) {
4282 CGF.
Builder.getInt32(RHSExprs.size()),
4283 ReductionArrayTySize,
4290 OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
4293 assert(TeamsReduction &&
"expected teams reduction.");
4294 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
4297 for (
const Expr *DRE : Privates) {
4298 PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
4301 const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
4304 TeamsReductions.push_back(TeamReductionRec);
4305 if (!KernelTeamsReductionPtr) {
4306 KernelTeamsReductionPtr =
new llvm::GlobalVariable(
4309 "_openmp_teams_reductions_buffer_$_$ptr");
4315 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
4317 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
4320 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
4322 CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
4333 GlobalToBufferCpyFn,
4334 GlobalToBufferRedFn,
4335 BufferToGlobalCpyFn,
4336 BufferToGlobalRedFn};
4340 OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
4348 Res, llvm::ConstantInt::get(CGM.
Int32Ty, 1));
4349 CGF.
Builder.CreateCondBr(Cond, ThenBB, ExitBB);
4358 auto &&CodeGen = [
Privates, LHSExprs, RHSExprs, ReductionOps,
4360 auto IPriv = Privates.begin();
4361 auto ILHS = LHSExprs.begin();
4362 auto IRHS = RHSExprs.begin();
4363 for (
const Expr *E : ReductionOps) {
4365 cast<DeclRefExpr>(*IRHS));
4373 NVPTXActionTy Action(
4381 CGF.EmitBlock(ExitBB,
true);
4386 const VarDecl *NativeParam)
const {
4391 const Type *NonQualTy = QC.
strip(ArgType);
4392 QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
4393 if (
const auto *
Attr = FD->
getAttr<OMPCaptureKindAttr>()) {
4394 if (
Attr->getCaptureKind() == OMPC_map) {
4397 }
else if (
Attr->getCaptureKind() == OMPC_firstprivate &&
4405 enum { NVPTX_local_addr = 5 };
4408 if (isa<ImplicitParamDecl>(NativeParam))
4423 const VarDecl *TargetParam)
const {
4424 assert(NativeParam != TargetParam &&
4426 "Native arg must not be the same as target arg.");
4430 const Type *NonQualTy = QC.
strip(NativeParamType);
4431 QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
4432 unsigned NativePointeeAddrSpace =
4439 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
4443 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
4444 NativePointeeAddrSpace));
4448 return NativeParamAddr;
4455 TargetArgs.reserve(Args.size());
4456 auto *FnType = OutlinedFn.getFunctionType();
4457 for (
unsigned I = 0, E = Args.size(); I < E; ++I) {
4458 if (FnType->isVarArg() && FnType->getNumParams() <= I) {
4459 TargetArgs.append(std::next(Args.begin(), I), Args.end());
4462 llvm::Type *TargetType = FnType->getParamType(I);
4464 if (!TargetType->isPointerTy()) {
4465 TargetArgs.emplace_back(NativeArg);
4470 NativeArg->getType()->getPointerElementType()->getPointerTo());
4471 TargetArgs.emplace_back(
4481 llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
4498 WrapperArgs.emplace_back(&ParallelLevelArg);
4499 WrapperArgs.emplace_back(&WrapperArg);
4506 Twine(OutlinedParallelFn->getName(),
"_wrapper"), &CGM.
getModule());
4509 Fn->setDoesNotRecurse();
4515 const auto *RD = CS.getCapturedRecordDecl();
4516 auto CurField = RD->field_begin();
4529 auto CI = CS.capture_begin();
4544 if (CS.capture_size() > 0 ||
4556 Src, CGF.
SizeTy->getPointerTo());
4561 cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
4562 Args.emplace_back(LB);
4566 Src, CGF.
SizeTy->getPointerTo());
4571 cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
4572 Args.emplace_back(UB);
4575 if (CS.capture_size() > 0) {
4577 for (
unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
4578 QualType ElemTy = CurField->getType();
4586 if (CI->capturesVariableByCopy() &&
4587 !CI->getCapturedVar()->getType()->isAnyPointerType()) {
4591 Args.emplace_back(Arg);
4605 assert(D &&
"Expected function or captured|block decl.");
4606 assert(FunctionGlobalizedDecls.count(CGF.
CurFn) == 0 &&
4607 "Function is registered already.");
4608 assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
4609 "Team is set but not processed.");
4610 const Stmt *Body =
nullptr;
4611 bool NeedToDelayGlobalization =
false;
4612 if (
const auto *FD = dyn_cast<FunctionDecl>(D)) {
4613 Body = FD->getBody();
4614 }
else if (
const auto *BD = dyn_cast<BlockDecl>(D)) {
4615 Body = BD->getBody();
4616 }
else if (
const auto *CD = dyn_cast<CapturedDecl>(D)) {
4617 Body = CD->getBody();
4619 if (NeedToDelayGlobalization &&
4625 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
4626 VarChecker.Visit(Body);
4628 VarChecker.getGlobalizedRecord(IsInTTDRegion);
4629 TeamAndReductions.first =
nullptr;
4630 TeamAndReductions.second.clear();
4632 VarChecker.getEscapedVariableLengthDecls();
4633 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
4635 auto I = FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
4636 I->getSecond().MappedParams =
4637 llvm::make_unique<CodeGenFunction::OMPMapVars>();
4638 I->getSecond().GlobalRecord = GlobalizedVarsRecord;
4639 I->getSecond().EscapedParameters.insert(
4640 VarChecker.getEscapedParameters().begin(),
4641 VarChecker.getEscapedParameters().end());
4642 I->getSecond().EscapedVariableLengthDecls.append(
4643 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
4644 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
4645 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
4646 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
4647 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
4648 Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
4650 if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
4651 CheckVarsEscapingDeclContext VarChecker(CGF,
llvm::None);
4652 VarChecker.Visit(Body);
4653 I->getSecond().SecondaryGlobalRecord =
4654 VarChecker.getGlobalizedRecord(
true);
4655 I->getSecond().SecondaryLocalVarData.emplace();
4656 DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
4657 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
4658 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
4659 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
4661 std::make_pair(VD, MappedVarData(FD,
true)));
4664 if (!NeedToDelayGlobalization) {
4665 emitGenericVarsProlog(CGF, D->
getBeginLoc(),
true);
4667 GlobalizationScope() =
default;
4671 .emitGenericVarsEpilog(CGF,
true);
4680 if (VD && VD->
hasAttr<OMPAllocateDeclAttr>()) {
4681 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
4682 switch (A->getAllocatorType()) {
4685 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
4686 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
4687 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
4688 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
4691 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
4694 case OMPAllocateDeclAttr::OMPConstMemAlloc: {
4696 auto *GV =
new llvm::GlobalVariable(
4699 llvm::Constant::getNullValue(VarTy), VD->
getName(),
4700 nullptr, llvm::GlobalValue::NotThreadLocal,
4703 GV->setAlignment(Align.getQuantity());
4706 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
4708 auto *GV =
new llvm::GlobalVariable(
4711 llvm::Constant::getNullValue(VarTy), VD->
getName(),
4712 nullptr, llvm::GlobalValue::NotThreadLocal,
4715 GV->setAlignment(Align.getQuantity());
4718 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
4719 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
4721 auto *GV =
new llvm::GlobalVariable(
4724 llvm::Constant::getNullValue(VarTy), VD->
getName());
4726 GV->setAlignment(Align.getQuantity());
4736 auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
4737 if (I == FunctionGlobalizedDecls.end())
4739 auto VDI = I->getSecond().LocalVarData.find(VD);
4740 if (VDI != I->getSecond().LocalVarData.end())
4741 return VDI->second.PrivateAddr;
4746 auto VDI = I->getSecond().LocalVarData.find(
4747 cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
4749 if (VDI != I->getSecond().LocalVarData.end())
4750 return VDI->second.PrivateAddr;
4758 FunctionGlobalizedDecls.erase(CGF.
CurFn);
4767 ScheduleKind = OMPC_DIST_SCHEDULE_static;
4774 CGF, S, ScheduleKind, Chunk);
4780 const Expr *&ChunkExpr)
const {
4781 ScheduleKind = OMPC_SCHEDULE_static;
4783 llvm::APInt ChunkSize(32, 1);
4792 " Expected target-based directive.");
4797 if (!C.capturesVariable())
4799 const VarDecl *VD = C.getCapturedVar();
4800 const auto *RD = VD->
getType()
4804 if (!RD || !RD->isLambda())
4813 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
4815 RD->getCaptureFields(Captures, ThisCapture);
4825 const VarDecl *VD = LC.getCapturedVar();
4828 auto It = Captures.find(VD);
4829 assert(It != Captures.end() &&
"Found lambda capture without field.");
4847 if (!VD || !VD->
hasAttr<OMPAllocateDeclAttr>())
4849 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
4850 switch(A->getAllocatorType()) {
4851 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
4853 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
4854 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
4855 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
4856 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
4857 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
4860 case OMPAllocateDeclAttr::OMPConstMemAlloc:
4863 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
4866 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
4867 llvm_unreachable(
"Expected predefined allocator for the variables with the " 4877 llvm::StringMap<bool> Features;
4881 for (
const auto &Feature : Features) {
4882 if (Feature.getValue()) {
4896 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
4910 CGM.
Error(Clause->getBeginLoc(),
4911 "Target architecture does not support unified addressing");
4939 llvm_unreachable(
"Unexpected Cuda arch.");
4948 std::pair<unsigned, unsigned> Data;
4952 Data.second = CGM.
getLangOpts().OpenMPCUDABlocksPerSM;
4953 if (Data.first && Data.second)
4997 llvm_unreachable(
"Unexpected Cuda arch.");
4999 llvm_unreachable(
"Unexpected NVPTX target without ptx feature.");
5003 if (!GlobalizedRecords.empty()) {
5013 for (
const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
5014 if (Records.Records.empty())
5017 unsigned RecAlignment = 0;
5018 for (
const RecordDecl *RD : Records.Records) {
5021 RecAlignment =
std::max(RecAlignment, Alignment);
5024 llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
5026 Size = llvm::alignTo(Size, RecAlignment);
5027 llvm::APInt ArySize(64, Size);
5030 const bool UseSharedMemory = Size <= SharedMemorySize;
5038 if (UseSharedMemory) {
5039 SharedStaticRD->
addDecl(Field);
5040 SharedRecs.push_back(&Records);
5042 StaticRD->addDecl(Field);
5043 GlobalRecs.push_back(&Records);
5045 Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.
SizeTy, Size));
5046 Records.UseSharedMemory->setInitializer(
5047 llvm::ConstantInt::get(CGM.
Int16Ty, UseSharedMemory ? 1 : 0));
5054 llvm::APInt ArySize(64, SharedMemorySize);
5063 SharedStaticRD->
addDecl(Field);
5069 auto *GV =
new llvm::GlobalVariable(
5071 false, llvm::GlobalValue::CommonLinkage,
5072 llvm::Constant::getNullValue(LLVMStaticTy),
5073 "_openmp_shared_static_glob_rd_$_",
nullptr,
5074 llvm::GlobalValue::NotThreadLocal,
5076 auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
5078 for (
const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
5079 Rec->Buffer->replaceAllUsesWith(Replacement);
5080 Rec->Buffer->eraseFromParent();
5087 llvm::APInt Size1(32, SMsBlockPerSM.second);
5091 llvm::APInt Size2(32, SMsBlockPerSM.first);
5098 auto *GV =
new llvm::GlobalVariable(
5101 llvm::Constant::getNullValue(LLVMArr2Ty),
5102 "_openmp_static_glob_rd_$_");
5103 auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
5105 for (
const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
5106 Rec->Buffer->replaceAllUsesWith(Replacement);
5107 Rec->Buffer->eraseFromParent();
5111 if (!TeamsReductions.empty()) {
5116 for (
const RecordDecl *TeamReductionRec : TeamsReductions) {
5133 auto *GV =
new llvm::GlobalVariable(
5134 CGM.
getModule(), LLVMReductionsBufferTy,
5136 llvm::Constant::getNullValue(LLVMReductionsBufferTy),
5137 "_openmp_teams_reductions_buffer_$_");
5138 KernelTeamsReductionPtr->setInitializer(
5139 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
llvm::PointerType * Int8PtrPtrTy
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ...
const BlockDecl * getBlockDecl() const
TargetOptions & getTargetOpts() const
Retrieve the target options.
static llvm::Value * emitGlobalToListCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
static const Decl * getCanonicalDecl(const Decl *D)
llvm::IntegerType * IntTy
int
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T)
Given a value of type T* that may not be to a complete object, construct an l-value with the natural ...
Other implicit parameter.
A class which contains all the information about a particular captured value.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
CodeGenTypes & getTypes()
llvm::Function * emitReductionFunction(SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
ArrayRef< OMPClause * > clauses()
llvm::Type * ConvertTypeForMem(QualType T)
static llvm::Value * getNVPTXLaneID(CodeGenFunction &CGF)
Get the id of the current lane in the Warp.
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::LLVMContext & getLLVMContext()
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
static std::pair< unsigned, unsigned > getSMsBlocksPerSM(CodeGenModule &CGM)
Get number of SMs and number of blocks per SM.
attr_iterator attr_begin() const
Stmt - This represents one statement.
void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing...
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl *> &Vars)
Get list of reduction variables from the teams ... directives.
Decl - This represents one declaration (or definition), e.g.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
SourceLocation getBeginLoc() const LLVM_READONLY
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter...
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * ScratchpadIndex
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
The base class of the type hierarchy.
virtual void completeDefinition()
Note that the definition of this type is now complete.
bool isZero() const
isZero - Test whether the quantity equals zero.
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference...
static bool hasStaticScheduling(const OMPExecutableDirective &D)
Check if the directive is loops based and has schedule clause at all or has static scheduling...
virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing...
Describes the capture of a variable or of this, or of a C++1y init-capture.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false)
llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function for the current OpenMP implementation.
QualType getElementType() const
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
CudaArch StringToCudaArch(llvm::StringRef S)
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
Represents a variable declaration or definition.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
LangAS getLangASFromTargetAS(unsigned TargetAS)
const ArrayType * castAsArrayTypeUnsafe() const
A variant of castAs<> for array type which silently discards qualifiers from the outermost type...
LangAS
Defines the address space values used by the address space qualifier of QualType. ...
DiagnosticsEngine & getDiags() const
OpenMPDirectiveKind ReductionKind
llvm::Value * getPointer() const
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
unsigned getAddressSpace() const
Return the address space that this address resides in.
SPMD execution mode (all threads are worker threads).
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Represents a struct/union/class.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
clauselist_range clauselists()
virtual llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
Address getAddress() const
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
attr_iterator attr_end() const
llvm::IntegerType * Int64Ty
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
Represents a member of a struct/union/class.
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
CharUnits getAlignment() const
llvm::IntegerType * SizeTy
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
unsigned getDefaultLocationReserved2Flags() const override
Returns additional flags that can be stored in reserved_2 field of the default location.
static llvm::Value * getMasterThreadID(CodeGenFunction &CGF)
Get the thread id of the OMP master thread.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
CharUnits getSizeAlign() const
void startDefinition()
Starts the definition of this tag declaration.
bool isReferenceType() const
static llvm::Value * emitListToGlobalCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
OpenMPDirectiveKind getDirectiveKind() const
__DEVICE__ int max(int __a, int __b)
SourceLocation getBeginLoc() const LLVM_READONLY
static bool hasNestedLightweightDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) lightweight runtime construct, if any.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void InitTempAlloca(Address Alloca, llvm::Value *Value)
InitTempAlloca - Provide an initial value for the given alloca which will be observable at all locati...
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
CharUnits - This is an opaque type for sizes expressed in character units.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
CharUnits getAlignment() const
Return the alignment of this pointer.
llvm::PointerType * VoidPtrTy
Expr * getIterationVariable() const
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
ModeFlagsTy
Enum for accessing the reserved_2 field of the ident_t struct.
bool isCXXThisExprCaptured() const
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
static llvm::Value * getNVPTXWarpID(CodeGenFunction &CGF)
Get the id of the warp in the block.
Scope - A scope is a transient data structure that is used while parsing the program.
llvm::PointerType * VoidPtrPtrTy
static CGOpenMPRuntimeNVPTX::DataSharingMode getDataSharingMode(CodeGenModule &CGM)
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
This represents clause 'reduction' in the '#pragma omp ...' directives.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
CharUnits getPointerAlign() const
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
unsigned getDefaultFirstprivateAddressSpace() const override
Returns default address space for the constant firstprivates, constant address space by default...
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
static llvm::Value * createRuntimeShuffleFunction(CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Unknown execution mode (orphaned directive).
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
ASTContext & getContext() const
Describes the capture of either a variable, or 'this', or variable-length array type.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate', 'reduction' etc.
void setAddress(Address address)
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl *> &Vars)
Get list of lastprivate variables from the teams distribute ...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
This represents '#pragma omp requires...' directive.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
This represents one expression.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus that largest non-variably-sized element type.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
llvm::PointerType * getType() const
Return the type of the pointer value.
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
DeclContext * getDeclContext()
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
static llvm::Function * emitShuffleAndReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc)
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
This represents 'ordered' clause in the '#pragma omp ...' directive.
llvm::IntegerType * Int32Ty
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
QualType getRecordType(const RecordDecl *Decl) const
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
MachineConfiguration
GPU Configuration: This information can be derived from cuda registers, however, providing compile ti...
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
const TargetInfo & getTarget() const
const LangOptions & getLangOpts() const
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
ASTContext & getContext() const
OpenMPProcBindClauseKind
OpenMP attributes for 'proc_bind' clause.
Non-SPMD execution mode (1 master thread, others are workers).
llvm::Value * ScratchpadWidth
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
GlobalDecl - represents a global declaration.
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
std::string CPU
If given, the name of the target CPU to generate code for.
The l-value was considered opaque, so the alignment was determined from a type.
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ...
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
This captures a statement into a function.
QualType getCanonicalType() const
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Encodes a location in the source.
static llvm::Value * getThreadLimit(CodeGenFunction &CGF, bool IsInSPMDExecutionMode=false)
Get the value of the thread_limit clause in the teams directive.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
Expr * getSubExpr() const
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
This is a basic class for representing single OpenMP executable directive.
CastKind getCastKind() const
This represents 'schedule' clause in the '#pragma omp ...' directive.
llvm::IntegerType * Int16Ty
DeclStmt - Adaptor class for mixing declarations with statements and expressions. ...
OpenMPDirectiveKind
OpenMP directives.
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ...
static llvm::Value * emitGlobalToListReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
This file defines OpenMP nodes for declarative directives.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
This is a basic class for representing single OpenMP clause.
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
static llvm::Value * emitListToGlobalReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl *> &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, bool Mode)
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
Stmt * getCapturedStmt()
Retrieve the statement being captured.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language...
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
static CudaArch getCudaArch(CodeGenModule &CGM)
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Dataflow Directional Tag Classes.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
A qualifier set is used to build a set of qualifiers.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
ArrayRef< Capture > captures() const
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
static void emitReductionListCopy(CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr *> Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr})
Emit instructions to copy a Reduce list, which contains partially aggregated values, in the specified direction.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
llvm::Module & getModule() const
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
This file defines OpenMP AST classes for executable directives and clauses.
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
llvm::Type * getElementType() const
Return the type of the values stored in this address.
llvm::PointerType * Int8PtrTy
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create or return a runtime function declaration with the specified type and name. ...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void addDecl(Decl *D)
Add the declaration D into this context.
bool hasAssociatedStmt() const
Returns true if directive has associated statement.
ExecutionMode
Defines the execution mode.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
bool isLValueReferenceType() const
static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
CapturedDecl * getCapturedDecl()
Retrieve the outlined function declaration.
Generic data-sharing mode.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Privates[]
Gets the list of initial values for linear variables.
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
virtual llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
capture_range captures() const
Retrieve this lambda's captures.
CapturedRegionKind getKind() const
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
static llvm::Value * getNVPTXThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
const VariableArrayType * getAsVariableArrayType(QualType T) const
static llvm::Value * getNVPTXWarpSize(CodeGenFunction &CGF)
Get the GPU warp size.
llvm::Value * RemoteLaneOffset
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
A reference to a declared variable, function, enum, etc.
CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
void addAddressSpace(LangAS space)
static llvm::Value * emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that gathers Reduce lists from the first lane of every active warp to la...
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LValue - This represents an lvalue reference to an object.
Information for lazily generating a cleanup.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
void setAccess(AccessSpecifier AS)
bool isConstant(const ASTContext &Ctx) const
unsigned getTargetAddressSpace(QualType T) const
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
const LangOptions & getLangOpts() const
static llvm::Value * getNVPTXNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
llvm::Value * getPointer() const
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
Attr - This represents one attribute.
SourceLocation getLocation() const
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
static bool supportsLightweightRuntime(ASTContext &Ctx, const OMPExecutableDirective &D)
Checks if the construct supports lightweight runtime.