20 #include "llvm/ADT/SmallPtrSet.h" 22 using namespace clang;
23 using namespace CodeGen;
29 OMPRTL_NVPTX__kmpc_kernel_init,
31 OMPRTL_NVPTX__kmpc_kernel_deinit,
34 OMPRTL_NVPTX__kmpc_spmd_kernel_init,
36 OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
40 OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
43 OMPRTL_NVPTX__kmpc_kernel_parallel,
45 OMPRTL_NVPTX__kmpc_kernel_end_parallel,
48 OMPRTL_NVPTX__kmpc_serialized_parallel,
51 OMPRTL_NVPTX__kmpc_end_serialized_parallel,
54 OMPRTL_NVPTX__kmpc_shuffle_int32,
57 OMPRTL_NVPTX__kmpc_shuffle_int64,
63 OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
69 OMPRTL_NVPTX__kmpc_simd_reduce_nowait,
79 OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
81 OMPRTL_NVPTX__kmpc_end_reduce_nowait,
83 OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
85 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
88 OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
90 OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
93 OMPRTL_NVPTX__kmpc_begin_sharing_variables,
95 OMPRTL_NVPTX__kmpc_end_sharing_variables,
97 OMPRTL_NVPTX__kmpc_get_shared_variables,
100 OMPRTL_NVPTX__kmpc_parallel_level,
102 OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
112 llvm::BasicBlock *ContBlock =
nullptr;
117 bool Conditional =
false)
118 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
127 CGF.
Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
145 class ExecutionModeRAII {
157 ~ExecutionModeRAII() { Mode = SavedMode; }
169 LaneIDMask = WarpSize - 1,
172 GlobalMemoryAlignment = 256,
183 class CheckVarsEscapingDeclContext final
186 llvm::SetVector<const ValueDecl *> EscapedDecls;
187 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
188 llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
190 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
191 bool AllEscaped =
false;
192 bool IsForCombinedParallelRegion =
false;
194 void markAsEscaped(
const ValueDecl *VD) {
196 if (!isa<VarDecl>(VD) ||
197 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
201 if (
auto *CSI = CGF.CapturedStmtInfo) {
202 if (
const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
205 if (!IsForCombinedParallelRegion) {
208 const auto *
Attr = FD->getAttr<OMPCaptureKindAttr>();
212 static_cast<OpenMPClauseKind>(
Attr->getCaptureKind())) ||
213 Attr->getCaptureKind() == OMPC_map)
216 if (!FD->getType()->isReferenceType()) {
218 "Parameter captured by value with variably modified type");
219 EscapedParameters.insert(VD);
220 }
else if (!IsForCombinedParallelRegion) {
225 if ((!CGF.CapturedStmtInfo ||
226 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
231 EscapedVariableLengthDecls.insert(VD);
233 EscapedDecls.insert(VD);
236 void VisitValueDecl(
const ValueDecl *VD) {
239 if (
const auto *VarD = dyn_cast<VarDecl>(VD)) {
240 if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
241 const bool SavedAllEscaped = AllEscaped;
243 Visit(VarD->getInit());
244 AllEscaped = SavedAllEscaped;
250 bool IsCombinedParallelRegion) {
254 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
255 const ValueDecl *VD = C.getCapturedVar();
256 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
257 if (IsCombinedParallelRegion) {
261 IsForCombinedParallelRegion =
false;
264 C->getClauseKind() == OMPC_reduction ||
265 C->getClauseKind() == OMPC_linear ||
266 C->getClauseKind() == OMPC_private)
269 if (
const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
270 Vars = PC->getVarRefs();
271 else if (
const auto *PC = dyn_cast<OMPLastprivateClause>(C))
272 Vars = PC->getVarRefs();
274 llvm_unreachable(
"Unexpected clause.");
275 for (
const auto *E : Vars) {
279 IsForCombinedParallelRegion =
true;
283 if (IsForCombinedParallelRegion)
288 if (isa<OMPCapturedExprDecl>(VD))
290 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
297 return P1.first > P2.first;
300 void buildRecordForGlobalizedVars() {
301 assert(!GlobalizedRD &&
302 "Record for globalized variables is built already.");
303 if (EscapedDecls.empty())
309 std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
316 for (
const auto &Pair : GlobalizedVars) {
330 GlobalizedRD->addDecl(Field);
337 MappedDeclsFields.try_emplace(VD, Field);
339 GlobalizedRD->completeDefinition();
344 virtual ~CheckVarsEscapingDeclContext() =
default;
345 void VisitDeclStmt(
const DeclStmt *S) {
349 if (
const auto *VD = dyn_cast_or_null<ValueDecl>(D))
363 if (CaptureRegions.size() == 1 && CaptureRegions.back() ==
OMPD_unknown) {
364 VisitStmt(S->getCapturedStmt());
367 VisitOpenMPCapturedStmt(
369 CaptureRegions.back() == OMPD_parallel &&
377 if (C.capturesVariable() && !C.capturesVariableByCopy()) {
378 const ValueDecl *VD = C.getCapturedVar();
380 if (isa<OMPCapturedExprDecl>(VD))
389 if (C.capturesVariable()) {
391 const ValueDecl *VD = C.getCapturedVar();
399 void VisitBlockExpr(
const BlockExpr *E) {
404 const VarDecl *VD = C.getVariable();
411 void VisitCallExpr(
const CallExpr *E) {
417 if (Arg->isLValue()) {
418 const bool SavedAllEscaped = AllEscaped;
421 AllEscaped = SavedAllEscaped;
434 if (isa<OMPCapturedExprDecl>(VD))
436 else if (
const auto *VarD = dyn_cast<VarDecl>(VD))
437 if (VarD->isInitCapture())
444 const bool SavedAllEscaped = AllEscaped;
447 AllEscaped = SavedAllEscaped;
456 const bool SavedAllEscaped = AllEscaped;
459 AllEscaped = SavedAllEscaped;
464 void VisitExpr(
const Expr *E) {
467 bool SavedAllEscaped = AllEscaped;
473 AllEscaped = SavedAllEscaped;
475 void VisitStmt(
const Stmt *S) {
487 buildRecordForGlobalizedVars();
493 assert(GlobalizedRD &&
494 "Record for globalized variables must be generated already.");
495 auto I = MappedDeclsFields.find(VD);
496 if (I == MappedDeclsFields.end())
498 return I->getSecond();
503 return EscapedDecls.getArrayRef();
508 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()
const {
509 return EscapedParameters;
515 return EscapedVariableLengthDecls.getArrayRef();
523 llvm::Intrinsic::getDeclaration(
524 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
531 llvm::Intrinsic::getDeclaration(
532 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
556 llvm::Intrinsic::getDeclaration(
557 &CGF.
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
558 "nvptx_num_threads");
572 llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
592 bool IsInSPMDExecutionMode =
false) {
594 return IsInSPMDExecutionMode
614 return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
615 Bld.CreateNot(Mask),
"master_tid");
618 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
622 createWorkerFunction(CGM);
625 void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
633 WorkerFn->setDoesNotRecurse();
637 CGOpenMPRuntimeNVPTX::getExecutionMode()
const {
638 return CurrentExecutionMode;
650 if (
const auto *C = dyn_cast<CompoundStmt>(Body))
652 return C->body_front();
665 if (NameModifier != OMPD_parallel && NameModifier !=
OMPD_unknown)
667 const Expr *Cond = C->getCondition();
682 if (
const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
689 if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
690 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
694 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
695 DKind = NND->getDirectiveKind();
699 if (DKind == OMPD_distribute) {
700 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
706 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
707 DKind = NND->getDirectiveKind();
715 case OMPD_target_teams:
719 if (DKind == OMPD_distribute) {
720 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
724 if (
const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
725 DKind = NND->getDirectiveKind();
731 case OMPD_target_teams_distribute:
734 case OMPD_target_simd:
735 case OMPD_target_parallel:
736 case OMPD_target_parallel_for:
737 case OMPD_target_parallel_for_simd:
738 case OMPD_target_teams_distribute_simd:
739 case OMPD_target_teams_distribute_parallel_for:
740 case OMPD_target_teams_distribute_parallel_for_simd:
743 case OMPD_parallel_for:
744 case OMPD_parallel_sections:
746 case OMPD_parallel_for_simd:
748 case OMPD_cancellation_point:
750 case OMPD_threadprivate:
765 case OMPD_target_data:
766 case OMPD_target_exit_data:
767 case OMPD_target_enter_data:
768 case OMPD_distribute:
769 case OMPD_distribute_simd:
770 case OMPD_distribute_parallel_for:
771 case OMPD_distribute_parallel_for_simd:
772 case OMPD_teams_distribute:
773 case OMPD_teams_distribute_simd:
774 case OMPD_teams_distribute_parallel_for:
775 case OMPD_teams_distribute_parallel_for_simd:
776 case OMPD_target_update:
777 case OMPD_declare_simd:
778 case OMPD_declare_target:
779 case OMPD_end_declare_target:
780 case OMPD_declare_reduction:
782 case OMPD_taskloop_simd:
784 llvm_unreachable(
"Unexpected directive.");
794 switch (DirectiveKind) {
796 case OMPD_target_teams:
797 case OMPD_target_teams_distribute:
799 case OMPD_target_parallel:
800 case OMPD_target_parallel_for:
801 case OMPD_target_parallel_for_simd:
802 case OMPD_target_teams_distribute_parallel_for:
803 case OMPD_target_teams_distribute_parallel_for_simd:
805 case OMPD_target_simd:
806 case OMPD_target_teams_distribute_simd:
810 case OMPD_parallel_for:
811 case OMPD_parallel_sections:
813 case OMPD_parallel_for_simd:
815 case OMPD_cancellation_point:
817 case OMPD_threadprivate:
832 case OMPD_target_data:
833 case OMPD_target_exit_data:
834 case OMPD_target_enter_data:
835 case OMPD_distribute:
836 case OMPD_distribute_simd:
837 case OMPD_distribute_parallel_for:
838 case OMPD_distribute_parallel_for_simd:
839 case OMPD_teams_distribute:
840 case OMPD_teams_distribute_simd:
841 case OMPD_teams_distribute_parallel_for:
842 case OMPD_teams_distribute_parallel_for_simd:
843 case OMPD_target_update:
844 case OMPD_declare_simd:
845 case OMPD_declare_target:
846 case OMPD_end_declare_target:
847 case OMPD_declare_reduction:
849 case OMPD_taskloop_simd:
854 "Unknown programming model for OpenMP directive on NVPTX target.");
858 StringRef ParentName,
859 llvm::Function *&OutlinedFn,
860 llvm::Constant *&OutlinedFnID,
863 ExecutionModeRAII ModeRAII(CurrentExecutionMode,
false);
864 EntryFunctionState EST;
867 WrapperFunctionsMap.clear();
871 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
872 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
875 NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
876 CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
877 : EST(EST), WST(WST) {}
880 .emitNonSPMDEntryHeader(CGF, EST, WST);
884 .emitNonSPMDEntryFooter(CGF, EST);
888 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
889 IsOffloadEntry, CodeGen);
893 WST.WorkerFn->setName(Twine(OutlinedFn->getName(),
"_worker"));
896 emitWorkerFunction(WST);
900 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(
CodeGenFunction &CGF,
901 EntryFunctionState &EST,
902 WorkerFunctionState &WST) {
912 Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
915 emitCall(CGF, WST.Loc, WST.WorkerFn);
921 Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
924 IsInTargetMasterThreadRegion =
true;
932 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
936 createNVPTXRuntimeFunction(
937 OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
939 emitGenericVarsProlog(CGF, WST.Loc);
942 void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(
CodeGenFunction &CGF,
943 EntryFunctionState &EST) {
944 IsInTargetMasterThreadRegion =
false;
948 emitGenericVarsEpilog(CGF);
953 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".termination.notifier");
961 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
968 EST.ExitBB =
nullptr;
972 StringRef ParentName,
973 llvm::Function *&OutlinedFn,
974 llvm::Constant *&OutlinedFnID,
977 ExecutionModeRAII ModeRAII(CurrentExecutionMode,
true);
978 EntryFunctionState EST;
983 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
988 CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
990 : RT(RT), EST(EST), D(D) {}
992 RT.emitSPMDEntryHeader(CGF, EST, D);
995 RT.emitSPMDEntryFooter(CGF, EST);
997 } Action(*
this, EST, D);
999 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
1000 IsOffloadEntry, CodeGen);
1003 void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
1019 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
1023 createNVPTXRuntimeFunction(
1024 OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
1030 IsInTargetMasterThreadRegion =
true;
1034 EntryFunctionState &EST) {
1035 IsInTargetMasterThreadRegion =
false;
1048 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
1052 EST.ExitBB =
nullptr;
1065 llvm::GlobalValue::WeakAnyLinkage,
1066 llvm::ConstantInt::get(CGM.
Int8Ty, Mode ? 0 : 1),
1067 Twine(Name,
"_exec_mode"));
1071 void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
1077 emitWorkerLoop(CGF, WST);
1082 WorkerFunctionState &WST) {
1095 llvm::BasicBlock *SelectWorkersBB = CGF.
createBasicBlock(
".select.workers");
1097 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".terminate.parallel");
1119 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
1124 llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID,
"should_terminate");
1125 Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
1130 Bld.CreateIsNotNull(Bld.
CreateLoad(ExecStatus),
"is_active");
1131 Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
1137 for (llvm::Function *W : Work) {
1142 Bld.CreateICmpEQ(Bld.
CreateLoad(WorkFn),
ID,
"work_match");
1146 Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
1155 emitCall(CGF, WST.Loc, W,
1156 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1166 auto *ParallelFnTy =
1167 llvm::FunctionType::get(CGM.
VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
1175 emitCall(CGF, WST.Loc, WorkFnCast,
1176 {Bld.getInt16(0), getThreadID(CGF, WST.Loc)});
1183 createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
1203 llvm::Constant *RTLFn =
nullptr;
1204 switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
1205 case OMPRTL_NVPTX__kmpc_kernel_init: {
1210 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1214 case OMPRTL_NVPTX__kmpc_kernel_deinit: {
1218 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1222 case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
1227 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1231 case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
1238 case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
1243 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1247 case OMPRTL_NVPTX__kmpc_kernel_parallel: {
1253 llvm::FunctionType::get(RetTy, TypeParams,
false);
1257 case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
1264 case OMPRTL_NVPTX__kmpc_serialized_parallel: {
1269 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1273 case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
1278 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1282 case OMPRTL_NVPTX__kmpc_shuffle_int32: {
1287 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1291 case OMPRTL_NVPTX__kmpc_shuffle_int64: {
1296 llvm::FunctionType::get(CGM.
Int64Ty, TypeParams,
false);
1300 case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: {
1308 auto *ShuffleReduceFnTy =
1309 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1312 auto *InterWarpCopyFnTy =
1313 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1319 ShuffleReduceFnTy->getPointerTo(),
1320 InterWarpCopyFnTy->getPointerTo()};
1322 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1324 FnTy,
"__kmpc_nvptx_parallel_reduce_nowait");
1327 case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: {
1335 auto *ShuffleReduceFnTy =
1336 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1339 auto *InterWarpCopyFnTy =
1340 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1346 ShuffleReduceFnTy->getPointerTo(),
1347 InterWarpCopyFnTy->getPointerTo()};
1349 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1351 FnTy,
"__kmpc_nvptx_simd_reduce_nowait");
1354 case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
1366 auto *ShuffleReduceFnTy =
1367 llvm::FunctionType::get(CGM.
VoidTy, ShuffleReduceTypeParams,
1370 auto *InterWarpCopyFnTy =
1371 llvm::FunctionType::get(CGM.
VoidTy, InterWarpCopyTypeParams,
1375 auto *CopyToScratchpadFnTy =
1376 llvm::FunctionType::get(CGM.
VoidTy, CopyToScratchpadTypeParams,
1380 auto *LoadReduceFnTy =
1381 llvm::FunctionType::get(CGM.
VoidTy, LoadReduceTypeParams,
1387 ShuffleReduceFnTy->getPointerTo(),
1388 InterWarpCopyFnTy->getPointerTo(),
1389 CopyToScratchpadFnTy->getPointerTo(),
1390 LoadReduceFnTy->getPointerTo()};
1392 llvm::FunctionType::get(CGM.
Int32Ty, TypeParams,
false);
1394 FnTy,
"__kmpc_nvptx_teams_reduce_nowait");
1397 case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
1401 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1403 FnTy,
"__kmpc_nvptx_end_reduce_nowait");
1406 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
1413 case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
1420 case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
1425 llvm::FunctionType::get(CGM.
VoidPtrTy, TypeParams,
false);
1427 FnTy,
"__kmpc_data_sharing_push_stack");
1430 case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
1434 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1436 "__kmpc_data_sharing_pop_stack");
1439 case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
1444 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1448 case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
1455 case OMPRTL_NVPTX__kmpc_get_shared_variables: {
1459 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
1463 case OMPRTL_NVPTX__kmpc_parallel_level: {
1467 llvm::FunctionType::get(CGM.
Int16Ty, TypeParams,
false);
1471 case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
1473 auto *FnTy = llvm::FunctionType::get(CGM.
Int8Ty,
false);
1481 void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *
ID,
1482 llvm::Constant *Addr,
1483 uint64_t Size, int32_t,
1484 llvm::GlobalValue::LinkageTypes) {
1487 if (!isa<llvm::Function>(Addr))
1493 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
1495 llvm::Metadata *MDVals[] = {
1496 llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx,
"kernel"),
1497 llvm::ConstantAsMetadata::get(
1498 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
1500 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
1503 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
1505 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
1507 if (!IsOffloadEntry)
1510 assert(!ParentName.empty() &&
"Invalid target region parent name!");
1514 emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1517 emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
1526 llvm_unreachable(
"OpenMP NVPTX can only handle device code.");
1550 const Expr *NumTeams,
1551 const Expr *ThreadLimit,
1559 bool &IsInParallelRegion;
1560 bool PrevIsInParallelRegion;
1563 NVPTXPrePostActionTy(
bool &IsInParallelRegion)
1564 : IsInParallelRegion(IsInParallelRegion) {}
1566 PrevIsInParallelRegion = IsInParallelRegion;
1567 IsInParallelRegion =
true;
1570 IsInParallelRegion = PrevIsInParallelRegion;
1572 } Action(IsInParallelRegion);
1574 bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
1575 IsInTargetMasterThreadRegion =
false;
1578 D, ThreadIDVar, InnermostKind, CodeGen));
1579 IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
1581 !IsInParallelRegion) {
1582 llvm::Function *WrapperFun =
1583 createParallelDataSharingWrapper(OutlinedFun, D);
1584 WrapperFunctionsMap[OutlinedFun] = WrapperFun;
1603 .emitGenericVarsProlog(CGF, Loc);
1607 .emitGenericVarsEpilog(CGF);
1612 D, ThreadIDVar, InnermostKind, CodeGen);
1613 llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
1614 OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
1615 OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
1616 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
1621 void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(
CodeGenFunction &CGF,
1628 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1629 if (I == FunctionGlobalizedDecls.end())
1631 if (
const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
1638 unsigned Alignment =
1640 unsigned GlobalRecordSize =
1642 GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
1646 llvm::ConstantInt::get(CGM.
SizeTy, GlobalRecordSize),
1650 GlobalRecordSizeArg);
1655 I->getSecond().GlobalRecordAddr = GlobalRecValue;
1659 for (
auto &Rec : I->getSecond().LocalVarData) {
1660 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
1663 const auto *VD = cast<VarDecl>(Rec.first);
1672 const auto *VD = cast<VarDecl>(Rec.first);
1674 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.
getAddress());
1678 for (
const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
1686 Size = Bld.CreateNUWAdd(
1690 Size = Bld.CreateUDiv(Size, AlignVal);
1691 Size = Bld.CreateNUWMul(Size, AlignVal);
1695 Size, CGF.
Builder.getInt16(0)};
1698 GlobalRecordSizeArg);
1704 I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD),
1706 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
1708 I->getSecond().MappedParams->apply(CGF);
1711 void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(
CodeGenFunction &CGF) {
1715 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1716 if (I != FunctionGlobalizedDecls.end()) {
1717 I->getSecond().MappedParams->restore(CGF);
1721 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
1726 if (I->getSecond().GlobalRecordAddr) {
1729 I->getSecond().GlobalRecordAddr);
1748 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
1749 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1760 emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
1762 emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
1765 void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
1768 llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
1778 Address ThreadIDAddr = ZeroAddr;
1779 auto &&CodeGen = [
this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
1784 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
1785 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1786 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1797 NVPTXActionTy Action(
1809 llvm::Function *WFn = WrapperFunctionsMap[Fn];
1810 assert(WFn &&
"Wrapper function does not exist!");
1815 CGF.EmitRuntimeCall(
1824 if (!CapturedVars.empty()) {
1827 CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
"shared_arg_refs");
1832 llvm::ConstantInt::get(CGM.
SizeTy, CapturedVars.size())};
1834 OMPRTL_NVPTX__kmpc_begin_sharing_variables),
1840 Address SharedArgListAddress = CGF.EmitLoadOfPointer(
1842 .castAs<PointerType>());
1845 CGF.getPointerSize());
1847 if (V->getType()->isIntegerTy())
1848 PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
1851 CGF.EmitStoreOfScalar(PtrV, Dst,
false,
1869 if (!CapturedVars.empty())
1870 CGF.EmitRuntimeCall(
1874 Work.emplace_back(WFn);
1877 auto &&LNParallelGen = [
this, Loc, &SeqGen, &L0ParallelGen, &CodeGen,
1881 if (IsInParallelRegion) {
1882 SeqGen(CGF, Action);
1883 }
else if (IsInTargetMasterThreadRegion) {
1884 L0ParallelGen(CGF, Action);
1897 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(
".exit");
1898 llvm::BasicBlock *SeqBB = CGF.createBasicBlock(
".sequential");
1899 llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(
".parcheck");
1900 llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(
".mastercheck");
1901 llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
1903 Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
1906 CGF.EmitBlock(ParallelCheckBB);
1913 Bld.CreateCondBr(Res, SeqBB, MasterCheckBB);
1914 CGF.EmitBlock(SeqBB);
1915 SeqGen(CGF, Action);
1916 CGF.EmitBranch(ExitBB);
1919 CGF.EmitBlock(MasterCheckBB);
1920 llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock(
"master.then");
1921 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock(
"omp_if.else");
1924 Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock);
1925 CGF.EmitBlock(MasterThenBB);
1926 L0ParallelGen(CGF, Action);
1927 CGF.EmitBranch(ExitBB);
1930 CGF.EmitBlock(ElseBlock);
1937 CGF.EmitBlock(ExitBB,
true);
1950 void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
1963 Address ThreadIDAddr = ZeroAddr;
1964 auto &&CodeGen = [
this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
1970 OutlinedFnArgs.push_back(ThreadIDAddr.
getPointer());
1971 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1972 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1983 NVPTXActionTy Action(
1992 if (IsInTargetMasterThreadRegion) {
2034 CGF.
Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
2041 CGF.
Builder.CreateICmpEQ(ThreadID, CounterVal);
2042 CGF.
Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
2071 "Cast type must sized.");
2073 "Val type must sized.");
2075 if (ValTy == CastTy)
2081 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
2104 "Unsupported bitwidth in shuffle instruction.");
2107 ? OMPRTL_NVPTX__kmpc_shuffle_int32
2108 : OMPRTL_NVPTX__kmpc_shuffle_int64;
2143 for (
int IntSize = 8; IntSize >= 1; IntSize /= 2) {
2157 llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
2159 llvm::PHINode *PhiSrc =
2160 Bld.CreatePHI(Ptr.
getType(), 2);
2161 PhiSrc->addIncoming(Ptr.
getPointer(), CurrentBB);
2162 llvm::PHINode *PhiDest =
2163 Bld.CreatePHI(ElemPtr.
getType(), 2);
2164 PhiDest->addIncoming(ElemPtr.
getPointer(), CurrentBB);
2170 Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
2180 PhiSrc->addIncoming(Ptr.
getPointer(), ThenBB);
2181 PhiDest->addIncoming(ElemPtr.
getPointer(), ThenBB);
2193 Size = Size % IntSize;
2229 llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2230 llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
2231 llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
2236 unsigned Size = Privates.size();
2237 for (
const Expr *Private : Privates) {
2242 bool ShuffleInElement =
false;
2245 bool UpdateDestListPtr =
false;
2248 bool IncrScratchpadSrc =
false;
2249 bool IncrScratchpadDest =
false;
2252 case RemoteLaneToThread: {
2262 DestElementPtrAddr =
2265 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
2266 ShuffleInElement =
true;
2267 UpdateDestListPtr =
true;
2280 DestElementPtrAddr =
2287 case ThreadToScratchpad: {
2299 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
2301 Bld.CreateNUWAdd(DestBase.
getPointer(), CurrentOffset);
2302 ScratchPadElemAbsolutePtrVal =
2303 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
2304 DestElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
2306 IncrScratchpadDest =
true;
2309 case ScratchpadToThread: {
2314 Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
2316 Bld.CreateNUWAdd(SrcBase.
getPointer(), CurrentOffset);
2317 ScratchPadElemAbsolutePtrVal =
2318 Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.
VoidPtrTy);
2319 SrcElementAddr =
Address(ScratchPadElemAbsolutePtrVal,
2321 IncrScratchpadSrc =
true;
2325 DestElementPtrAddr =
2328 CGF.
CreateMemTemp(Private->getType(),
".omp.reduction.element");
2329 UpdateDestListPtr =
true;
2343 if (ShuffleInElement) {
2345 RemoteLaneOffset, Private->getExprLoc());
2347 if (Private->getType()->isScalarType()) {
2350 Private->
getType(), Private->getExprLoc());
2367 if (UpdateDestListPtr) {
2370 DestElementPtrAddr,
false,
2377 if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
2381 ScratchpadBasePtr = Bld.CreateNUWAdd(
2383 Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
2386 ScratchpadBasePtr = Bld.CreateNUWSub(
2387 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
2388 ScratchpadBasePtr = Bld.CreateUDiv(
2390 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
2391 ScratchpadBasePtr = Bld.CreateNUWAdd(
2392 ScratchpadBasePtr, llvm::ConstantInt::get(CGM.
SizeTy, 1));
2393 ScratchpadBasePtr = Bld.CreateNUWMul(
2395 llvm::ConstantInt::get(CGM.
SizeTy, GlobalMemoryAlignment));
2397 if (IncrScratchpadDest)
2445 Args.push_back(&ReduceListArg);
2446 Args.push_back(&ScratchPadArg);
2447 Args.push_back(&IndexArg);
2448 Args.push_back(&WidthArg);
2449 Args.push_back(&ShouldReduceArg);
2455 "_omp_reduction_load_and_reduce", &CGM.
getModule());
2457 Fn->setDoesNotRecurse();
2474 AddrScratchPadArg,
false, C.
VoidPtrTy, Loc);
2488 AddrShouldReduceArg,
false, Int32Ty, Loc);
2492 Bld.CreatePtrToInt(ScratchPadBase, CGM.
SizeTy);
2498 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.remote_red_list");
2502 SrcDataAddr, RemoteReduceList,
2511 llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal);
2512 Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
2518 ReduceListAddr.getPointer(), CGF.
VoidPtrTy);
2520 RemoteReduceList.getPointer(), CGF.
VoidPtrTy);
2522 CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr});
2523 Bld.CreateBr(MergeBB);
2529 RemoteReduceList, ReduceListAddr);
2530 Bld.CreateBr(MergeBB);
2569 Args.push_back(&ReduceListArg);
2570 Args.push_back(&ScratchPadArg);
2571 Args.push_back(&IndexArg);
2572 Args.push_back(&WidthArg);
2578 "_omp_reduction_copy_to_scratchpad", &CGM.
getModule());
2580 Fn->setDoesNotRecurse();
2596 AddrScratchPadArg,
false, C.
VoidPtrTy, Loc);
2611 Bld.CreatePtrToInt(ScratchPadBase, CGM.
SizeTy);
2615 SrcDataAddr, DestDataAddr,
2654 Args.push_back(&ReduceListArg);
2655 Args.push_back(&NumWarpsArg);
2661 "_omp_reduction_inter_warp_copy_func", &CGM.
getModule());
2663 Fn->setDoesNotRecurse();
2676 StringRef TransferMediumName =
2677 "__openmp_nvptx_data_transfer_temporary_storage";
2678 llvm::GlobalVariable *TransferMedium =
2679 M.getGlobalVariable(TransferMediumName);
2680 if (!TransferMedium) {
2681 auto *Ty = llvm::ArrayType::get(CGM.
Int64Ty, WarpSize);
2683 TransferMedium =
new llvm::GlobalVariable(
2685 false, llvm::GlobalVariable::CommonLinkage,
2686 llvm::Constant::getNullValue(Ty), TransferMediumName,
2687 nullptr, llvm::GlobalVariable::NotThreadLocal,
2688 SharedAddressSpace);
2708 for (
const Expr *Private : Privates) {
2718 llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID,
"warp_master");
2719 Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2735 llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
2736 TransferMedium, {llvm::Constant::getNullValue(CGM.
Int64Ty), WarpID});
2745 if (Private->getType()->isScalarType()) {
2757 Bld.CreateBr(MergeBB);
2760 Bld.CreateBr(MergeBB);
2782 Bld.CreateICmpULT(ThreadID, NumWarpsVal,
"is_active_thread");
2783 Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2788 llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
2789 TransferMedium, {llvm::Constant::getNullValue(CGM.
Int64Ty), ThreadID});
2790 Address SrcMediumPtr(SrcMediumPtrVal,
2807 if (Private->getType()->isScalarType()) {
2809 SrcMediumPtr,
false, Private->
getType(), Loc);
2818 Bld.CreateBr(W0MergeBB);
2821 Bld.CreateBr(W0MergeBB);
2919 Args.push_back(&ReduceListArg);
2920 Args.push_back(&LaneIDArg);
2921 Args.push_back(&RemoteLaneOffsetArg);
2922 Args.push_back(&AlgoVerArg);
2928 "_omp_reduction_shuffle_and_reduce_func", &CGM.
getModule());
2930 Fn->setDoesNotRecurse();
2959 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.remote_reduce_list");
2965 LocalReduceList, RemoteReduceList,
2966 {RemoteLaneOffsetArgVal,
2991 llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
2993 llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
2995 Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
2997 llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
2999 Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
3000 CondAlgo2 = Bld.CreateAnd(
3001 CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
3003 llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
3004 CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
3009 Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
3014 LocalReduceList.getPointer(), CGF.
VoidPtrTy);
3018 CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
3019 Bld.CreateBr(MergeBB);
3022 Bld.CreateBr(MergeBB);
3028 Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
3030 Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
3035 Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3039 RemoteReduceList, LocalReduceList);
3040 Bld.CreateBr(CpyMergeBB);
3043 Bld.CreateBr(CpyMergeBB);
3303 assert((TeamsReduction || ParallelReduction || SimdReduction) &&
3304 "Invalid reduction selection in emitReduction.");
3308 ReductionOps, Options);
3316 auto Size = RHSExprs.size();
3317 for (
const Expr *E : Privates) {
3318 if (E->getType()->isVariablyModifiedType())
3322 llvm::APInt ArraySize(32, Size);
3327 CGF.
CreateMemTemp(ReductionArrayTy,
".omp.reduction.red_list");
3328 auto IPriv = Privates.begin();
3330 for (
unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
3337 if ((*IPriv)->getType()->isVariablyModifiedType()) {
3355 Privates, LHSExprs, RHSExprs, ReductionOps);
3365 CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
3370 CGF.
Builder.getInt32(RHSExprs.size()),
3371 ReductionArrayTySize,
3377 if (ParallelReduction)
3381 else if (SimdReduction)
3386 if (TeamsReduction) {
3390 CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
3393 CGF.
Builder.getInt32(RHSExprs.size()),
3394 ReductionArrayTySize,
3406 llvm::BasicBlock *DefaultBB = CGF.
createBasicBlock(
".omp.reduction.default");
3407 llvm::SwitchInst *SwInst =
3408 CGF.
Builder.CreateSwitch(Res, DefaultBB, 1);
3415 SwInst->addCase(CGF.
Builder.getInt32(1), Case1BB);
3420 auto &&CodeGen = [
Privates, LHSExprs, RHSExprs, ReductionOps,
3422 auto IPriv = Privates.begin();
3423 auto ILHS = LHSExprs.begin();
3424 auto IRHS = RHSExprs.begin();
3425 for (
const Expr *E : ReductionOps) {
3427 cast<DeclRefExpr>(*IRHS));
3434 NVPTXActionTy Action(
3440 CGF.EmitBranch(DefaultBB);
3441 CGF.EmitBlock(DefaultBB,
true);
3446 const VarDecl *NativeParam)
const {
3451 const Type *NonQualTy = QC.
strip(ArgType);
3452 QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
3453 if (
const auto *
Attr = FD->
getAttr<OMPCaptureKindAttr>()) {
3454 if (
Attr->getCaptureKind() == OMPC_map) {
3461 enum { NVPTX_local_addr = 5 };
3464 if (isa<ImplicitParamDecl>(NativeParam))
3479 const VarDecl *TargetParam)
const {
3480 assert(NativeParam != TargetParam &&
3482 "Native arg must not be the same as target arg.");
3486 const Type *NonQualTy = QC.
strip(NativeParamType);
3487 QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
3488 unsigned NativePointeeAddrSpace =
3495 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
3499 TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
3500 NativePointeeAddrSpace));
3504 return NativeParamAddr;
3511 TargetArgs.reserve(Args.size());
3513 cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType());
3514 for (
unsigned I = 0, E = Args.size(); I < E; ++I) {
3515 if (FnType->isVarArg() && FnType->getNumParams() <= I) {
3516 TargetArgs.append(std::next(Args.begin(), I), Args.end());
3519 llvm::Type *TargetType = FnType->getParamType(I);
3521 if (!TargetType->isPointerTy()) {
3522 TargetArgs.emplace_back(NativeArg);
3527 NativeArg->getType()->getPointerElementType()->getPointerTo());
3528 TargetArgs.emplace_back(
3538 llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
3555 WrapperArgs.emplace_back(&ParallelLevelArg);
3556 WrapperArgs.emplace_back(&WrapperArg);
3563 Twine(OutlinedParallelFn->getName(),
"_wrapper"), &CGM.
getModule());
3566 Fn->setDoesNotRecurse();
3572 const auto *RD = CS.getCapturedRecordDecl();
3573 auto CurField = RD->field_begin();
3586 auto CI = CS.capture_begin();
3601 if (CS.capture_size() > 0 ||
3614 Src, CGF.
SizeTy->getPointerTo());
3619 cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
3620 Args.emplace_back(LB);
3625 Src, CGF.
SizeTy->getPointerTo());
3630 cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
3631 Args.emplace_back(UB);
3634 if (CS.capture_size() > 0) {
3636 for (
unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
3637 QualType ElemTy = CurField->getType();
3646 if (CI->capturesVariableByCopy() &&
3647 !CI->getCapturedVar()->getType()->isAnyPointerType()) {
3651 Args.emplace_back(Arg);
3665 assert(D &&
"Expected function or captured|block decl.");
3666 assert(FunctionGlobalizedDecls.count(CGF.
CurFn) == 0 &&
3667 "Function is registered already.");
3668 const Stmt *Body =
nullptr;
3669 bool NeedToDelayGlobalization =
false;
3670 if (
const auto *FD = dyn_cast<FunctionDecl>(D)) {
3671 Body = FD->getBody();
3672 }
else if (
const auto *BD = dyn_cast<BlockDecl>(D)) {
3673 Body = BD->getBody();
3674 }
else if (
const auto *CD = dyn_cast<CapturedDecl>(D)) {
3675 Body = CD->getBody();
3680 CheckVarsEscapingDeclContext VarChecker(CGF);
3681 VarChecker.Visit(Body);
3682 const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
3684 VarChecker.getEscapedVariableLengthDecls();
3685 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
3687 auto I = FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
3688 I->getSecond().MappedParams =
3689 llvm::make_unique<CodeGenFunction::OMPMapVars>();
3690 I->getSecond().GlobalRecord = GlobalizedVarsRecord;
3691 I->getSecond().EscapedParameters.insert(
3692 VarChecker.getEscapedParameters().begin(),
3693 VarChecker.getEscapedParameters().end());
3694 I->getSecond().EscapedVariableLengthDecls.append(
3695 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
3696 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
3697 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
3698 assert(VD->isCanonicalDecl() &&
"Expected canonical declaration");
3699 const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
3702 if (!NeedToDelayGlobalization) {
3705 GlobalizationScope() =
default;
3709 .emitGenericVarsEpilog(CGF);
3722 auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
3723 if (I == FunctionGlobalizedDecls.end())
3725 auto VDI = I->getSecond().LocalVarData.find(VD);
3726 if (VDI != I->getSecond().LocalVarData.end())
3727 return VDI->second.second;
3732 auto VDI = I->getSecond().LocalVarData.find(
3733 cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
3735 if (VDI != I->getSecond().LocalVarData.end())
3736 return VDI->second.second;
3743 FunctionGlobalizedDecls.erase(CGF.
CurFn);
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
llvm::PointerType * Int8PtrPtrTy
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
const BlockDecl * getBlockDecl() const
static const Decl * getCanonicalDecl(const Decl *D)
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T)
Given a value of type T* that may not be to a complete object, construct an l-value with the natural ...
static llvm::Value * emitCopyToScratchpad(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that stores reduced data from the team master to a scratchpad array in g...
Other implicit parameter.
A class which contains all the information about a particular captured value.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
CodeGenTypes & getTypes()
ArrayRef< OMPClause * > clauses()
llvm::Type * ConvertTypeForMem(QualType T)
static llvm::Value * getNVPTXLaneID(CodeGenFunction &CGF)
Get the id of the current lane in the Warp.
static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, const OMPExecutableDirective &D)
Check if the parallel directive has an 'if' clause with non-constant or false condition.
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::LLVMContext & getLLVMContext()
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
Address CreateConstGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = T* ...
attr_iterator attr_begin() const
Stmt - This represents one statement.
static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, llvm::Value *NumThreads)
Get barrier #ID to synchronize selected (multiple of warp size) threads in a CTA. ...
llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive.
Decl - This represents one declaration (or definition), e.g.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
CharUnits getPointerSize() const
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter...
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
static bool stable_sort_comparator(const PrivateDataTy P1, const PrivateDataTy P2)
This represents 'if' clause in the '#pragma omp ...' directive.
llvm::Value * ScratchpadIndex
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
The base class of the type hierarchy.
bool isZero() const
isZero - Test whether the quantity equals zero.
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Describes the capture of a variable or of this, or of a C++1y init-capture.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
Represents a variable declaration or definition.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
LangAS getLangASFromTargetAS(unsigned TargetAS)
This represents 'num_threads' clause in the '#pragma omp ...' directive.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
OpenMPDirectiveKind ReductionKind
llvm::Value * getPointer() const
unsigned getAddressSpace() const
Return the address space that this address resides in.
SPMD execution mode (all threads are worker threads).
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Represents a struct/union/class.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
Address getAddress() const
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
attr_iterator attr_end() const
llvm::IntegerType * Int64Ty
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
Represents a member of a struct/union/class.
llvm::IntegerType * SizeTy
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
static llvm::Value * getMasterThreadID(CodeGenFunction &CGF)
Get the thread id of the OMP master thread.
llvm::CallInst * EmitRuntimeCall(llvm::Value *callee, const Twine &name="")
void startDefinition()
Starts the definition of this tag declaration.
bool isReferenceType() const
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
OpenMPDirectiveKind getDirectiveKind() const
void InitTempAlloca(Address Alloca, llvm::Value *Value)
InitTempAlloca - Provide an initial value for the given alloca which will be observable at all locati...
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
CharUnits - This is an opaque type for sizes expressed in character units.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
CharUnits getAlignment() const
Return the alignment of this pointer.
llvm::PointerType * VoidPtrTy
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
SourceLocation getLocStart() const LLVM_READONLY
Returns starting location of directive kind.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
static llvm::Value * getNVPTXWarpID(CodeGenFunction &CGF)
Get the id of the warp in the block.
Scope - A scope is a transient data structure that is used while parsing the program.
llvm::PointerType * VoidPtrPtrTy
static CGOpenMPRuntimeNVPTX::DataSharingMode getDataSharingMode(CodeGenModule &CGM)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
llvm::Value * emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
virtual llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
CharUnits getPointerAlign() const
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
static llvm::Value * createRuntimeShuffleFunction(CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
static const Stmt * getSingleCompoundChild(const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one...
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create a new runtime function with the specified type and name.
static void syncCTAThreads(CodeGenFunction &CGF)
Synchronize all GPU threads in a block.
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
ASTContext & getContext() const
Describes the capture of either a variable, or 'this', or variable-length array type.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate', 'reduction' etc.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Expr - This represents one expression.
virtual llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
const Expr * getCallee() const
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus the largest non-variably-sized element type.
llvm::PointerType * getType() const
Return the type of the pointer value.
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
DeclContext * getDeclContext()
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
llvm::IntegerType * Int32Ty
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
QualType getRecordType(const RecordDecl *Decl) const
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
MachineConfiguration
GPU Configuration: This information can be derived from cuda registers, however, providing compile ti...
const LangOptions & getLangOpts() const
ASTContext & getContext() const
OpenMPProcBindClauseKind
OpenMP attributes for 'proc_bind' clause.
static llvm::Value * emitReduceScratchpadFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc)
This function emits a helper that loads data from the scratchpad array and (optionally) reduces it wi...
Non-SPMD execution mode (1 master thread, others are workers).
llvm::Value * ScratchpadWidth
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
GlobalDecl - represents a global declaration.
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive.
SourceLocation getLocStart() const LLVM_READONLY
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
This captures a statement into a function.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Encodes a location in the source.
static llvm::Value * getThreadLimit(CodeGenFunction &CGF, bool IsInSPMDExecutionMode=false)
Get the value of the thread_limit clause in the teams directive.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
Expr * getSubExpr() const
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
This is a basic class for representing single OpenMP executable directive.
CastKind getCastKind() const
llvm::IntegerType * Int16Ty
DeclStmt - Adaptor class for mixing declarations with statements and expressions. ...
OpenMPDirectiveKind
OpenMP directives.
This file defines OpenMP nodes for declarative directives.
This is a basic class for representing single OpenMP clause.
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, bool Mode)
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
Stmt * getCapturedStmt()
Retrieve the statement being captured.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language...
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of the outlined function if this is required for the target.
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Dataflow Directional Tag Classes.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
static void getNVPTXCTABarrier(CodeGenFunction &CGF)
Get barrier to synchronize all threads in a block.
A qualifier set is used to build a set of qualifiers.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
ArrayRef< Capture > captures() const
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
static void emitReductionListCopy(CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr *> Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr})
Emit instructions to copy a Reduce list, which contains partially aggregated values, in the specified direction.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
SourceLocation getLocStart() const LLVM_READONLY
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = T* ...
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
llvm::Module & getModule() const
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads)
Synchronize worker threads in a parallel region.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
llvm::Constant * createNVPTXRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function for the current OpenMP implementation.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
This file defines OpenMP AST classes for executable directives and clauses.
Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = [n x T]* ...
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
llvm::Type * getElementType() const
Return the type of the values stored in this address.
llvm::PointerType * Int8PtrTy
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
bool hasAssociatedStmt() const
Returns true if directive has associated statement.
ExecutionMode
Defines the execution mode.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
bool isLValueReferenceType() const
static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
static llvm::Value * emitShuffleAndReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc)
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
Generic data-sharing mode.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Privates[]
Gets the list of initial values for linear variables.
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
capture_range captures() const
Retrieve this lambda's captures.
CapturedRegionKind getKind() const
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
static llvm::Value * getNVPTXThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
CGCapturedStmtInfo * CapturedStmtInfo
const VariableArrayType * getAsVariableArrayType(QualType T) const
static llvm::Value * getNVPTXWarpSize(CodeGenFunction &CGF)
Get the GPU warp size.
llvm::Value * RemoteLaneOffset
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
A reference to a declared variable, function, enum, etc.
CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
void addAddressSpace(LangAS space)
static llvm::Value * emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that gathers Reduce lists from the first lane of every active warp to lanes in the first warp.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
LValue - This represents an lvalue reference.
Information for lazily generating a cleanup.
unsigned getTargetAddressSpace(QualType T) const
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
static llvm::Value * getNVPTXNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
llvm::Value * getPointer() const
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
Attr - This represents one attribute.
SourceLocation getLocation() const
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - returns the integer QualType matching the specified details: bitwidth, signed/unsigned.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for Info.