104#include "llvm/IR/IntrinsicsAMDGPU.h"
114#define DEBUG_TYPE "amdgpu-sw-lower-lds"
115#define COV5_HIDDEN_DYN_LDS_SIZE_ARG 15
123 AsanInstrumentLDS(
"amdgpu-asan-instrument-lds",
124 cl::desc(
"Run asan instrumentation on LDS instructions "
125 "lowered to global memory"),
130struct LDSAccessTypeInfo {
138struct KernelLDSParameters {
142 LDSAccessTypeInfo DirectAccess;
143 LDSAccessTypeInfo IndirectAccess;
145 LDSToReplacementIndicesMap;
153struct NonKernelLDSParameters {
160struct AsanInstrumentInfo {
166struct FunctionsAndLDSAccess {
174class AMDGPUSwLowerLDS {
177 DomTreeCallback Callback)
178 : M(
Mod), AMDGPUTM(TM), IRB(M.getContext()), DTCallback(Callback) {}
180 void getUsesOfLDSByNonKernels();
181 void getNonKernelsWithLDSArguments(
const CallGraph &CG);
186 void buildSwLDSGlobal(
Function *Func);
187 void buildSwDynLDSGlobal(
Function *Func);
188 void populateSwMetadataGlobal(
Function *Func);
189 void populateSwLDSAttributeAndMetadata(
Function *Func);
190 void populateLDSToReplacementIndicesMap(
Function *Func);
191 void getLDSMemoryInstructions(
Function *Func,
193 void replaceKernelLDSAccesses(
Function *Func);
194 Value *getTranslatedGlobalMemoryPtrOfLDS(
Value *LoadMallocPtr,
Value *LDSPtr);
195 void translateLDSMemoryOperationsToGlobalMemory(
200 void buildNonKernelLDSOffsetTable(NonKernelLDSParameters &NKLDSParams);
201 void buildNonKernelLDSBaseTable(NonKernelLDSParameters &NKLDSParams);
203 getAddressesOfVariablesInKernel(
Function *Func,
205 void lowerNonKernelLDSAccesses(
Function *Func,
207 NonKernelLDSParameters &NKLDSParams);
209 updateMallocSizeForDynamicLDS(
Function *Func,
Value **CurrMallocSize,
210 Value *HiddenDynLDSSize,
218 DomTreeCallback DTCallback;
219 FunctionsAndLDSAccess FuncLDSAccessInfo;
220 AsanInstrumentInfo AsanInfo;
223template <
typename T>
SetVector<T> sortByName(std::vector<T> &&V) {
226 sort(V, [](
const auto *L,
const auto *R) {
227 return L->getName() < R->getName();
236 std::vector<GlobalVariable *>(Variables.
begin(), Variables.
end()));
244 if (Kernels.size() > UINT32_MAX) {
248 sortByName(std::vector<Function *>(Kernels.begin(), Kernels.end()));
249 for (
size_t i = 0; i < Kernels.size(); i++) {
254 Func->setMetadata(
"llvm.amdgcn.lds.kernel.id",
257 return OrderedKernels;
260void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(
const CallGraph &CG) {
264 for (
auto &K : FuncLDSAccessInfo.KernelToLDSParametersMap) {
269 for (
auto &
I : *CGN) {
278 Type *ArgTy = (*AI).getType();
283 FuncLDSAccessInfo.NonKernelsWithLDSArgument.
insert(CalledFunc);
286 FuncLDSAccessInfo.KernelsWithIndirectLDSAccess.
insert(Func);
292void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
293 for (
GlobalVariable *GV : FuncLDSAccessInfo.AllNonKernelLDSAccess) {
301 FuncLDSAccessInfo.NonKernelToLDSAccessMap[
F].insert(GV);
315 ConstantInt::get(IntTy, Address + 1));
316 GV->
setMetadata(LLVMContext::MD_absolute_symbol, MetadataNode);
327 Func->addFnAttr(
"amdgpu-lds-size", Buffer);
333 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
336 Intrinsic::donothing, {});
338 Value *UseInstance[1] = {
339 Builder.CreateConstInBoundsGEP1_32(SGV->
getValueType(), SGV, 0)};
341 Builder.CreateCall(Decl, {},
345void AMDGPUSwLowerLDS::buildSwLDSGlobal(
Function *Func) {
348 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
356 LDSParams.SwLDS->setSanitizerMetadata(MD);
359void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(
Function *Func) {
361 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
362 if (LDSParams.DirectAccess.DynamicLDSGlobals.empty() &&
363 LDSParams.IndirectAccess.DynamicLDSGlobals.empty())
369 "llvm.amdgcn." + Func->getName() +
".dynlds",
nullptr,
371 markUsedByKernel(Func, LDSParams.SwDynLDS);
374 LDSParams.SwDynLDS->setSanitizerMetadata(MD);
377void AMDGPUSwLowerLDS::populateSwLDSAttributeAndMetadata(
Function *Func) {
378 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
379 bool IsDynLDSUsed = LDSParams.SwDynLDS;
381 recordLDSAbsoluteAddress(M, LDSParams.SwLDS, 0);
382 addLDSSizeAttribute(Func,
Offset, IsDynLDSUsed);
383 if (LDSParams.SwDynLDS)
384 recordLDSAbsoluteAddress(M, LDSParams.SwDynLDS,
Offset);
387void AMDGPUSwLowerLDS::populateSwMetadataGlobal(
Function *Func) {
390 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
391 auto &Ctx = M.getContext();
392 auto &
DL = M.getDataLayout();
393 std::vector<Type *> Items;
395 std::vector<Constant *> Initializers;
396 Align MaxAlignment(1);
399 MaxAlignment = std::max(MaxAlignment, GVAlign);
402 for (
GlobalVariable *GV : LDSParams.DirectAccess.StaticLDSGlobals)
403 UpdateMaxAlignment(GV);
405 for (
GlobalVariable *GV : LDSParams.DirectAccess.DynamicLDSGlobals)
406 UpdateMaxAlignment(GV);
408 for (
GlobalVariable *GV : LDSParams.IndirectAccess.StaticLDSGlobals)
409 UpdateMaxAlignment(GV);
411 for (
GlobalVariable *GV : LDSParams.IndirectAccess.DynamicLDSGlobals)
412 UpdateMaxAlignment(GV);
417 MDItemOS <<
"llvm.amdgcn.sw.lds." << Func->getName() <<
".md.item";
421 uint32_t &MallocSize = LDSParams.MallocSize;
423 int AsanScale = AsanInfo.Scale;
424 auto buildInitializerForSwLDSMD =
426 for (
auto &GV : LDSGlobals) {
429 UniqueLDSGlobals.
insert(GV);
432 const uint64_t SizeInBytes =
DL.getTypeAllocSize(Ty);
433 Items.push_back(LDSItemTy);
440 MallocSize += SizeInBytes;
442 LDSParams.RedzoneOffsetAndSizeVector.emplace_back(MallocSize,
444 MallocSize += RightRedzoneSize;
447 alignTo(SizeInBytes + RightRedzoneSize, MaxAlignment);
449 ConstantInt::get(
Int32Ty, AlignedSize);
451 MallocSize =
alignTo(MallocSize, MaxAlignment);
454 AlignedSizeInBytesConst});
455 Initializers.push_back(InitItem);
459 SwLDSVector.
insert(LDSParams.SwLDS);
460 buildInitializerForSwLDSMD(SwLDSVector);
461 buildInitializerForSwLDSMD(LDSParams.DirectAccess.StaticLDSGlobals);
462 buildInitializerForSwLDSMD(LDSParams.IndirectAccess.StaticLDSGlobals);
463 buildInitializerForSwLDSMD(LDSParams.DirectAccess.DynamicLDSGlobals);
464 buildInitializerForSwLDSMD(LDSParams.IndirectAccess.DynamicLDSGlobals);
467 Type *Ty = LDSParams.SwLDS->getValueType();
468 const uint64_t SizeInBytes =
DL.getTypeAllocSize(Ty);
470 LDSParams.LDSSize = AlignedSize;
473 MDTypeOS <<
"llvm.amdgcn.sw.lds." << Func->getName() <<
".md.type";
478 MDOS <<
"llvm.amdgcn.sw.lds." << Func->getName() <<
".md";
484 LDSParams.SwLDSMetadata->setInitializer(
data);
487 LDSParams.SwLDS->setAlignment(MaxAlignment);
488 if (LDSParams.SwDynLDS)
489 LDSParams.SwDynLDS->setAlignment(MaxAlignment);
492 LDSParams.SwLDSMetadata->setSanitizerMetadata(MD);
495void AMDGPUSwLowerLDS::populateLDSToReplacementIndicesMap(
Function *Func) {
498 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
502 for (
auto &GV : LDSGlobals) {
505 UniqueLDSGlobals.
insert(GV);
506 LDSParams.LDSToReplacementIndicesMap[GV] = {0, Idx, 0};
512 SwLDSVector.
insert(LDSParams.SwLDS);
513 PopulateIndices(SwLDSVector, Idx);
514 PopulateIndices(LDSParams.DirectAccess.StaticLDSGlobals, Idx);
515 PopulateIndices(LDSParams.IndirectAccess.StaticLDSGlobals, Idx);
516 PopulateIndices(LDSParams.DirectAccess.DynamicLDSGlobals, Idx);
517 PopulateIndices(LDSParams.IndirectAccess.DynamicLDSGlobals, Idx);
521 Value *Replacement) {
523 auto ReplaceUsesLambda = [Func](
const Use &U) ->
bool {
524 auto *V = U.getUser();
526 auto *Func1 = Inst->getFunction();
535void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(
Function *Func) {
536 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
544 auto &IndirectAccess = LDSParams.IndirectAccess;
545 auto &DirectAccess = LDSParams.DirectAccess;
549 for (
auto &GV : LDSGlobals) {
552 if ((IndirectAccess.StaticLDSGlobals.contains(GV) ||
553 IndirectAccess.DynamicLDSGlobals.contains(GV)) &&
554 (!DirectAccess.StaticLDSGlobals.contains(GV) &&
555 !DirectAccess.DynamicLDSGlobals.contains(GV)))
559 UniqueLDSGlobals.
insert(GV);
560 auto &Indices = LDSParams.LDSToReplacementIndicesMap[GV];
561 assert(Indices.size() == 3);
563 ConstantInt::get(
Int32Ty, Indices[1]),
564 ConstantInt::get(
Int32Ty, Indices[2])};
566 SwLDSMetadataStructType, SwLDSMetadata, GEPIdx,
true);
568 Value *BasePlusOffset =
572 replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset);
575 ReplaceLDSGlobalUses(DirectAccess.StaticLDSGlobals);
576 ReplaceLDSGlobalUses(IndirectAccess.StaticLDSGlobals);
577 ReplaceLDSGlobalUses(DirectAccess.DynamicLDSGlobals);
578 ReplaceLDSGlobalUses(IndirectAccess.DynamicLDSGlobals);
581void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS(
584 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
589 assert(SwLDS && SwLDSMetadata);
594 Value *MaxAlignValueMinusOne = IRB.
getInt32(MaxAlignment - 1);
597 auto &Indices = LDSParams.LDSToReplacementIndicesMap[DynGV];
604 MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2Offset});
610 {Index0, Index1, Index2Size});
616 MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2AlignedSize});
618 Value *AlignedDynLDSSize =
619 IRB.
CreateAdd(CurrDynLDSSize, MaxAlignValueMinusOne);
620 AlignedDynLDSSize = IRB.
CreateUDiv(AlignedDynLDSSize, MaxAlignValue);
621 AlignedDynLDSSize = IRB.
CreateMul(AlignedDynLDSSize, MaxAlignValue);
622 IRB.
CreateStore(AlignedDynLDSSize, GEPForAlignedSize);
625 *CurrMallocSize = IRB.
CreateAdd(*CurrMallocSize, AlignedDynLDSSize);
639void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
645 LDSInstructions.
insert(&Inst);
648 LDSInstructions.
insert(&Inst);
651 LDSInstructions.
insert(&Inst);
654 LDSInstructions.
insert(&Inst);
658 LDSInstructions.
insert(&Inst);
661 LDSInstructions.
insert(&Inst);
664 LDSInstructions.
insert(&Inst);
672Value *AMDGPUSwLowerLDS::getTranslatedGlobalMemoryPtrOfLDS(
Value *LoadMallocPtr,
674 assert(LDSPtr &&
"Invalid LDS pointer operand");
688void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
691 LLVM_DEBUG(
dbgs() <<
"Translating LDS memory operations to global memory : "
696 Value *LIOperand = LI->getPointerOperand();
698 getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, LIOperand);
700 LI->getAlign(), LI->isVolatile());
701 NewLI->
setAtomic(LI->getOrdering(), LI->getSyncScopeID());
702 AsanInfo.Instructions.
insert(NewLI);
703 LI->replaceAllUsesWith(NewLI);
704 LI->eraseFromParent();
706 Value *SIOperand =
SI->getPointerOperand();
708 getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, SIOperand);
710 SI->getValueOperand(), Replacement,
SI->getAlign(),
SI->isVolatile());
712 AsanInfo.Instructions.
insert(NewSI);
713 SI->replaceAllUsesWith(NewSI);
714 SI->eraseFromParent();
716 Value *RMWPtrOperand = RMW->getPointerOperand();
717 Value *RMWValOperand = RMW->getValOperand();
719 getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, RMWPtrOperand);
721 RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(),
722 RMW->getOrdering(), RMW->getSyncScopeID());
724 AsanInfo.Instructions.
insert(NewRMW);
725 RMW->replaceAllUsesWith(NewRMW);
726 RMW->eraseFromParent();
728 Value *XCHGPtrOperand = XCHG->getPointerOperand();
730 getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, XCHGPtrOperand);
732 Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(),
733 XCHG->getAlign(), XCHG->getSuccessOrdering(),
734 XCHG->getFailureOrdering(), XCHG->getSyncScopeID());
736 AsanInfo.Instructions.
insert(NewXCHG);
737 XCHG->replaceAllUsesWith(NewXCHG);
738 XCHG->eraseFromParent();
740 Value *NewDest =
MI->getRawDest();
742 NewDest = getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, NewDest);
745 if (
MI->isAtomic()) {
747 NewDest, MSI->getValue(), MSI->getLength(),
748 MSI->getDestAlign().valueOrOne(), MSI->getElementSizeInBytes());
750 NewMI = IRB.
CreateMemSet(NewDest, MSI->getValue(), MSI->getLength(),
755 Value *NewSrc = MTI->getRawSource();
757 NewSrc = getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, NewSrc);
758 if (
MI->isAtomic()) {
759 if (
MI->getIntrinsicID() ==
760 Intrinsic::memmove_element_unordered_atomic) {
762 NewDest, MTI->getDestAlign().valueOrOne(), NewSrc,
763 MTI->getSourceAlign().valueOrOne(), MTI->getLength(),
764 MTI->getElementSizeInBytes());
767 NewDest, MTI->getDestAlign().valueOrOne(), NewSrc,
768 MTI->getSourceAlign().valueOrOne(), MTI->getLength(),
769 MTI->getElementSizeInBytes());
773 MI->getIntrinsicID(), NewDest, MTI->getDestAlign(), NewSrc,
774 MTI->getSourceAlign(), MTI->getLength(),
779 AsanInfo.Instructions.
insert(NewMI);
780 MI->replaceAllUsesWith(NewMI);
781 MI->eraseFromParent();
783 Value *AIOperand = ASC->getPointerOperand();
785 getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, AIOperand);
791 ASC->eraseFromParent();
797void AMDGPUSwLowerLDS::poisonRedzones(
Function *Func,
Value *MallocPtr) {
798 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
802 "__asan_poison_region",
805 auto RedzonesVec = LDSParams.RedzoneOffsetAndSizeVector;
806 size_t VecSize = RedzonesVec.size();
807 for (
unsigned i = 0; i < VecSize; i++) {
808 auto &RedzonePair = RedzonesVec[i];
809 uint64_t RedzoneOffset = RedzonePair.first;
810 uint64_t RedzoneSize = RedzonePair.second;
812 IRB.
getInt8Ty(), MallocPtr, {IRB.getInt64(RedzoneOffset)});
815 {RedzoneAddress, IRB.
getInt64(RedzoneSize)});
819void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(
Function *Func,
821 LLVM_DEBUG(
dbgs() <<
"Sw Lowering Kernel LDS for : " << Func->getName());
822 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
823 auto &Ctx = M.getContext();
824 auto *PrevEntryBlock = &Func->getEntryBlock();
826 getLDSMemoryInstructions(Func, LDSInstructions);
840 auto SplitIt = PrevEntryBlock->getFirstNonPHIOrDbgOrAlloca();
841 WIdBlock->splice(WIdBlock->end(), PrevEntryBlock, PrevEntryBlock->begin(),
846 AI->moveBefore(*WIdBlock, WIdBlock->end());
861 IRB.
CreateCondBr(WIdzCond, MallocBlock, PrevEntryBlock);
871 assert(SwLDS && SwLDSMetadata);
875 Value *CurrMallocSize;
881 for (
auto &GV : LDSGlobals) {
884 UniqueLDSGlobals.
insert(GV);
888 GetUniqueLDSGlobals(LDSParams.DirectAccess.StaticLDSGlobals);
889 GetUniqueLDSGlobals(LDSParams.IndirectAccess.StaticLDSGlobals);
890 unsigned NumStaticLDS = 1 + UniqueLDSGlobals.
size();
891 UniqueLDSGlobals.
clear();
894 auto *GEPForEndStaticLDSOffset =
897 ConstantInt::get(
Int32Ty, NumStaticLDS - 1),
898 ConstantInt::get(
Int32Ty, 0)});
900 auto *GEPForEndStaticLDSSize =
903 ConstantInt::get(
Int32Ty, NumStaticLDS - 1),
904 ConstantInt::get(
Int32Ty, 2)});
906 Value *EndStaticLDSOffset =
909 CurrMallocSize = IRB.
CreateAdd(EndStaticLDSOffset, EndStaticLDSSize);
911 CurrMallocSize = IRB.
getInt32(MallocSize);
913 if (LDSParams.SwDynLDS) {
916 "Dynamic LDS size query is only supported for CO V5 and later.");
922 {ConstantInt::get(Int64Ty, COV5_HIDDEN_DYN_LDS_SIZE_ARG)});
923 UniqueLDSGlobals.
clear();
924 GetUniqueLDSGlobals(LDSParams.DirectAccess.DynamicLDSGlobals);
925 GetUniqueLDSGlobals(LDSParams.IndirectAccess.DynamicLDSGlobals);
926 updateMallocSizeForDynamicLDS(Func, &CurrMallocSize, HiddenDynLDSSize,
930 CurrMallocSize = IRB.
CreateZExt(CurrMallocSize, Int64Ty);
935 Intrinsic::returnaddress, IRB.
getPtrTy(
DL.getProgramAddressSpace()),
941 Value *MallocCall = IRB.
CreateCall(MallocFunc, {CurrMallocSize, RAPtrToInt});
950 poisonRedzones(Func, MallocPtr);
958 auto *XYZCondPhi = IRB.
CreatePHI(Int1Ty, 2,
"xyzCond");
960 XYZCondPhi->addIncoming(IRB.
getInt1(1), MallocBlock);
965 Value *LoadMallocPtr =
969 replaceKernelLDSAccesses(Func);
973 translateLDSMemoryOperationsToGlobalMemory(Func, LoadMallocPtr,
982 RI->eraseFromParent();
1002 Intrinsic::returnaddress, IRB.
getPtrTy(
DL.getProgramAddressSpace()),
1006 IRB.
CreateCall(AsanFreeFunc, {MallocPtrToInt, RAPToInt});
1020Constant *AMDGPUSwLowerLDS::getAddressesOfVariablesInKernel(
1023 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
1027 auto *SwLDSMetadataStructType =
1033 for (
auto *GV : Variables) {
1034 auto It = LDSParams.LDSToReplacementIndicesMap.find(GV);
1035 if (It == LDSParams.LDSToReplacementIndicesMap.end()) {
1040 auto &Indices = It->second;
1042 ConstantInt::get(
Int32Ty, Indices[1]),
1043 ConstantInt::get(
Int32Ty, Indices[2])};
1045 SwLDSMetadata, GEPIdx,
true);
1046 Elements.push_back(
GEP);
1051void AMDGPUSwLowerLDS::buildNonKernelLDSBaseTable(
1052 NonKernelLDSParameters &NKLDSParams) {
1056 auto &Kernels = NKLDSParams.OrderedKernels;
1057 if (Kernels.empty())
1059 const size_t NumberKernels = Kernels.size();
1062 std::vector<Constant *> OverallConstantExprElts(NumberKernels);
1063 for (
size_t i = 0; i < NumberKernels; i++) {
1065 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
1066 OverallConstantExprElts[i] = LDSParams.SwLDS;
1079void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable(
1080 NonKernelLDSParameters &NKLDSParams) {
1088 auto &Variables = NKLDSParams.OrdereLDSGlobals;
1089 auto &Kernels = NKLDSParams.OrderedKernels;
1090 if (Variables.
empty() || Kernels.empty())
1092 const size_t NumberVariables = Variables.
size();
1093 const size_t NumberKernels = Kernels.size();
1100 std::vector<Constant *> overallConstantExprElts(NumberKernels);
1101 for (
size_t i = 0; i < NumberKernels; i++) {
1103 overallConstantExprElts[i] =
1104 getAddressesOfVariablesInKernel(Func, Variables);
1117void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
1119 NonKernelLDSParameters &NKLDSParams) {
1122 LLVM_DEBUG(
dbgs() <<
"Sw LDS lowering, lower non-kernel access for : "
1123 << Func->getName());
1124 auto InsertAt = Func->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
1129 getLDSMemoryInstructions(Func, LDSInstructions);
1131 auto *KernelId = IRB.
CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
1134 auto &OrdereLDSGlobals = NKLDSParams.OrdereLDSGlobals;
1136 LDSBaseTable->
getValueType(), LDSBaseTable, {IRB.getInt32(0), KernelId});
1139 Value *LoadMallocPtr =
1143 const auto *GVIt =
llvm::find(OrdereLDSGlobals, GV);
1144 assert(GVIt != OrdereLDSGlobals.end());
1145 uint32_t GVOffset = std::distance(OrdereLDSGlobals.begin(), GVIt);
1149 {IRB.getInt32(0), KernelId, IRB.getInt32(GVOffset)});
1153 Value *BasePlusOffset =
1155 LLVM_DEBUG(
dbgs() <<
"Sw LDS Lowering, Replace non-kernel LDS for "
1157 replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset);
1159 translateLDSMemoryOperationsToGlobalMemory(Func, LoadMallocPtr,
1163static void reorderStaticDynamicIndirectLDSSet(KernelLDSParameters &LDSParams) {
1166 auto &DirectAccess = LDSParams.DirectAccess;
1167 auto &IndirectAccess = LDSParams.IndirectAccess;
1168 LDSParams.DirectAccess.StaticLDSGlobals = sortByName(
1169 std::vector<GlobalVariable *>(DirectAccess.StaticLDSGlobals.begin(),
1170 DirectAccess.StaticLDSGlobals.end()));
1171 LDSParams.DirectAccess.DynamicLDSGlobals = sortByName(
1172 std::vector<GlobalVariable *>(DirectAccess.DynamicLDSGlobals.begin(),
1173 DirectAccess.DynamicLDSGlobals.end()));
1174 LDSParams.IndirectAccess.StaticLDSGlobals = sortByName(
1175 std::vector<GlobalVariable *>(IndirectAccess.StaticLDSGlobals.begin(),
1176 IndirectAccess.StaticLDSGlobals.end()));
1177 LDSParams.IndirectAccess.DynamicLDSGlobals = sortByName(
1178 std::vector<GlobalVariable *>(IndirectAccess.DynamicLDSGlobals.begin(),
1179 IndirectAccess.DynamicLDSGlobals.end()));
1182void AMDGPUSwLowerLDS::initAsanInfo() {
1188 bool OrShadowOffset;
1190 &
Offset, &Scale, &OrShadowOffset);
1191 AsanInfo.Scale = Scale;
1192 AsanInfo.Offset =
Offset;
1196 for (
auto &K : LDSAccesses) {
1200 if (
F->hasFnAttribute(Attribute::SanitizeAddress))
1206bool AMDGPUSwLowerLDS::run() {
1218 bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.
direct_access) ||
1226 bool DirectAccess) {
1227 for (
auto &K : LDSAccesses) {
1229 if (!
F || K.second.empty())
1235 FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
1236 {
F, KernelLDSParameters()});
1238 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[
F];
1240 FuncLDSAccessInfo.KernelsWithIndirectLDSAccess.
insert(
F);
1242 if (!DirectAccess) {
1244 LDSParams.IndirectAccess.DynamicLDSGlobals.insert(GV);
1246 LDSParams.IndirectAccess.StaticLDSGlobals.insert(GV);
1247 FuncLDSAccessInfo.AllNonKernelLDSAccess.insert(GV);
1250 LDSParams.DirectAccess.DynamicLDSGlobals.insert(GV);
1252 LDSParams.DirectAccess.StaticLDSGlobals.insert(GV);
1258 PopulateKernelStaticDynamicLDS(LDSUsesInfo.
direct_access,
true);
1264 for (
auto &K : FuncLDSAccessInfo.KernelToLDSParametersMap) {
1266 auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
1267 if (LDSParams.DirectAccess.StaticLDSGlobals.empty() &&
1268 LDSParams.DirectAccess.DynamicLDSGlobals.empty() &&
1269 LDSParams.IndirectAccess.StaticLDSGlobals.empty() &&
1270 LDSParams.IndirectAccess.DynamicLDSGlobals.empty()) {
1275 {
"amdgpu-no-workitem-id-x",
"amdgpu-no-workitem-id-y",
1276 "amdgpu-no-workitem-id-z",
"amdgpu-no-heap-ptr"});
1277 if (!LDSParams.IndirectAccess.StaticLDSGlobals.empty() ||
1278 !LDSParams.IndirectAccess.DynamicLDSGlobals.empty())
1280 reorderStaticDynamicIndirectLDSSet(LDSParams);
1281 buildSwLDSGlobal(Func);
1282 buildSwDynLDSGlobal(Func);
1283 populateSwMetadataGlobal(Func);
1284 populateSwLDSAttributeAndMetadata(Func);
1285 populateLDSToReplacementIndicesMap(Func);
1287 DomTreeUpdater::UpdateStrategy::Lazy);
1288 lowerKernelLDSAccesses(Func, DTU);
1294 getUsesOfLDSByNonKernels();
1297 getNonKernelsWithLDSArguments(CG);
1300 if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
1301 !FuncLDSAccessInfo.NonKernelsWithLDSArgument.
empty()) {
1302 NonKernelLDSParameters NKLDSParams;
1303 NKLDSParams.OrderedKernels = getOrderedIndirectLDSAccessingKernels(
1304 FuncLDSAccessInfo.KernelsWithIndirectLDSAccess);
1305 NKLDSParams.OrdereLDSGlobals = getOrderedNonKernelAllLDSGlobals(
1306 FuncLDSAccessInfo.AllNonKernelLDSAccess);
1307 buildNonKernelLDSBaseTable(NKLDSParams);
1308 buildNonKernelLDSOffsetTable(NKLDSParams);
1309 for (
auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
1313 std::vector<GlobalVariable *>(LDSGlobals.
begin(), LDSGlobals.
end()));
1314 lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
1316 for (
Function *Func : FuncLDSAccessInfo.NonKernelsWithLDSArgument) {
1317 auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
1318 if (K.contains(Func))
1321 lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
1338 if (AsanInstrumentLDS) {
1345 for (
auto &Operand : OperandsToInstrument) {
1346 Value *Addr = Operand.getPtr();
1348 Operand.Alignment.valueOrOne(), Operand.TypeStoreSize,
1349 Operand.IsWrite,
nullptr,
false,
false, AsanInfo.Scale,
1358class AMDGPUSwLowerLDSLegacy :
public ModulePass {
1364 bool runOnModule(
Module &M)
override;
1371char AMDGPUSwLowerLDSLegacy::ID = 0;
1375 "AMDGPU Software lowering of LDS",
false,
false)
1380bool AMDGPUSwLowerLDSLegacy::runOnModule(
Module &M) {
1383 if (!M.getModuleFlag(
"nosanitize_address"))
1386 getAnalysisIfAvailable<DominatorTreeWrapperPass>();
1388 return DTW ? &DTW->getDomTree() : nullptr;
1391 auto &TPC = getAnalysis<TargetPassConfig>();
1394 AMDGPUSwLowerLDS SwLowerLDSImpl(M, *AMDGPUTM, DTCallback);
1395 bool IsChanged = SwLowerLDSImpl.run();
1401 return new AMDGPUSwLowerLDSLegacy(TM);
1408 if (!M.getModuleFlag(
"nosanitize_address"))
1414 AMDGPUSwLowerLDS SwLowerLDSImpl(M,
TM, DTCallback);
1415 bool IsChanged = SwLowerLDSImpl.run();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
Target-Independent Code Generator Pass Configuration Options pass.
static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, DISubprogram *SP)
This class represents a conversion between pointers from one address space to another.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents any memset intrinsic.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
An instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
A node in the call graph for a module.
Function * getFunction() const
Returns the function that this call graph node represents.
The basic data container for the call graph of a Module of IR.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Subprogram description. Uses SubclassData1.
A parsed version of the target data layout string, and methods for querying it.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
static constexpr UpdateKind Insert
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
LLVM_ABI void setSanitizerMetadata(SanitizerMetadata Meta)
@ InternalLinkage
Rename collisions when linking (static functions).
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
uint64_t getAlignment() const
FIXME: Remove this function once transition to Align is over.
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
LLVM_ABI CallInst * CreateElementUnorderedAtomicMemMove(Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, uint32_t ElementSize, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert an element unordered-atomic memmove between the specified pointers.
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
CallInst * CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val, uint64_t Size, Align Alignment, uint32_t ElementSize, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert an element unordered-atomic memset of the region of memory starting at the given po...
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert a memset to the specified pointer and the specified value.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
LLVM_ABI CallInst * CreateElementUnorderedAtomicMemCpy(Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, uint32_t ElementSize, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert an element unordered-atomic memcpy between the specified pointers.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
LLVM_ABI CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
A container for an operand bundle being viewed as a set of values rather than a set of uses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Return a value (possibly void), from a function.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Class to represent struct types.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
const Triple & getTargetTriple() const
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldReplace returns true for the given Use.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
void getInterestingMemoryOperands(Module &M, Instruction *I, SmallVectorImpl< InterestingMemoryOperand > &Interesting)
Get all the memory operands from the instruction that need to be instrumented.
bool isDynamicLDS(const GlobalVariable &GV)
unsigned getAMDHSACodeObjectVersion(const Module &M)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
DenseMap< Function *, DenseSet< GlobalVariable * > > FunctionVariableMap
bool isLDSVariableToLower(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, Align Alignment, TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, bool Recover, int AsanScale, int AsanOffset)
Instrument the memory operand Addr.
uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes)
Given SizeInBytes of the Value to be instrumented, returns the redzone size corresponding to it.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
ModulePass * createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
char & AMDGPUSwLowerLDSLegacyPassID
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, bool IsKasan, uint64_t *ShadowBase, int *MappingScale, bool *OrShadowOffset)
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
const AMDGPUTargetMachine & TM
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
FunctionVariableMap direct_access
FunctionVariableMap indirect_access
This struct is a compact representation of a valid (non-zero power of two) alignment.