241#include "llvm/IR/IntrinsicsAMDGPU.h"
259#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
281 Type *remapType(
Type *SrcTy)
override;
// Empties the Map member by calling Map.clear().
282 void clear() { Map.clear(); }
288class BufferFatPtrToIntTypeMap :
public BufferFatPtrTypeLoweringBase {
289 using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
299class BufferFatPtrToStructTypeMap :
public BufferFatPtrTypeLoweringBase {
300 using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
309Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
Type *Ty) {
315 return *
Entry = remapScalar(PT);
321 return *
Entry = remapVector(VT);
329 bool IsUniqued = !TyAsStruct || TyAsStruct->
isLiteral();
338 Type *NewElem = remapTypeImpl(OldElem);
339 ElementTypes[
I] = NewElem;
340 Changed |= (OldElem != NewElem);
348 return *
Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
350 return *
Entry = FunctionType::get(ElementTypes[0],
360 SmallString<16>
Name(STy->getName());
368Type *BufferFatPtrTypeLoweringBase::remapType(
Type *SrcTy) {
369 return remapTypeImpl(SrcTy);
372Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) {
373 LLVMContext &Ctx = PT->getContext();
378Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) {
379 ElementCount
EC = VT->getElementCount();
380 LLVMContext &Ctx = VT->getContext();
399 if (!ST->isLiteral() || ST->getNumElements() != 2)
405 return MaybeRsrc && MaybeOff &&
414 return isBufferFatPtrOrVector(U.get()->getType());
427class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
428 :
public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor, bool> {
429 BufferFatPtrToIntTypeMap *TypeMap;
435 const TargetMachine *TM;
446 StoreFatPtrsAsIntsAndExpandMemcpyVisitor(BufferFatPtrToIntTypeMap *TypeMap,
447 const DataLayout &
DL,
449 const TargetMachine *TM)
450 : TypeMap(TypeMap), IRB(Ctx, InstSimplifyFolder(
DL)), TM(TM) {}
// Catch-all visit entry: performs no work on I and returns false.
// NOTE(review): presumably InstVisitor dispatches here for instruction kinds
// that have no dedicated visit method, and false means "not modified" - confirm.
453 bool visitInstruction(Instruction &
I) {
return false; }
454 bool visitAllocaInst(AllocaInst &
I);
455 bool visitLoadInst(LoadInst &LI);
456 bool visitStoreInst(StoreInst &SI);
457 bool visitGetElementPtrInst(GetElementPtrInst &
I);
459 bool visitMemCpyInst(MemCpyInst &MCI);
460 bool visitMemMoveInst(MemMoveInst &MMI);
461 bool visitMemSetInst(MemSetInst &MSI);
462 bool visitMemSetPatternInst(MemSetPatternInst &MSPI);
466Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
471 if (
Find != ConvertedForStore.
end())
474 Value *Cast = IRB.CreatePtrToInt(V, To, Name +
".int");
475 ConvertedForStore[
V] = Cast;
483 Type *FromPart = AT->getArrayElementType();
485 for (uint64_t
I = 0,
E = AT->getArrayNumElements();
I <
E; ++
I) {
488 fatPtrsToInts(
Field, FromPart, ToPart, Name +
"." + Twine(
I));
489 Ret = IRB.CreateInsertValue(Ret, NewField,
I);
492 for (
auto [Idx, FromPart, ToPart] :
494 Value *
Field = IRB.CreateExtractValue(V, Idx);
496 fatPtrsToInts(
Field, FromPart, ToPart, Name +
"." + Twine(Idx));
497 Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
500 ConvertedForStore[
V] = Ret;
504Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
509 Value *Cast = IRB.CreateIntToPtr(V, To, Name +
".ptr");
519 for (uint64_t
I = 0,
E = AT->getArrayNumElements();
I <
E; ++
I) {
522 intsToFatPtrs(
Field, FromPart, ToPart, Name +
"." + Twine(
I));
523 Ret = IRB.CreateInsertValue(Ret, NewField,
I);
526 for (
auto [Idx, FromPart, ToPart] :
528 Value *
Field = IRB.CreateExtractValue(V, Idx);
530 intsToFatPtrs(
Field, FromPart, ToPart, Name +
"." + Twine(Idx));
531 Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
537bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::processFunction(Function &
F) {
551 ConvertedForStore.
clear();
555bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &
I) {
556 Type *Ty =
I.getAllocatedType();
557 Type *NewTy = TypeMap->remapType(Ty);
560 I.setAllocatedType(NewTy);
564bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
565 GetElementPtrInst &
I) {
566 Type *Ty =
I.getSourceElementType();
567 Type *NewTy = TypeMap->remapType(Ty);
572 I.setSourceElementType(NewTy);
573 I.setResultElementType(TypeMap->remapType(
I.getResultElementType()));
577bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
579 Type *IntTy = TypeMap->remapType(Ty);
583 IRB.SetInsertPoint(&LI);
585 NLI->mutateType(IntTy);
586 NLI = IRB.Insert(NLI);
589 Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
595bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
597 Type *Ty =
V->getType();
598 Type *IntTy = TypeMap->remapType(Ty);
602 IRB.SetInsertPoint(&SI);
603 Value *IntV = fatPtrsToInts(V, Ty, IntTy,
V->getName());
607 SI.setOperand(0, IntV);
611bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst(
624bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst(
630 "memmove() on buffer descriptors is not implemented because pointer "
631 "comparison on buffer descriptors isn't implemented\n");
634bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst(
644bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst(
645 MemSetPatternInst &MSPI) {
670class LegalizeBufferContentTypesVisitor
671 :
public InstVisitor<LegalizeBufferContentTypesVisitor, bool> {
672 friend class InstVisitor<LegalizeBufferContentTypesVisitor, bool>;
676 const DataLayout &
DL;
680 Type *scalarArrayTypeAsVector(
Type *MaybeArrayType);
681 Value *arrayToVector(
Value *V,
Type *TargetType,
const Twine &Name);
682 Value *vectorToArray(
Value *V,
Type *OrigType,
const Twine &Name);
690 Value *makeLegalNonAggregate(
Value *V,
Type *TargetType,
const Twine &Name);
691 Value *makeIllegalNonAggregate(
Value *V,
Type *OrigType,
const Twine &Name);
704 void getVecSlices(
Type *
T, SmallVectorImpl<VecSlice> &Slices);
706 Value *extractSlice(
Value *Vec, VecSlice S,
const Twine &Name);
707 Value *insertSlice(
Value *Whole,
Value *Part, VecSlice S,
const Twine &Name);
717 Type *intrinsicTypeFor(
Type *LegalType);
719 bool visitLoadImpl(LoadInst &OrigLI,
Type *PartType,
720 SmallVectorImpl<uint32_t> &AggIdxs, uint64_t AggByteOffset,
721 Value *&Result,
const Twine &Name);
723 std::pair<bool, bool> visitStoreImpl(StoreInst &OrigSI,
Type *PartType,
724 SmallVectorImpl<uint32_t> &AggIdxs,
725 uint64_t AggByteOffset,
// Catch-all visit entry: performs no work on I and returns false.
// NOTE(review): presumably the fallback for instruction kinds other than the
// load/store visitors declared next to it, with false meaning "not modified" -
// confirm.
728 bool visitInstruction(Instruction &
I) {
return false; }
729 bool visitLoadInst(LoadInst &LI);
730 bool visitStoreInst(StoreInst &SI);
// Constructs the visitor. IRB is built from Ctx together with an
// InstSimplifyFolder created over DL, and the DL member is initialized from the
// DL parameter. The body is empty; no other setup happens here.
// NOTE(review): the DL member is declared as a const reference, so the
// DataLayout passed in has to outlive this visitor - confirm callers do so.
733 LegalizeBufferContentTypesVisitor(
const DataLayout &
DL, LLVMContext &Ctx)
734 : IRB(Ctx, InstSimplifyFolder(
DL)),
DL(
DL) {}
739Type *LegalizeBufferContentTypesVisitor::scalarArrayTypeAsVector(
Type *
T) {
743 Type *ET = AT->getElementType();
746 "should have recursed");
747 if (!
DL.typeSizeEqualsStoreSize(AT))
749 "loading padded arrays from buffer fat pinters should have recursed");
753Value *LegalizeBufferContentTypesVisitor::arrayToVector(
Value *V,
758 unsigned EC = VT->getNumElements();
759 for (
auto I : iota_range<unsigned>(0, EC,
false)) {
760 Value *Elem = IRB.CreateExtractValue(V,
I, Name +
".elem." + Twine(
I));
761 VectorRes = IRB.CreateInsertElement(VectorRes, Elem,
I,
762 Name +
".as.vec." + Twine(
I));
767Value *LegalizeBufferContentTypesVisitor::vectorToArray(
Value *V,
772 unsigned EC = AT->getNumElements();
773 for (
auto I : iota_range<unsigned>(0, EC,
false)) {
774 Value *Elem = IRB.CreateExtractElement(V,
I, Name +
".elem." + Twine(
I));
775 ArrayRes = IRB.CreateInsertValue(ArrayRes, Elem,
I,
776 Name +
".as.array." + Twine(
I));
781Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(
Type *
T) {
782 TypeSize
Size =
DL.getTypeStoreSizeInBits(
T);
784 if (!
DL.typeSizeEqualsStoreSize(
T))
785 T = IRB.getIntNTy(
Size.getFixedValue());
786 Type *ElemTy =
T->getScalarType();
792 unsigned ElemSize =
DL.getTypeSizeInBits(ElemTy).getFixedValue();
793 if (
isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) {
798 Type *BestVectorElemType =
nullptr;
799 if (
Size.isKnownMultipleOf(32))
800 BestVectorElemType = IRB.getInt32Ty();
801 else if (
Size.isKnownMultipleOf(16))
802 BestVectorElemType = IRB.getInt16Ty();
804 BestVectorElemType = IRB.getInt8Ty();
805 unsigned NumCastElems =
807 if (NumCastElems == 1)
808 return BestVectorElemType;
812Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
813 Value *V,
Type *TargetType,
const Twine &Name) {
814 Type *SourceType =
V->getType();
815 TypeSize SourceSize =
DL.getTypeSizeInBits(SourceType);
816 TypeSize TargetSize =
DL.getTypeSizeInBits(TargetType);
817 if (SourceSize != TargetSize) {
820 Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy, Name +
".as.scalar");
821 Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy, Name +
".zext");
823 SourceType = ByteScalarTy;
825 return IRB.CreateBitCast(V, TargetType, Name +
".legal");
828Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate(
829 Value *V,
Type *OrigType,
const Twine &Name) {
830 Type *LegalType =
V->getType();
831 TypeSize LegalSize =
DL.getTypeSizeInBits(LegalType);
832 TypeSize OrigSize =
DL.getTypeSizeInBits(OrigType);
833 if (LegalSize != OrigSize) {
836 Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy, Name +
".bytes.cast");
837 Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy, Name +
".trunc");
838 return IRB.CreateBitCast(Trunc, OrigType, Name +
".orig");
840 return IRB.CreateBitCast(V, OrigType, Name +
".real.ty");
843Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(
Type *LegalType) {
847 Type *ET = VT->getElementType();
850 if (VT->getNumElements() == 1)
852 if (
DL.getTypeSizeInBits(LegalType) == 96 &&
DL.getTypeSizeInBits(ET) < 32)
855 switch (VT->getNumElements()) {
859 return IRB.getInt8Ty();
861 return IRB.getInt16Ty();
863 return IRB.getInt32Ty();
873void LegalizeBufferContentTypesVisitor::getVecSlices(
874 Type *
T, SmallVectorImpl<VecSlice> &Slices) {
880 uint64_t ElemBitWidth =
881 DL.getTypeSizeInBits(VT->getElementType()).getFixedValue();
883 uint64_t ElemsPer4Words = 128 / ElemBitWidth;
884 uint64_t ElemsPer2Words = ElemsPer4Words / 2;
885 uint64_t ElemsPerWord = ElemsPer2Words / 2;
886 uint64_t ElemsPerShort = ElemsPerWord / 2;
887 uint64_t ElemsPerByte = ElemsPerShort / 2;
891 uint64_t ElemsPer3Words = ElemsPerWord * 3;
893 uint64_t TotalElems = VT->getNumElements();
895 auto TrySlice = [&](
unsigned MaybeLen) {
896 if (MaybeLen > 0 && Index + MaybeLen <= TotalElems) {
897 VecSlice Slice{
Index, MaybeLen};
904 while (Index < TotalElems) {
905 TrySlice(ElemsPer4Words) || TrySlice(ElemsPer3Words) ||
906 TrySlice(ElemsPer2Words) || TrySlice(ElemsPerWord) ||
907 TrySlice(ElemsPerShort) || TrySlice(ElemsPerByte);
911Value *LegalizeBufferContentTypesVisitor::extractSlice(
Value *Vec, VecSlice S,
916 if (S.Length == VecVT->getNumElements() && S.Index == 0)
919 return IRB.CreateExtractElement(Vec, S.Index,
920 Name +
".slice." + Twine(S.Index));
922 llvm::iota_range<int>(S.Index, S.Index + S.Length,
false));
923 return IRB.CreateShuffleVector(Vec, Mask, Name +
".slice." + Twine(S.Index));
926Value *LegalizeBufferContentTypesVisitor::insertSlice(
Value *Whole,
Value *Part,
932 if (S.Length == WholeVT->getNumElements() && S.Index == 0)
935 return IRB.CreateInsertElement(Whole, Part, S.Index,
936 Name +
".slice." + Twine(S.Index));
941 SmallVector<int> ExtPartMask(NumElems, -1);
946 Value *ExtPart = IRB.CreateShuffleVector(Part, ExtPartMask,
947 Name +
".ext." + Twine(S.Index));
949 SmallVector<int>
Mask =
954 return IRB.CreateShuffleVector(Whole, ExtPart, Mask,
955 Name +
".parts." + Twine(S.Index));
958bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
959 LoadInst &OrigLI,
Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,
960 uint64_t AggByteOff,
Value *&Result,
const Twine &Name) {
962 const StructLayout *Layout =
DL.getStructLayout(ST);
964 for (
auto [
I, ElemTy,
Offset] :
967 Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
968 AggByteOff +
Offset.getFixedValue(), Result,
969 Name +
"." + Twine(
I));
975 Type *ElemTy = AT->getElementType();
978 TypeSize ElemStoreSize =
DL.getTypeStoreSize(ElemTy);
980 for (
auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
983 Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
985 Result, Name + Twine(
I));
994 Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
995 Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
998 getVecSlices(LegalType, Slices);
999 bool HasSlices = Slices.
size() > 1;
1000 bool IsAggPart = !AggIdxs.
empty();
1002 if (!HasSlices && !IsAggPart) {
1003 Type *LoadableType = intrinsicTypeFor(LegalType);
1004 if (LoadableType == PartType)
1007 IRB.SetInsertPoint(&OrigLI);
1009 NLI->mutateType(LoadableType);
1010 NLI = IRB.Insert(NLI);
1011 NLI->setName(Name +
".loadable");
1013 LoadsRes = IRB.CreateBitCast(NLI, LegalType, Name +
".from.loadable");
1015 IRB.SetInsertPoint(&OrigLI);
1023 unsigned ElemBytes =
DL.getTypeStoreSize(ElemType);
1025 if (IsAggPart && Slices.
empty())
1027 for (VecSlice S : Slices) {
1030 int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
1032 Value *NewPtr = IRB.CreateGEP(
1034 OrigPtr->
getName() +
".off.ptr." + Twine(ByteOffset),
1036 Type *LoadableType = intrinsicTypeFor(SliceType);
1037 LoadInst *NewLI = IRB.CreateAlignedLoad(
1039 Name +
".off." + Twine(ByteOffset));
1045 Value *
Loaded = IRB.CreateBitCast(NewLI, SliceType,
1046 NewLI->
getName() +
".from.loadable");
1047 LoadsRes = insertSlice(LoadsRes, Loaded, S, Name);
1050 if (LegalType != ArrayAsVecType)
1051 LoadsRes = makeIllegalNonAggregate(LoadsRes, ArrayAsVecType, Name);
1052 if (ArrayAsVecType != PartType)
1053 LoadsRes = vectorToArray(LoadsRes, PartType, Name);
1056 Result = IRB.CreateInsertValue(Result, LoadsRes, AggIdxs, Name);
1062bool LegalizeBufferContentTypesVisitor::visitLoadInst(LoadInst &LI) {
1066 SmallVector<uint32_t> AggIdxs;
1069 bool Changed = visitLoadImpl(LI, OrigType, AggIdxs, 0, Result, LI.
getName());
1078std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
1079 StoreInst &OrigSI,
Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,
1080 uint64_t AggByteOff,
const Twine &Name) {
1082 const StructLayout *Layout =
DL.getStructLayout(ST);
1084 for (
auto [
I, ElemTy,
Offset] :
1087 Changed |= std::get<0>(visitStoreImpl(OrigSI, ElemTy, AggIdxs,
1088 AggByteOff +
Offset.getFixedValue(),
1089 Name +
"." + Twine(
I)));
1092 return std::make_pair(
Changed,
false);
1095 Type *ElemTy = AT->getElementType();
1098 TypeSize ElemStoreSize =
DL.getTypeStoreSize(ElemTy);
1100 for (
auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
1103 Changed |= std::get<0>(visitStoreImpl(
1104 OrigSI, ElemTy, AggIdxs,
1108 return std::make_pair(
Changed,
false);
1113 Value *NewData = OrigData;
1115 bool IsAggPart = !AggIdxs.
empty();
1117 NewData = IRB.CreateExtractValue(NewData, AggIdxs, Name);
1119 Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
1120 if (ArrayAsVecType != PartType) {
1121 NewData = arrayToVector(NewData, ArrayAsVecType, Name);
1124 Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
1125 if (LegalType != ArrayAsVecType) {
1126 NewData = makeLegalNonAggregate(NewData, LegalType, Name);
1130 getVecSlices(LegalType, Slices);
1131 bool NeedToSplit = Slices.
size() > 1 || IsAggPart;
1133 Type *StorableType = intrinsicTypeFor(LegalType);
1134 if (StorableType == PartType)
1135 return std::make_pair(
false,
false);
1136 NewData = IRB.CreateBitCast(NewData, StorableType, Name +
".storable");
1138 return std::make_pair(
true,
true);
1143 if (IsAggPart && Slices.
empty())
1145 unsigned ElemBytes =
DL.getTypeStoreSize(ElemType);
1147 for (VecSlice S : Slices) {
1150 int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
1152 IRB.CreateGEP(IRB.getInt8Ty(), OrigPtr, IRB.getInt32(ByteOffset),
1153 OrigPtr->
getName() +
".part." + Twine(S.Index),
1155 Value *DataSlice = extractSlice(NewData, S, Name);
1156 Type *StorableType = intrinsicTypeFor(SliceType);
1157 DataSlice = IRB.CreateBitCast(DataSlice, StorableType,
1158 DataSlice->
getName() +
".storable");
1162 NewSI->setOperand(0, DataSlice);
1163 NewSI->setOperand(1, NewPtr);
1166 return std::make_pair(
true,
false);
1169bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {
1172 IRB.SetInsertPoint(&SI);
1173 SmallVector<uint32_t> AggIdxs;
1174 Value *OrigData =
SI.getValueOperand();
1175 auto [
Changed, ModifiedInPlace] =
1176 visitStoreImpl(SI, OrigData->
getType(), AggIdxs, 0, OrigData->
getName());
1177 if (
Changed && !ModifiedInPlace)
1178 SI.eraseFromParent();
1182bool LegalizeBufferContentTypesVisitor::processFunction(Function &
F) {
1193static std::pair<Constant *, Constant *>
1196 return std::make_pair(
C->getAggregateElement(0u),
C->getAggregateElement(1u));
1201class FatPtrConstMaterializer final :
public ValueMaterializer {
1202 BufferFatPtrToStructTypeMap *TypeMap;
1208 ValueMapper InternalMapper;
1210 Constant *materializeBufferFatPtrConst(Constant *
C);
1214 FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
1217 InternalMapper(UnderlyingMap,
RF_None, TypeMap, this) {}
1218 ~FatPtrConstMaterializer() =
default;
1224Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *
C) {
1225 Type *SrcTy =
C->getType();
1227 if (
C->isNullValue())
1228 return ConstantAggregateZero::getNullValue(NewTy);
1241 if (Constant *S =
VC->getSplatValue()) {
1246 auto EC =
VC->getType()->getElementCount();
1252 for (
Value *
Op :
VC->operand_values()) {
1267 "fat pointer) values are not supported");
1271 "constant exprs containing ptr addrspace(7) (buffer "
1272 "fat pointer) values should have been expanded earlier");
1277Value *FatPtrConstMaterializer::materialize(
Value *V) {
1285 return materializeBufferFatPtrConst(
C);
1293class SplitPtrStructs :
public InstVisitor<SplitPtrStructs, PtrParts> {
1336 void processConditionals();
1386void SplitPtrStructs::copyMetadata(
Value *Dest,
Value *Src) {
1390 if (!DestI || !SrcI)
1393 DestI->copyMetadata(*SrcI);
1398 "of something that wasn't rewritten");
1399 auto *RsrcEntry = &RsrcParts[
V];
1400 auto *OffEntry = &OffParts[
V];
1401 if (*RsrcEntry && *OffEntry)
1402 return {*RsrcEntry, *OffEntry};
1406 return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1409 IRBuilder<InstSimplifyFolder>::InsertPointGuard Guard(IRB);
1414 return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1417 IRB.SetInsertPoint(*
I->getInsertionPointAfterDef());
1418 IRB.SetCurrentDebugLocation(
I->getDebugLoc());
1420 IRB.SetInsertPointPastAllocas(
A->getParent());
1421 IRB.SetCurrentDebugLocation(
DebugLoc());
1423 Value *Rsrc = IRB.CreateExtractValue(V, 0,
V->getName() +
".rsrc");
1424 Value *
Off = IRB.CreateExtractValue(V, 1,
V->getName() +
".off");
1425 return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1438 V =
GEP->getPointerOperand();
1440 V = ASC->getPointerOperand();
1444void SplitPtrStructs::getPossibleRsrcRoots(Instruction *
I,
1445 SmallPtrSetImpl<Value *> &Roots,
1446 SmallPtrSetImpl<Value *> &Seen) {
1450 for (
Value *In :
PHI->incoming_values()) {
1457 if (!Seen.
insert(SI).second)
1472void SplitPtrStructs::processConditionals() {
1473 SmallDenseMap<Value *, Value *> FoundRsrcs;
1474 SmallPtrSet<Value *, 4> Roots;
1475 SmallPtrSet<Value *, 4> Seen;
1476 for (Instruction *
I : Conditionals) {
1478 Value *Rsrc = RsrcParts[
I];
1480 assert(Rsrc && Off &&
"must have visited conditionals by now");
1482 std::optional<Value *> MaybeRsrc;
1483 auto MaybeFoundRsrc = FoundRsrcs.
find(
I);
1484 if (MaybeFoundRsrc != FoundRsrcs.
end()) {
1485 MaybeRsrc = MaybeFoundRsrc->second;
1487 IRBuilder<InstSimplifyFolder>::InsertPointGuard Guard(IRB);
1490 getPossibleRsrcRoots(
I, Roots, Seen);
1493 for (
Value *V : Roots)
1495 for (
Value *V : Seen)
1507 if (Diff.size() == 1) {
1508 Value *RootVal = *Diff.begin();
1512 MaybeRsrc = std::get<0>(getPtrParts(RootVal));
1514 MaybeRsrc = RootVal;
1522 IRB.SetInsertPoint(*
PHI->getInsertionPointAfterDef());
1523 IRB.SetCurrentDebugLocation(
PHI->getDebugLoc());
1525 NewRsrc = *MaybeRsrc;
1528 auto *RsrcPHI = IRB.CreatePHI(RsrcTy,
PHI->getNumIncomingValues());
1529 RsrcPHI->takeName(Rsrc);
1530 for (
auto [V, BB] :
llvm::zip(
PHI->incoming_values(),
PHI->blocks())) {
1531 Value *VRsrc = std::get<0>(getPtrParts(V));
1532 RsrcPHI->addIncoming(VRsrc, BB);
1534 copyMetadata(RsrcPHI,
PHI);
1539 auto *NewOff = IRB.CreatePHI(OffTy,
PHI->getNumIncomingValues());
1540 NewOff->takeName(Off);
1541 for (
auto [V, BB] :
llvm::zip(
PHI->incoming_values(),
PHI->blocks())) {
1542 assert(OffParts.
count(V) &&
"An offset part had to be created by now");
1543 Value *VOff = std::get<1>(getPtrParts(V));
1544 NewOff->addIncoming(VOff, BB);
1546 copyMetadata(NewOff,
PHI);
1555 ConditionalTemps.push_back(RsrcInst);
1556 RsrcInst->replaceAllUsesWith(NewRsrc);
1559 ConditionalTemps.push_back(OffInst);
1560 OffInst->replaceAllUsesWith(NewOff);
1565 for (
Value *V : Seen)
1566 FoundRsrcs[
V] = NewRsrc;
1571 if (RsrcInst != *MaybeRsrc) {
1572 ConditionalTemps.push_back(RsrcInst);
1573 RsrcInst->replaceAllUsesWith(*MaybeRsrc);
1576 for (
Value *V : Seen)
1577 FoundRsrcs[
V] = *MaybeRsrc;
1585void SplitPtrStructs::killAndReplaceSplitInstructions(
1586 SmallVectorImpl<Instruction *> &Origs) {
1587 for (Instruction *
I : ConditionalTemps)
1588 I->eraseFromParent();
1590 for (Instruction *
I : Origs) {
1596 for (DbgVariableRecord *Dbg : Dbgs) {
1597 auto &
DL =
I->getDataLayout();
1599 "We should've RAUW'd away loads, stores, etc. at this point");
1600 DbgVariableRecord *OffDbg =
Dbg->clone();
1601 auto [Rsrc,
Off] = getPtrParts(
I);
1603 int64_t RsrcSz =
DL.getTypeSizeInBits(Rsrc->
getType());
1604 int64_t OffSz =
DL.getTypeSizeInBits(
Off->getType());
1606 std::optional<DIExpression *> RsrcExpr =
1609 std::optional<DIExpression *> OffExpr =
1620 Dbg->setExpression(*RsrcExpr);
1621 Dbg->replaceVariableLocationOp(
I, Rsrc);
1628 I->replaceUsesWithIf(
Poison, [&](
const Use &U) ->
bool {
1634 if (
I->use_empty()) {
1635 I->eraseFromParent();
1638 IRB.SetInsertPoint(*
I->getInsertionPointAfterDef());
1639 IRB.SetCurrentDebugLocation(
I->getDebugLoc());
1640 auto [Rsrc,
Off] = getPtrParts(
I);
1642 Struct = IRB.CreateInsertValue(Struct, Rsrc, 0);
1643 Struct = IRB.CreateInsertValue(Struct, Off, 1);
1644 copyMetadata(Struct,
I);
1646 I->replaceAllUsesWith(Struct);
1647 I->eraseFromParent();
1651void SplitPtrStructs::setAlign(CallInst *Intr, Align
A,
unsigned RsrcArgIdx) {
1653 Intr->
addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx,
A));
1659 case AtomicOrdering::Release:
1660 case AtomicOrdering::AcquireRelease:
1661 case AtomicOrdering::SequentiallyConsistent:
1662 IRB.CreateFence(AtomicOrdering::Release, SSID);
1672 case AtomicOrdering::Acquire:
1673 case AtomicOrdering::AcquireRelease:
1674 case AtomicOrdering::SequentiallyConsistent:
1675 IRB.CreateFence(AtomicOrdering::Acquire, SSID);
1682Value *SplitPtrStructs::handleMemoryInst(Instruction *
I,
Value *Arg,
Value *Ptr,
1683 Type *Ty, Align Alignment,
1686 IRB.SetInsertPoint(
I);
1688 auto [Rsrc,
Off] = getPtrParts(Ptr);
1691 Args.push_back(Arg);
1692 Args.push_back(Rsrc);
1693 Args.push_back(Off);
1694 insertPreMemOpFence(Order, SSID);
1698 Args.push_back(IRB.getInt32(0));
1703 Args.push_back(IRB.getInt32(Aux));
1707 IID = Order == AtomicOrdering::NotAtomic
1708 ? Intrinsic::amdgcn_raw_ptr_buffer_load
1709 : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;
1711 IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
1713 switch (RMW->getOperation()) {
1715 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
1718 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
1721 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
1724 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
1727 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
1730 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
1733 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
1736 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
1739 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
1742 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
1745 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
1748 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
1751 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
1754 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_cond_sub_u32;
1757 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub_clamp_u32;
1761 "atomic floating point subtraction not supported for "
1762 "buffer resources and should've been expanded away");
1767 "atomic floating point fmaximum not supported for "
1768 "buffer resources and should've been expanded away");
1773 "atomic floating point fminimum not supported for "
1774 "buffer resources and should've been expanded away");
1779 "atomic floating point fmaximumnum not supported for "
1780 "buffer resources and should've been expanded away");
1785 "atomic floating point fminimumnum not supported for "
1786 "buffer resources and should've been expanded away");
1791 "atomic nand not supported for buffer resources and "
1792 "should've been expanded away");
1797 "wrapping increment/decrement not supported for "
1798 "buffer resources and should've been expanded away");
1805 auto *
Call = IRB.CreateIntrinsic(IID, Ty, Args);
1806 copyMetadata(
Call,
I);
1807 setAlign(
Call, Alignment, Arg ? 1 : 0);
1810 insertPostMemOpFence(Order, SSID);
1814 I->replaceAllUsesWith(
Call);
1818PtrParts SplitPtrStructs::visitInstruction(Instruction &
I) {
1819 return {
nullptr,
nullptr};
1822PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
1824 return {
nullptr,
nullptr};
1828 return {
nullptr,
nullptr};
1831PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
1833 return {
nullptr,
nullptr};
1834 Value *Arg =
SI.getValueOperand();
1835 handleMemoryInst(&SI, Arg,
SI.getPointerOperand(), Arg->
getType(),
1836 SI.getAlign(),
SI.getOrdering(),
SI.isVolatile(),
1837 SI.getSyncScopeID());
1838 return {
nullptr,
nullptr};
1841PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
1843 return {
nullptr,
nullptr};
1848 return {
nullptr,
nullptr};
1853PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
1856 return {
nullptr,
nullptr};
1857 IRB.SetInsertPoint(&AI);
1862 bool IsNonTemporal = AI.
getMetadata(LLVMContext::MD_nontemporal);
1864 auto [Rsrc,
Off] = getPtrParts(Ptr);
1865 insertPreMemOpFence(Order, SSID);
1873 IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
1875 Off, IRB.getInt32(0), IRB.getInt32(Aux)});
1876 copyMetadata(
Call, &AI);
1879 insertPostMemOpFence(Order, SSID);
1882 Res = IRB.CreateInsertValue(Res,
Call, 0);
1885 Res = IRB.CreateInsertValue(Res, Succeeded, 1);
1889 return {
nullptr,
nullptr};
1892PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &
GEP) {
1893 using namespace llvm::PatternMatch;
1894 Value *Ptr =
GEP.getPointerOperand();
1896 return {
nullptr,
nullptr};
1897 IRB.SetInsertPoint(&
GEP);
1899 auto [Rsrc,
Off] = getPtrParts(Ptr);
1900 const DataLayout &
DL =
GEP.getDataLayout();
1901 bool IsNUW =
GEP.hasNoUnsignedWrap();
1902 bool IsNUSW =
GEP.hasNoUnsignedSignedWrap();
1913 GEP.mutateType(FatPtrTy);
1915 GEP.mutateType(ResTy);
1917 if (BroadcastsPtr) {
1918 Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,
1920 Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,
1928 bool HasNonNegativeOff =
false;
1930 HasNonNegativeOff = !CI->isNegative();
1936 NewOff = IRB.CreateAdd(Off, OffAccum,
"",
1937 IsNUW || (IsNUSW && HasNonNegativeOff),
1940 copyMetadata(NewOff, &
GEP);
1943 return {Rsrc, NewOff};
1946PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
1949 return {
nullptr,
nullptr};
1950 IRB.SetInsertPoint(&PI);
1955 auto [Rsrc,
Off] = getPtrParts(Ptr);
1961 Res = IRB.CreateIntCast(Off, ResTy,
false,
1964 Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.
getName() +
".rsrc");
1965 Value *Shl = IRB.CreateShl(
1968 "", Width >= FatPtrWidth, Width > FatPtrWidth);
1969 Value *OffCast = IRB.CreateIntCast(Off, ResTy,
false,
1971 Res = IRB.CreateOr(Shl, OffCast);
1974 copyMetadata(Res, &PI);
1978 return {
nullptr,
nullptr};
1981PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) {
1984 return {
nullptr,
nullptr};
1985 IRB.SetInsertPoint(&PA);
1987 auto [Rsrc,
Off] = getPtrParts(Ptr);
1988 Value *Res = IRB.CreateIntCast(Off, PA.
getType(),
false);
1989 copyMetadata(Res, &PA);
1993 return {
nullptr,
nullptr};
1996PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
1998 return {
nullptr,
nullptr};
1999 IRB.SetInsertPoint(&IP);
2008 Type *RsrcTy = RetTy->getElementType(0);
2009 Type *OffTy = RetTy->getElementType(1);
2010 Value *RsrcPart = IRB.CreateLShr(
2013 Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy,
false);
2014 Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.
getName() +
".rsrc");
2016 IRB.CreateIntCast(
Int, OffTy,
false, IP.
getName() +
".off");
2018 copyMetadata(Rsrc, &IP);
2023PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &
I) {
2027 return {
nullptr,
nullptr};
2028 IRB.SetInsertPoint(&
I);
2031 if (
In->getType() ==
I.getType()) {
2032 auto [Rsrc,
Off] = getPtrParts(In);
2038 Type *RsrcTy = ResTy->getElementType(0);
2039 Type *OffTy = ResTy->getElementType(1);
2045 if (InConst && InConst->isNullValue()) {
2048 return {NullRsrc, ZeroOff};
2054 return {PoisonRsrc, PoisonOff};
2060 return {UndefRsrc, UndefOff};
2065 "only buffer resources (addrspace 8) and null/poison pointers can be "
2066 "cast to buffer fat pointers (addrspace 7)");
2068 return {
In, ZeroOff};
2071PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
2074 return {
nullptr,
nullptr};
2076 IRB.SetInsertPoint(&Cmp);
2077 ICmpInst::Predicate Pred =
Cmp.getPredicate();
2079 assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2080 "Pointer comparison is only equal or unequal");
2081 auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
2082 auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
2083 Value *Res = IRB.CreateICmp(Pred, LhsOff, RhsOff);
2084 copyMetadata(Res, &Cmp);
2087 Cmp.replaceAllUsesWith(Res);
2088 return {
nullptr,
nullptr};
2091PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &
I) {
2093 return {
nullptr,
nullptr};
2094 IRB.SetInsertPoint(&
I);
2095 auto [Rsrc,
Off] = getPtrParts(
I.getOperand(0));
2097 Value *RsrcRes = IRB.CreateFreeze(Rsrc,
I.getName() +
".rsrc");
2098 copyMetadata(RsrcRes, &
I);
2099 Value *OffRes = IRB.CreateFreeze(Off,
I.getName() +
".off");
2100 copyMetadata(OffRes, &
I);
2102 return {RsrcRes, OffRes};
2105PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &
I) {
2107 return {
nullptr,
nullptr};
2108 IRB.SetInsertPoint(&
I);
2109 Value *Vec =
I.getVectorOperand();
2110 Value *Idx =
I.getIndexOperand();
2111 auto [Rsrc,
Off] = getPtrParts(Vec);
2113 Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx,
I.getName() +
".rsrc");
2114 copyMetadata(RsrcRes, &
I);
2115 Value *OffRes = IRB.CreateExtractElement(Off, Idx,
I.getName() +
".off");
2116 copyMetadata(OffRes, &
I);
2118 return {RsrcRes, OffRes};
2121PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &
I) {
2125 return {
nullptr,
nullptr};
2126 IRB.SetInsertPoint(&
I);
2127 Value *Vec =
I.getOperand(0);
2128 Value *Elem =
I.getOperand(1);
2129 Value *Idx =
I.getOperand(2);
2130 auto [VecRsrc, VecOff] = getPtrParts(Vec);
2131 auto [ElemRsrc, ElemOff] = getPtrParts(Elem);
2134 IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx,
I.getName() +
".rsrc");
2135 copyMetadata(RsrcRes, &
I);
2137 IRB.CreateInsertElement(VecOff, ElemOff, Idx,
I.getName() +
".off");
2138 copyMetadata(OffRes, &
I);
2140 return {RsrcRes, OffRes};
2143PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &
I) {
2146 return {
nullptr,
nullptr};
2147 IRB.SetInsertPoint(&
I);
2149 Value *V1 =
I.getOperand(0);
2150 Value *V2 =
I.getOperand(1);
2151 ArrayRef<int>
Mask =
I.getShuffleMask();
2152 auto [V1Rsrc, V1Off] = getPtrParts(V1);
2153 auto [V2Rsrc, V2Off] = getPtrParts(V2);
2156 IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask,
I.getName() +
".rsrc");
2157 copyMetadata(RsrcRes, &
I);
2159 IRB.CreateShuffleVector(V1Off, V2Off, Mask,
I.getName() +
".off");
2160 copyMetadata(OffRes, &
I);
2162 return {RsrcRes, OffRes};
2165PtrParts SplitPtrStructs::visitPHINode(PHINode &
PHI) {
2167 return {
nullptr,
nullptr};
2168 IRB.SetInsertPoint(*
PHI.getInsertionPointAfterDef());
2174 Value *TmpRsrc = IRB.CreateExtractValue(&
PHI, 0,
PHI.getName() +
".rsrc");
2175 Value *TmpOff = IRB.CreateExtractValue(&
PHI, 1,
PHI.getName() +
".off");
2176 Conditionals.push_back(&
PHI);
2178 return {TmpRsrc, TmpOff};
2181PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
2183 return {
nullptr,
nullptr};
2184 IRB.SetInsertPoint(&SI);
2187 Value *True =
SI.getTrueValue();
2188 Value *False =
SI.getFalseValue();
2189 auto [TrueRsrc, TrueOff] = getPtrParts(True);
2190 auto [FalseRsrc, FalseOff] = getPtrParts(False);
2193 IRB.CreateSelect(
Cond, TrueRsrc, FalseRsrc,
SI.getName() +
".rsrc", &SI);
2194 copyMetadata(RsrcRes, &SI);
2195 Conditionals.push_back(&SI);
2197 IRB.CreateSelect(
Cond, TrueOff, FalseOff,
SI.getName() +
".off", &SI);
2198 copyMetadata(OffRes, &SI);
2200 return {RsrcRes, OffRes};
2211 case Intrinsic::amdgcn_make_buffer_rsrc:
2212 case Intrinsic::ptrmask:
2213 case Intrinsic::invariant_start:
2214 case Intrinsic::invariant_end:
2215 case Intrinsic::launder_invariant_group:
2216 case Intrinsic::strip_invariant_group:
2217 case Intrinsic::memcpy:
2218 case Intrinsic::memcpy_inline:
2219 case Intrinsic::memmove:
2220 case Intrinsic::memset:
2221 case Intrinsic::memset_inline:
2222 case Intrinsic::experimental_memset_pattern:
2223 case Intrinsic::amdgcn_load_to_lds:
2224 case Intrinsic::amdgcn_load_async_to_lds:
2229PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &
I) {
2234 case Intrinsic::amdgcn_make_buffer_rsrc: {
2236 return {
nullptr,
nullptr};
2238 Value *Stride =
I.getArgOperand(1);
2239 Value *NumRecords =
I.getArgOperand(2);
2242 Type *RsrcType = SplitType->getElementType(0);
2243 Type *OffType = SplitType->getElementType(1);
2244 IRB.SetInsertPoint(&
I);
2245 Value *Rsrc = IRB.CreateIntrinsic(IID, {RsrcType,
Base->getType()},
2247 copyMetadata(Rsrc, &
I);
2251 return {Rsrc,
Zero};
2253 case Intrinsic::ptrmask: {
2254 Value *Ptr =
I.getArgOperand(0);
2256 return {
nullptr,
nullptr};
2258 IRB.SetInsertPoint(&
I);
2259 auto [Rsrc,
Off] = getPtrParts(Ptr);
2260 if (
Mask->getType() !=
Off->getType())
2262 "pointer (data layout not set up correctly?)");
2263 Value *OffRes = IRB.CreateAnd(Off, Mask,
I.getName() +
".off");
2264 copyMetadata(OffRes, &
I);
2266 return {Rsrc, OffRes};
2270 case Intrinsic::invariant_start: {
2271 Value *Ptr =
I.getArgOperand(1);
2273 return {
nullptr,
nullptr};
2274 IRB.SetInsertPoint(&
I);
2275 auto [Rsrc,
Off] = getPtrParts(Ptr);
2277 auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {
I.getOperand(0), Rsrc});
2278 copyMetadata(NewRsrc, &
I);
2281 I.replaceAllUsesWith(NewRsrc);
2282 return {
nullptr,
nullptr};
2284 case Intrinsic::invariant_end: {
2285 Value *RealPtr =
I.getArgOperand(2);
2287 return {
nullptr,
nullptr};
2288 IRB.SetInsertPoint(&
I);
2289 Value *RealRsrc = getPtrParts(RealPtr).first;
2290 Value *InvPtr =
I.getArgOperand(0);
2292 Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->
getType()},
2293 {InvPtr,
Size, RealRsrc});
2294 copyMetadata(NewRsrc, &
I);
2297 I.replaceAllUsesWith(NewRsrc);
2298 return {
nullptr,
nullptr};
2300 case Intrinsic::launder_invariant_group:
2301 case Intrinsic::strip_invariant_group: {
2302 Value *Ptr =
I.getArgOperand(0);
2304 return {
nullptr,
nullptr};
2305 IRB.SetInsertPoint(&
I);
2306 auto [Rsrc,
Off] = getPtrParts(Ptr);
2307 Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->
getType()}, {Rsrc});
2308 copyMetadata(NewRsrc, &
I);
2311 return {NewRsrc,
Off};
2313 case Intrinsic::amdgcn_load_to_lds:
2314 case Intrinsic::amdgcn_load_async_to_lds: {
2315 Value *Ptr =
I.getArgOperand(0);
2317 return {
nullptr,
nullptr};
2318 IRB.SetInsertPoint(&
I);
2319 auto [Rsrc,
Off] = getPtrParts(Ptr);
2320 Value *LDSPtr =
I.getArgOperand(1);
2321 Value *LoadSize =
I.getArgOperand(2);
2322 Value *ImmOff =
I.getArgOperand(3);
2323 Value *Aux =
I.getArgOperand(4);
2324 Value *SOffset = IRB.getInt32(0);
2326 IID == Intrinsic::amdgcn_load_to_lds
2327 ? Intrinsic::amdgcn_raw_ptr_buffer_load_lds
2328 : Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds;
2330 NewIntr, {}, {Rsrc, LDSPtr, LoadSize,
Off, SOffset, ImmOff, Aux});
2331 copyMetadata(NewLoad, &
I);
2333 I.replaceAllUsesWith(NewLoad);
2334 return {
nullptr,
nullptr};
2337 return {
nullptr,
nullptr};
2340void SplitPtrStructs::processFunction(Function &
F) {
2342 SmallVector<Instruction *, 0> Originals(
2344 LLVM_DEBUG(
dbgs() <<
"Splitting pointer structs in function: " <<
F.getName()
2346 for (Instruction *
I : Originals) {
2354 assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
2355 "Can't have a resource but no offset");
2357 RsrcParts[
I] = Rsrc;
2361 processConditionals();
2362 killAndReplaceSplitInstructions(Originals);
2368 Conditionals.clear();
2369 ConditionalTemps.clear();
2373class AMDGPULowerBufferFatPointers :
public ModulePass {
2377 AMDGPULowerBufferFatPointers() : ModulePass(
ID) {}
2379 bool run(
Module &M,
const TargetMachine &TM);
2380 bool runOnModule(
Module &M)
override;
2382 void getAnalysisUsage(AnalysisUsage &AU)
const override;
2390 BufferFatPtrToStructTypeMap *TypeMap) {
2391 bool HasFatPointers =
false;
2394 HasFatPointers |= (
I.getType() != TypeMap->remapType(
I.getType()));
2396 for (
const Value *V :
I.operand_values())
2397 HasFatPointers |= (V->getType() != TypeMap->remapType(V->getType()));
2399 return HasFatPointers;
2403 BufferFatPtrToStructTypeMap *TypeMap) {
2404 Type *Ty =
F.getFunctionType();
2405 return Ty != TypeMap->remapType(Ty);
2421 while (!OldF->
empty()) {
2435 CloneMap[&NewArg] = &OldArg;
2436 NewArg.takeName(&OldArg);
2437 Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
2439 NewArg.mutateType(OldArgTy);
2440 OldArg.replaceAllUsesWith(&NewArg);
2441 NewArg.mutateType(NewArgTy);
2445 if (OldArgTy != NewArgTy && !IsIntrinsic)
2448 AttributeFuncs::typeIncompatible(NewArgTy, ArgAttr));
2455 AttributeFuncs::typeIncompatible(NewF->
getReturnType(), RetAttrs));
2457 NewF->
getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));
2465 CloneMap[&BB] = &BB;
2471bool AMDGPULowerBufferFatPointers::run(
Module &M,
const TargetMachine &TM) {
2473 const DataLayout &
DL =
M.getDataLayout();
2479 LLVMContext &Ctx =
M.getContext();
2481 BufferFatPtrToStructTypeMap StructTM(
DL);
2482 BufferFatPtrToIntTypeMap IntTM(
DL);
2486 Ctx.
emitError(
"global variables with a buffer fat pointer address "
2487 "space (7) are not supported");
2489 GV.eraseFromParent();
2494 Type *VT = GV.getValueType();
2495 if (VT != StructTM.remapType(VT)) {
2497 Ctx.
emitError(
"global variables that contain buffer fat pointers "
2498 "(address space 7 pointers) are unsupported. Use "
2499 "buffer resource pointers (address space 8) instead");
2501 GV.eraseFromParent();
2510 for (Function &
F :
M.functions())
2517 SmallPtrSet<Constant *, 8> Visited;
2518 SetVector<Constant *> BufferFatPtrConsts;
2519 while (!Worklist.
empty()) {
2521 if (!Visited.
insert(
C).second)
2537 StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM,
DL,
2538 M.getContext(), &TM);
2539 LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(
DL,
2541 for (Function &
F :
M.functions()) {
2544 Changed |= MemOpsRewrite.processFunction(
F);
2545 if (InterfaceChange || BodyChanges) {
2546 NeedsRemap.
push_back(std::make_pair(&
F, InterfaceChange));
2547 Changed |= BufferContentsTypeRewrite.processFunction(
F);
2550 if (NeedsRemap.
empty())
2557 FatPtrConstMaterializer Materializer(&StructTM, CloneMap);
2559 ValueMapper LowerInFuncs(CloneMap,
RF_None, &StructTM, &Materializer);
2560 for (
auto [
F, InterfaceChange] : NeedsRemap) {
2562 if (InterfaceChange)
2568 LowerInFuncs.remapFunction(*NewF);
2573 if (InterfaceChange) {
2574 F->replaceAllUsesWith(NewF);
2575 F->eraseFromParent();
2583 SplitPtrStructs Splitter(
DL,
M.getContext(), &TM);
2584 for (Function *
F : NeedsPostProcess)
2585 Splitter.processFunction(*
F);
2586 for (Function *
F : Intrinsics) {
2590 F->eraseFromParent();
2594 F->replaceAllUsesWith(*NewF);
2600bool AMDGPULowerBufferFatPointers::runOnModule(
Module &M) {
2601 TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
2602 const TargetMachine &TM = TPC.
getTM<TargetMachine>();
2606char AMDGPULowerBufferFatPointers::ID = 0;
2610void AMDGPULowerBufferFatPointers::getAnalysisUsage(
AnalysisUsage &AU)
const {
2614#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
2623 return new AMDGPULowerBufferFatPointers();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
static Function * moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy, ValueToValueMapTy &CloneMap)
Move the body of OldF into a new function, returning it.
static void makeCloneInPraceMap(Function *F, ValueToValueMapTy &CloneMap)
static bool isBufferFatPtrOrVector(Type *Ty)
static bool isSplitFatPtr(Type *Ty)
std::pair< Value *, Value * > PtrParts
static bool hasFatPointerInterface(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID)
Returns true if this intrinsic needs to be removed when it is applied to ptr addrspace(7) values.
static bool containsBufferFatPointers(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)
Returns true if there are values that have a buffer fat pointer in them, which means we'll need to perform rewrites on this function.
static Value * rsrcPartRoot(Value *V)
Returns the instruction that defines the resource part of the value V.
static constexpr unsigned BufferOffsetWidth
static bool isBufferFatPtrConst(Constant *C)
static std::pair< Constant *, Constant * > splitLoweredFatBufferConst(Constant *C)
Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered buffer fat pointer constant.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
AMD GCN specific subclass of TargetSubtarget.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
Machine Check Debug Module
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
This file defines generic set operations that may be used on sets of different types, including structured graphs.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
This class represents a conversion between pointers from one address space to another.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getNewValOperand()
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Value * getCompareOperand()
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
Value * getPointerOperand()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet removeAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const
Remove the specified attributes from this set.
LLVM Basic Block Representation.
LLVM_ABI void removeFromParent()
Unlink 'this' from the containing function, but do not delete it.
LLVM_ABI void insertInto(Function *Parent, BasicBlock *InsertBefore=nullptr)
Insert unlinked basic block into a function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
A parsed version of the target data layout string, and methods for querying it.
LLVM_ABI void insertBefore(DbgRecord *InsertBefore)
LLVM_ABI void eraseFromParent()
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
void setExpression(DIExpression *NewExpr)
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns a random concrete value if an operand is either a poison value or an undef value.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & front() const
iterator_range< arg_iterator > args()
AttributeList getAttributes() const
Return the attribute list for this Function.
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void updateAfterNameChange()
Update internal caches that depend on the function name (such as the intrinsic ID and libcall cache).
Type * getReturnType() const
Returns the type of the ret val.
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
static GEPNoWrapFlags noUnsignedWrap()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI void copyMetadata(const GlobalObject *Src, unsigned Offset)
Copy metadata from Src, adjusting offsets by Offset.
LinkageTypes getLinkage() const
void setDLLStorageClass(DLLStorageClassTypes C)
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
DLLStorageClassTypes getDLLStorageClass() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
Base class for instruction visitors.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This class represents a cast from an integer to a pointer.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
unsigned getDestAddressSpace() const
unsigned getSourceAddressSpace() const
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
This class represents the LLVM 'select' instruction.
ArrayRef< value_type > getArrayRef() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
MutableArrayRef< TypeSize > getMemberOffsets()
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
bool isLiteral() const
Return true if this type is uniqued by structural equivalence, false if it is a struct definition.
Type * getElementType(unsigned N) const
Primary interface to the complete machine description for the target machine.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const
Return a TargetTransformInfo for a given function.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getArrayElementType() const
ArrayRef< Type * > subtypes() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
This is a class that can be implemented by clients to remap types when cloning constants and instruct...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
iterator find(const KeyT &Val)
ValueMapIteratorImpl< MapT, const Value *, false > iterator
LLVM_ABI Constant * mapConstant(const Constant &C)
LLVM_ABI Value * mapValue(const Value &V)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
constexpr ScalarTy getFixedValue() const
self_iterator getIterator()
iterator insertAfter(iterator where, pointer New)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
bool match(Val *V, const Pattern &P)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
ModulePass * createAMDGPULowerBufferFatPointersPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vector.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
char & AMDGPULowerBufferFatPointersID
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
ArrayRef(const T &OneElt) -> ArrayRef< T >
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSetPattern as a loop.
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.