241#include "llvm/IR/IntrinsicsAMDGPU.h"
259#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
// NOTE(review): this whole chunk is a garbled excerpt — original file line
// numbers are fused onto code lines and interior lines are elided, so code
// is kept byte-identical and only comments are added.
// remapType: ValueMapTypeRemapper override of the (not fully visible)
// type-lowering base class; clear() drops the memoization map.
281  Type *remapType(
Type *SrcTy)
override;
282  void clear() { Map.clear(); }
// Remaps buffer fat pointer types to an integer representation — presumably
// for storing fat pointers as integers; confirm against the full file.
288class BufferFatPtrToIntTypeMap :
public BufferFatPtrTypeLoweringBase {
289  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
// Remaps buffer fat pointer types to a two-element struct (resource, offset)
// representation — see remapScalar/remapVector definitions below.
299class BufferFatPtrToStructTypeMap :
public BufferFatPtrTypeLoweringBase {
300  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
// Recursively rewrites a type: scalars via remapScalar, vectors via
// remapVector, and aggregates (struct/array/function) element-wise, caching
// the result through *Entry. Interior lines are elided in this excerpt.
309Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
Type *Ty) {
315    return *
Entry = remapScalar(PT);
321    return *
Entry = remapVector(VT);
// Literal (unnamed) structs are uniqued by LLVM and can be rebuilt directly.
329  bool IsUniqued = !TyAsStruct || TyAsStruct->
isLiteral();
// Remap each contained element and track whether anything changed.
338    Type *NewElem = remapTypeImpl(OldElem);
339    ElementTypes[
I] = NewElem;
340    Changed |= (OldElem != NewElem);
348    return *
Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
350    return *
Entry = FunctionType::get(ElementTypes[0],
// Named structs keep their name (copied before mutation/recreation).
360  SmallString<16>
Name(STy->getName());
// Public entry point: forwards to the memoized recursive implementation.
368Type *BufferFatPtrTypeLoweringBase::remapType(
Type *SrcTy) {
369  return remapTypeImpl(SrcTy);
// Scalar fat pointer -> struct type (body elided here).
372Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) {
373  LLVMContext &Ctx = PT->getContext();
// Vector-of-fat-pointers -> struct-of-vectors, preserving the element count.
378Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) {
379  ElementCount
EC = VT->getElementCount();
380  LLVMContext &Ctx = VT->getContext();
// Predicate fragment: a lowered fat pointer is a literal two-element struct
// whose members satisfy the (elided) MaybeRsrc/MaybeOff checks.
399  if (!ST->isLiteral() || ST->getNumElements() != 2)
405  return MaybeRsrc && MaybeOff &&
// Use-predicate fragment: checks the used value's type.
414  return isBufferFatPtrOrVector(U.get()->getType());
// InstVisitor that (per its name) rewrites stored fat pointers as integers
// and expands memcpy-family intrinsics; many members are elided here.
427class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
428    :
public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor, bool> {
429  BufferFatPtrToIntTypeMap *TypeMap;
435  const TargetMachine *TM;
// Constructor wires the type map, an InstSimplifyFolder-based IRBuilder,
// and the target machine.
446  StoreFatPtrsAsIntsAndExpandMemcpyVisitor(BufferFatPtrToIntTypeMap *TypeMap,
447                                           const DataLayout &
DL,
449                                           const TargetMachine *TM)
450      : TypeMap(TypeMap), IRB(Ctx, InstSimplifyFolder(
DL)),
TM(
TM) {}
// Default: instructions not listed below are left untouched.
453  bool visitInstruction(Instruction &
I) {
return false; }
454  bool visitAllocaInst(AllocaInst &
I);
455  bool visitLoadInst(LoadInst &LI);
456  bool visitStoreInst(StoreInst &SI);
457  bool visitGetElementPtrInst(GetElementPtrInst &
I);
459  bool visitMemCpyInst(MemCpyInst &MCI);
460  bool visitMemMoveInst(MemMoveInst &MMI);
461  bool visitMemSetInst(MemSetInst &MSI);
462  bool visitMemSetPatternInst(MemSetPatternInst &MSPI);
// Converts a fat-pointer-containing value V to its integer form, recursing
// through arrays and structs; results are cached in ConvertedForStore.
466Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
// Cache hit: reuse the previously converted value.
471  if (
Find != ConvertedForStore.
end())
// Scalar case: a plain ptrtoint cast.
474    Value *Cast = IRB.CreatePtrToInt(V, To, Name +
".int");
475    ConvertedForStore[
V] = Cast;
// Array case: convert element-by-element via extract/insertvalue.
483    Type *FromPart = AT->getArrayElementType();
485    for (uint64_t
I = 0,
E = AT->getArrayNumElements();
I <
E; ++
I) {
488          fatPtrsToInts(
Field, FromPart, ToPart, Name +
"." + Twine(
I));
489      Ret = IRB.CreateInsertValue(Ret, NewField,
I);
// Struct case: same element-wise conversion over the zipped field lists.
492    for (
auto [Idx, FromPart, ToPart] :
494      Value *
Field = IRB.CreateExtractValue(V, Idx);
496          fatPtrsToInts(
Field, FromPart, ToPart, Name +
"." + Twine(Idx));
497      Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
500  ConvertedForStore[
V] =
Ret;
// Inverse direction: integer form back to fat pointers (used after loads);
// scalar case is an inttoptr, aggregates recurse element-wise as above.
504Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
509    Value *Cast = IRB.CreateIntToPtr(V, To, Name +
".ptr");
519    for (uint64_t
I = 0,
E = AT->getArrayNumElements();
I <
E; ++
I) {
522          intsToFatPtrs(
Field, FromPart, ToPart, Name +
"." + Twine(
I));
523      Ret = IRB.CreateInsertValue(Ret, NewField,
I);
526    for (
auto [Idx, FromPart, ToPart] :
528      Value *
Field = IRB.CreateExtractValue(V, Idx);
530          intsToFatPtrs(
Field, FromPart, ToPart, Name +
"." + Twine(Idx));
531      Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
// Per-function driver; resets the store-conversion cache between functions.
537bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::processFunction(Function &
F) {
551  ConvertedForStore.
clear();
// Allocas of fat-pointer-containing types are retyped via the type map.
555bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &
I) {
556  Type *Ty =
I.getAllocatedType();
557  Type *NewTy = TypeMap->remapType(Ty);
560  I.setAllocatedType(NewTy);
// GEPs: both the source and result element types are remapped in place.
564bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
565    GetElementPtrInst &
I) {
566  Type *Ty =
I.getSourceElementType();
567  Type *NewTy = TypeMap->remapType(Ty);
572  I.setSourceElementType(NewTy);
573  I.setResultElementType(TypeMap->remapType(
I.getResultElementType()));
// Loads: load the integer form, then cast back to fat pointers.
577bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
579  Type *IntTy = TypeMap->remapType(Ty);
583  IRB.SetInsertPoint(&LI);
585  NLI->mutateType(IntTy);
586  NLI = IRB.Insert(NLI);
589  Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
// Stores: convert the stored value to integers first.
595bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
597  Type *Ty =
V->getType();
598  Type *IntTy = TypeMap->remapType(Ty);
602  IRB.SetInsertPoint(&SI);
603  Value *IntV = fatPtrsToInts(V, Ty, IntTy,
V->getName());
607  SI.setOperand(0, IntV);
// memcpy/memmove/memset/memset.pattern handlers (bodies largely elided);
// memmove is rejected because pointer comparison on buffer descriptors
// is not implemented (see the message below).
611bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst(
624bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst(
630      "memmove() on buffer descriptors is not implemented because pointer "
631      "comparison on buffer descriptors isn't implemented\n");
634bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst(
643bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst(
644    MemSetPatternInst &MSPI) {
// Visitor that rewrites loads/stores through buffer fat pointers so the
// accessed types are legal for the buffer intrinsics; declaration only,
// with many members elided in this excerpt.
668class LegalizeBufferContentTypesVisitor
669    :
public InstVisitor<LegalizeBufferContentTypesVisitor, bool> {
670  friend class InstVisitor<LegalizeBufferContentTypesVisitor, bool>;
674  const DataLayout &
DL;
// Array <-> vector reshaping helpers for scalar arrays.
678  Type *scalarArrayTypeAsVector(
Type *MaybeArrayType);
679  Value *arrayToVector(
Value *V,
Type *TargetType,
const Twine &Name);
680  Value *vectorToArray(
Value *V,
Type *OrigType,
const Twine &Name);
// Casts between a value's original type and its legalized non-aggregate form.
688  Value *makeLegalNonAggregate(
Value *V,
Type *TargetType,
const Twine &Name);
689  Value *makeIllegalNonAggregate(
Value *V,
Type *OrigType,
const Twine &Name);
// Splits a legal vector type into intrinsic-sized slices.
702  void getVecSlices(
Type *
T, SmallVectorImpl<VecSlice> &Slices);
704  Value *extractSlice(
Value *Vec, VecSlice S,
const Twine &Name);
705  Value *insertSlice(
Value *Whole,
Value *Part, VecSlice S,
const Twine &Name);
// Picks the type actually used for the buffer load/store intrinsic.
715  Type *intrinsicTypeFor(
Type *LegalType);
// Recursive worker for loads over (possibly aggregate) part types.
717  bool visitLoadImpl(LoadInst &OrigLI,
Type *PartType,
718                     SmallVectorImpl<uint32_t> &AggIdxs, uint64_t AggByteOffset,
719                     Value *&Result,
const Twine &Name);
// Store worker returns {changed, modified-in-place}.
721  std::pair<bool, bool> visitStoreImpl(StoreInst &OrigSI,
Type *PartType,
722                                       SmallVectorImpl<uint32_t> &AggIdxs,
723                                       uint64_t AggByteOffset,
726  bool visitInstruction(Instruction &
I) {
return false; }
727  bool visitLoadInst(LoadInst &LI);
728  bool visitStoreInst(StoreInst &SI);
731  LegalizeBufferContentTypesVisitor(
const DataLayout &
DL, LLVMContext &Ctx)
732      : IRB(Ctx, InstSimplifyFolder(
DL)),
DL(
DL) {}
// Views an array of scalars as the equivalent vector type; asserts (per the
// visible messages) that nested aggregates and padded arrays were handled
// by earlier recursion.
737Type *LegalizeBufferContentTypesVisitor::scalarArrayTypeAsVector(
Type *
T) {
741  Type *ET = AT->getElementType();
744                    "should have recursed");
745  if (!
DL.typeSizeEqualsStoreSize(AT))
747        "loading padded arrays from buffer fat pinters should have recursed");
// Materializes the array->vector reshape with extractvalue/insertelement.
751Value *LegalizeBufferContentTypesVisitor::arrayToVector(
Value *V,
756  unsigned EC = VT->getNumElements();
757  for (
auto I : iota_range<unsigned>(0, EC,
false)) {
758    Value *Elem = IRB.CreateExtractValue(V,
I, Name +
".elem." + Twine(
I));
759    VectorRes = IRB.CreateInsertElement(VectorRes, Elem,
I,
760                                        Name +
".as.vec." + Twine(
I));
// Inverse reshape: vector back to the original array type.
765Value *LegalizeBufferContentTypesVisitor::vectorToArray(
Value *V,
770  unsigned EC = AT->getNumElements();
771  for (
auto I : iota_range<unsigned>(0, EC,
false)) {
772    Value *Elem = IRB.CreateExtractElement(V,
I, Name +
".elem." + Twine(
I));
773    ArrayRes = IRB.CreateInsertValue(ArrayRes, Elem,
I,
774                                     Name +
".as.array." + Twine(
I));
// Computes the legal non-aggregate type for T: types whose size differs from
// their store size become iN; elements of 16-128 bits that are powers of two
// are kept; otherwise the value is recast as a vector of i32/i16/i8 chosen
// by the largest unit that divides the total size.
779Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(
Type *
T) {
780  TypeSize
Size =
DL.getTypeStoreSizeInBits(
T);
782  if (!
DL.typeSizeEqualsStoreSize(
T))
783    T = IRB.getIntNTy(
Size.getFixedValue());
784  Type *ElemTy =
T->getScalarType();
790  unsigned ElemSize =
DL.getTypeSizeInBits(ElemTy).getFixedValue();
791  if (
isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) {
796  Type *BestVectorElemType =
nullptr;
797  if (
Size.isKnownMultipleOf(32))
798    BestVectorElemType = IRB.getInt32Ty();
799  else if (
Size.isKnownMultipleOf(16))
800    BestVectorElemType = IRB.getInt16Ty();
802    BestVectorElemType = IRB.getInt8Ty();
803  unsigned NumCastElems =
// A single-element "vector" degenerates to the scalar element type.
805  if (NumCastElems == 1)
806    return BestVectorElemType;
// Converts V to TargetType; when the bit sizes differ, goes through a scalar
// zext so the extra bytes are zero before the final bitcast.
810Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
811    Value *V,
Type *TargetType,
const Twine &Name) {
812  Type *SourceType =
V->getType();
813  TypeSize SourceSize =
DL.getTypeSizeInBits(SourceType);
814  TypeSize TargetSize =
DL.getTypeSizeInBits(TargetType);
815  if (SourceSize != TargetSize) {
818    Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy, Name +
".as.scalar");
819    Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy, Name +
".zext");
821    SourceType = ByteScalarTy;
823  return IRB.CreateBitCast(V, TargetType, Name +
".legal");
// Inverse of makeLegalNonAggregate: when sizes differ, truncates through a
// scalar before casting back to the original type.
826Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate(
827    Value *V,
Type *OrigType,
const Twine &Name) {
828  Type *LegalType =
V->getType();
829  TypeSize LegalSize =
DL.getTypeSizeInBits(LegalType);
830  TypeSize OrigSize =
DL.getTypeSizeInBits(OrigType);
831  if (LegalSize != OrigSize) {
834    Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy, Name +
".bytes.cast");
835    Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy, Name +
".trunc");
836    return IRB.CreateBitCast(Trunc, OrigType, Name +
".orig");
838  return IRB.CreateBitCast(V, OrigType, Name +
".real.ty");
// Chooses the type handed to the buffer intrinsic: 1-element vectors become
// scalars; 96-bit vectors of sub-32-bit elements and small element counts
// get special-cased to i8/i16/i32 forms (interior lines elided).
841Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(
Type *LegalType) {
845  Type *ET = VT->getElementType();
848  if (VT->getNumElements() == 1)
850  if (
DL.getTypeSizeInBits(LegalType) == 96 &&
DL.getTypeSizeInBits(ET) < 32)
853  switch (VT->getNumElements()) {
857    return IRB.getInt8Ty();
859    return IRB.getInt16Ty();
861    return IRB.getInt32Ty();
// Greedily slices a vector into the largest chunks a buffer access can
// take: 4-word, 3-word, 2-word, word, short, byte sized runs of elements.
871void LegalizeBufferContentTypesVisitor::getVecSlices(
872    Type *
T, SmallVectorImpl<VecSlice> &Slices) {
878  uint64_t ElemBitWidth =
879      DL.getTypeSizeInBits(VT->getElementType()).getFixedValue();
881  uint64_t ElemsPer4Words = 128 / ElemBitWidth;
882  uint64_t ElemsPer2Words = ElemsPer4Words / 2;
883  uint64_t ElemsPerWord = ElemsPer2Words / 2;
884  uint64_t ElemsPerShort = ElemsPerWord / 2;
885  uint64_t ElemsPerByte = ElemsPerShort / 2;
889  uint64_t ElemsPer3Words = ElemsPerWord * 3;
891  uint64_t TotalElems = VT->getNumElements();
// Helper: emit a slice of MaybeLen elements if it fits at Index.
893  auto TrySlice = [&](
unsigned MaybeLen) {
894    if (MaybeLen > 0 && Index + MaybeLen <= TotalElems) {
895      VecSlice Slice{
Index, MaybeLen};
// Largest-first greedy loop over the remaining elements.
902  while (Index < TotalElems) {
903    TrySlice(ElemsPer4Words) || TrySlice(ElemsPer3Words) ||
904        TrySlice(ElemsPer2Words) || TrySlice(ElemsPerWord) ||
905        TrySlice(ElemsPerShort) || TrySlice(ElemsPerByte);
// Extracts slice S from Vec: whole-vector and single-element cases are
// special-cased; otherwise a shufflevector with an iota mask is used.
909Value *LegalizeBufferContentTypesVisitor::extractSlice(
Value *Vec, VecSlice S,
914  if (S.Length == VecVT->getNumElements() && S.Index == 0)
917    return IRB.CreateExtractElement(Vec, S.Index,
918                                    Name +
".slice." + Twine(S.Index));
920      llvm::iota_range<int>(S.Index, S.Index + S.Length,
false));
921  return IRB.CreateShuffleVector(Vec, Mask, Name +
".slice." + Twine(S.Index));
// Inserts Part back into Whole at slice S; the general case widens Part with
// a shuffle and then blends it into Whole with a second shuffle.
924Value *LegalizeBufferContentTypesVisitor::insertSlice(
Value *Whole,
Value *Part,
930  if (S.Length == WholeVT->getNumElements() && S.Index == 0)
933    return IRB.CreateInsertElement(Whole, Part, S.Index,
934                                   Name +
".slice." + Twine(S.Index));
939  SmallVector<int> ExtPartMask(NumElems, -1);
944  Value *ExtPart = IRB.CreateShuffleVector(Part, ExtPartMask,
945                                           Name +
".ext." + Twine(S.Index));
947  SmallVector<int>
Mask =
952  return IRB.CreateShuffleVector(Whole, ExtPart, Mask,
953                                 Name +
".parts." + Twine(S.Index));
// Recursive load legalizer: structs/arrays recurse per element with byte
// offsets from the struct layout; leaves are legalized, possibly sliced,
// loaded via retyped loads, and reassembled.
956bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
957    LoadInst &OrigLI,
Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,
958    uint64_t AggByteOff,
Value *&Result,
const Twine &Name) {
// Struct case: recurse into each field at its layout offset.
960    const StructLayout *Layout =
DL.getStructLayout(ST);
962    for (
auto [
I, ElemTy,
Offset] :
965      Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
966                               AggByteOff +
Offset.getFixedValue(), Result,
967                               Name +
"." + Twine(
I));
// Array case: recurse per element using the element store size as stride.
973    Type *ElemTy = AT->getElementType();
976    TypeSize ElemStoreSize =
DL.getTypeStoreSize(ElemTy);
978    for (
auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
981      Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
983                               Result, Name + Twine(
I));
// Leaf: compute the legal (vector) form and its slices.
992  Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
993  Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
996  getVecSlices(LegalType, Slices);
997  bool HasSlices = Slices.
size() > 1;
998  bool IsAggPart = !AggIdxs.
empty();
// Unsliced scalar loads: retype the original load in place if needed.
1000  if (!HasSlices && !IsAggPart) {
1001    Type *LoadableType = intrinsicTypeFor(LegalType);
1002    if (LoadableType == PartType)
1005    IRB.SetInsertPoint(&OrigLI);
1007    NLI->mutateType(LoadableType);
1008    NLI = IRB.Insert(NLI);
1009    NLI->setName(Name +
".loadable");
1011    LoadsRes = IRB.CreateBitCast(NLI, LegalType, Name +
".from.loadable");
1013    IRB.SetInsertPoint(&OrigLI);
1021    unsigned ElemBytes =
DL.getTypeStoreSize(ElemType);
1023    if (IsAggPart && Slices.
empty())
// Sliced case: one offset GEP + load per slice, reinserted into the result.
1025    for (VecSlice S : Slices) {
1028      int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
1030      Value *NewPtr = IRB.CreateGEP(
1032          OrigPtr->
getName() +
".off.ptr." + Twine(ByteOffset),
1034      Type *LoadableType = intrinsicTypeFor(SliceType);
1035      LoadInst *NewLI = IRB.CreateAlignedLoad(
1037          Name +
".off." + Twine(ByteOffset));
1043      Value *Loaded = IRB.CreateBitCast(NewLI, SliceType,
1044                                        NewLI->
getName() +
".from.loadable");
1045      LoadsRes = insertSlice(LoadsRes, Loaded, S, Name);
// Undo the legalization casts and array reshape, then place the part into
// the aggregate result when this was an aggregate member.
1048  if (LegalType != ArrayAsVecType)
1049    LoadsRes = makeIllegalNonAggregate(LoadsRes, ArrayAsVecType, Name);
1050  if (ArrayAsVecType != PartType)
1051    LoadsRes = vectorToArray(LoadsRes, PartType, Name);
1054    Result = IRB.CreateInsertValue(Result, LoadsRes, AggIdxs, Name);
// Entry point for loads: kicks off the recursion with empty indices.
1060bool LegalizeBufferContentTypesVisitor::visitLoadInst(LoadInst &LI) {
1064  SmallVector<uint32_t> AggIdxs;
1067  bool Changed = visitLoadImpl(LI, OrigType, AggIdxs, 0, Result, LI.
getName());
// Recursive store legalizer; mirrors visitLoadImpl. Returns {changed,
// modified-in-place}: in-place means the original store was retyped rather
// than replaced, so the caller must not erase it.
1076std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
1077    StoreInst &OrigSI,
Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,
1078    uint64_t AggByteOff,
const Twine &Name) {
// Struct case: recurse per field at its layout offset.
1080    const StructLayout *Layout =
DL.getStructLayout(ST);
1082    for (
auto [
I, ElemTy,
Offset] :
1085      Changed |= std::get<0>(visitStoreImpl(OrigSI, ElemTy, AggIdxs,
1086                                            AggByteOff +
Offset.getFixedValue(),
1087                                            Name +
"." + Twine(
I)));
1090    return std::make_pair(
Changed,
false);
// Array case: recurse per element.
1093    Type *ElemTy = AT->getElementType();
1096    TypeSize ElemStoreSize =
DL.getTypeStoreSize(ElemTy);
1098    for (
auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
1101      Changed |= std::get<0>(visitStoreImpl(
1102          OrigSI, ElemTy, AggIdxs,
1106    return std::make_pair(
Changed,
false);
// Leaf: pull out the aggregate member (if any), reshape array->vector,
// legalize, and slice.
1111  Value *NewData = OrigData;
1113  bool IsAggPart = !AggIdxs.
empty();
1115    NewData = IRB.CreateExtractValue(NewData, AggIdxs, Name);
1117  Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
1118  if (ArrayAsVecType != PartType) {
1119    NewData = arrayToVector(NewData, ArrayAsVecType, Name);
1122  Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
1123  if (LegalType != ArrayAsVecType) {
1124    NewData = makeLegalNonAggregate(NewData, LegalType, Name);
1128  getVecSlices(LegalType, Slices);
1129  bool NeedToSplit = Slices.
size() > 1 || IsAggPart;
// Unsplit case: retype the stored value in place (modified-in-place=true).
1131    Type *StorableType = intrinsicTypeFor(LegalType);
1132    if (StorableType == PartType)
1133      return std::make_pair(
false,
false);
1134    NewData = IRB.CreateBitCast(NewData, StorableType, Name +
".storable");
1136    return std::make_pair(
true,
true);
1141  if (IsAggPart && Slices.
empty())
1143  unsigned ElemBytes =
DL.getTypeStoreSize(ElemType);
// Split case: one offset GEP + store per slice.
1145  for (VecSlice S : Slices) {
1148    int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
1150        IRB.CreateGEP(IRB.getInt8Ty(), OrigPtr, IRB.getInt32(ByteOffset),
1151                      OrigPtr->
getName() +
".part." + Twine(S.Index),
1153    Value *DataSlice = extractSlice(NewData, S, Name);
1154    Type *StorableType = intrinsicTypeFor(SliceType);
1155    DataSlice = IRB.CreateBitCast(DataSlice, StorableType,
1156                                  DataSlice->
getName() +
".storable");
1160    NewSI->setOperand(0, DataSlice);
1161    NewSI->setOperand(1, NewPtr);
1164  return std::make_pair(
true,
false);
// Entry point for stores: erase the original only when it was replaced
// rather than modified in place.
1167bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {
1170  IRB.SetInsertPoint(&SI);
1171  SmallVector<uint32_t> AggIdxs;
1172  Value *OrigData =
SI.getValueOperand();
1173  auto [
Changed, ModifiedInPlace] =
1174      visitStoreImpl(SI, OrigData->
getType(), AggIdxs, 0, OrigData->
getName());
1175  if (
Changed && !ModifiedInPlace)
1176    SI.eraseFromParent();
// Per-function driver (body elided).
1180bool LegalizeBufferContentTypesVisitor::processFunction(Function &
F) {
// Splits a lowered fat-pointer constant {rsrc, off} into its two fields.
1191static std::pair<Constant *, Constant *>
1194  return std::make_pair(
C->getAggregateElement(0u),
C->getAggregateElement(1u));
// ValueMaterializer that rewrites constants containing buffer fat pointers
// into their lowered struct form; uses a nested ValueMapper for operands.
1199class FatPtrConstMaterializer final :
public ValueMaterializer {
1200  BufferFatPtrToStructTypeMap *TypeMap;
1206  ValueMapper InternalMapper;
1208  Constant *materializeBufferFatPtrConst(Constant *
C);
1212  FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
1215        InternalMapper(UnderlyingMap,
RF_None, TypeMap, this) {}
1216  ~FatPtrConstMaterializer() =
default;
// Handles null, splat-vector, and element-wise vector constants; per the
// visible messages, other constant kinds (and constant exprs) are rejected.
1222Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *
C) {
1223  Type *SrcTy =
C->getType();
1225  if (
C->isNullValue())
1226    return ConstantAggregateZero::getNullValue(NewTy);
1239    if (Constant *S =
VC->getSplatValue()) {
1244      auto EC =
VC->getType()->getElementCount();
1250      for (
Value *
Op :
VC->operand_values()) {
1265                       "fat pointer) values are not supported");
1269                   "constant exprs containing ptr addrspace(7) (buffer "
1270                   "fat pointer) values should have been expanded earlier");
// ValueMaterializer hook: dispatches constants to the worker above.
1275Value *FatPtrConstMaterializer::materialize(
Value *V) {
1283  return materializeBufferFatPtrConst(
C);
// Visitor that splits each fat-pointer value into separate resource and
// offset values (PtrParts); most members are elided from this excerpt.
1291class SplitPtrStructs :
public InstVisitor<SplitPtrStructs, PtrParts> {
1334  void processConditionals();
// Copies instruction metadata from Src to Dest when both are instructions.
1384void SplitPtrStructs::copyMetadata(
Value *Dest,
Value *Src) {
1388  if (!DestI || !SrcI)
1391  DestI->copyMetadata(*SrcI);
// getPtrParts (header elided): returns cached {rsrc, off} parts, or creates
// them by extracting struct fields, inserting after the defining point.
1396         "of something that wasn't rewritten");
1397  auto *RsrcEntry = &RsrcParts[
V];
1398  auto *OffEntry = &OffParts[
V];
1399  if (*RsrcEntry && *OffEntry)
1400    return {*RsrcEntry, *OffEntry};
1404    return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1407  IRBuilder<InstSimplifyFolder>::InsertPointGuard Guard(IRB);
1412    return {*RsrcEntry = Rsrc, *OffEntry =
Off};
// Place the extracts after an instruction's def, or past a function
// argument's entry allocas.
1415    IRB.SetInsertPoint(*
I->getInsertionPointAfterDef());
1416    IRB.SetCurrentDebugLocation(
I->getDebugLoc());
1418    IRB.SetInsertPointPastAllocas(
A->getParent());
1419    IRB.SetCurrentDebugLocation(
DebugLoc());
1421  Value *Rsrc = IRB.CreateExtractValue(V, 0,
V->getName() +
".rsrc");
1422  Value *
Off = IRB.CreateExtractValue(V, 1,
V->getName() +
".off");
1423  return {*RsrcEntry = Rsrc, *OffEntry =
Off};
// Root-chasing fragment: strip GEPs and addrspacecasts back to the source.
1436      V =
GEP->getPointerOperand();
1438      V = ASC->getPointerOperand();
// Collects candidate resource roots for a conditional (PHI/select) by
// walking incoming values; Seen guards against revisiting.
1442void SplitPtrStructs::getPossibleRsrcRoots(Instruction *
I,
1443                                           SmallPtrSetImpl<Value *> &Roots,
1444                                           SmallPtrSetImpl<Value *> &Seen) {
1448    for (
Value *In :
PHI->incoming_values()) {
1455      if (!Seen.
insert(SI).second)
// Fixes up PHIs/selects over fat pointers: if all roots share one resource,
// reuse it; otherwise build a parallel resource PHI. Offsets always get a
// parallel PHI. Temporary extracts are queued for deletion.
1470void SplitPtrStructs::processConditionals() {
1471  SmallDenseMap<Value *, Value *> FoundRsrcs;
1472  SmallPtrSet<Value *, 4> Roots;
1473  SmallPtrSet<Value *, 4> Seen;
1474  for (Instruction *
I : Conditionals) {
1476    Value *Rsrc = RsrcParts[
I];
1478    assert(Rsrc && Off &&
"must have visited conditionals by now");
1480    std::optional<Value *> MaybeRsrc;
// Reuse a resource already computed for this conditional's group.
1481    auto MaybeFoundRsrc = FoundRsrcs.
find(
I);
1482    if (MaybeFoundRsrc != FoundRsrcs.
end()) {
1483      MaybeRsrc = MaybeFoundRsrc->second;
1485      IRBuilder<InstSimplifyFolder>::InsertPointGuard Guard(IRB);
1488      getPossibleRsrcRoots(
I, Roots, Seen);
1491      for (
Value *V : Roots)
1493      for (
Value *V : Seen)
// A unique external root means all branches share one resource.
1505      if (Diff.size() == 1) {
1506        Value *RootVal = *Diff.begin();
1510          MaybeRsrc = std::get<0>(getPtrParts(RootVal));
1512          MaybeRsrc = RootVal;
// PHI handling: insert after the PHI's def.
1520      IRB.SetInsertPoint(*
PHI->getInsertionPointAfterDef());
1521      IRB.SetCurrentDebugLocation(
PHI->getDebugLoc());
1523        NewRsrc = *MaybeRsrc;
// No common resource: build a resource PHI mirroring the original.
1526        auto *RsrcPHI = IRB.CreatePHI(RsrcTy,
PHI->getNumIncomingValues());
1527        RsrcPHI->takeName(Rsrc);
1528        for (
auto [V, BB] :
llvm::zip(
PHI->incoming_values(),
PHI->blocks())) {
1529          Value *VRsrc = std::get<0>(getPtrParts(V));
1530          RsrcPHI->addIncoming(VRsrc, BB);
1532        copyMetadata(RsrcPHI,
PHI);
// Offset PHI is always rebuilt from the incoming values' offset parts.
1537      auto *NewOff = IRB.CreatePHI(OffTy,
PHI->getNumIncomingValues());
1538      NewOff->takeName(Off);
1539      for (
auto [V, BB] :
llvm::zip(
PHI->incoming_values(),
PHI->blocks())) {
1540        assert(OffParts.
count(V) &&
"An offset part had to be created by now");
1541        Value *VOff = std::get<1>(getPtrParts(V));
1542        NewOff->addIncoming(VOff, BB);
1544      copyMetadata(NewOff,
PHI);
// Replace the temporary extractvalue placeholders with the real parts.
1553        ConditionalTemps.push_back(RsrcInst);
1554        RsrcInst->replaceAllUsesWith(NewRsrc);
1557        ConditionalTemps.push_back(OffInst);
1558        OffInst->replaceAllUsesWith(NewOff);
// Record the resolved resource for everything seen in this group.
1563      for (
Value *V : Seen)
1564        FoundRsrcs[
V] = NewRsrc;
// Select handling fragment: swap in the shared resource if different.
1569        if (RsrcInst != *MaybeRsrc) {
1570          ConditionalTemps.push_back(RsrcInst);
1571          RsrcInst->replaceAllUsesWith(*MaybeRsrc);
1574        for (
Value *V : Seen)
1575          FoundRsrcs[
V] = *MaybeRsrc;
// Deletes the temporaries and original instructions that were split into
// {rsrc, off} parts, after retargeting debug records and remaining uses.
1583void SplitPtrStructs::killAndReplaceSplitInstructions(
1584    SmallVectorImpl<Instruction *> &Origs) {
1585  for (Instruction *
I : ConditionalTemps)
1586    I->eraseFromParent();
1588  for (Instruction *
I : Origs) {
1589    if (!SplitUsers.contains(
I))
// Rewrite each debug variable record into two fragments, one per part.
1594    for (DbgVariableRecord *Dbg : Dbgs) {
1595      auto &
DL =
I->getDataLayout();
1597             "We should've RAUW'd away loads, stores, etc. at this point");
1598      DbgVariableRecord *OffDbg =
Dbg->clone();
1599      auto [Rsrc,
Off] = getPtrParts(
I);
1601      int64_t RsrcSz =
DL.getTypeSizeInBits(Rsrc->
getType());
1602      int64_t OffSz =
DL.getTypeSizeInBits(
Off->getType());
1604      std::optional<DIExpression *> RsrcExpr =
1607      std::optional<DIExpression *> OffExpr =
1618        Dbg->setExpression(*RsrcExpr);
1619        Dbg->replaceVariableLocationOp(
I, Rsrc);
// Poison out uses inside already-split users; they are dead by now.
1626    I->replaceUsesWithIf(
Poison, [&](
const Use &U) ->
bool {
1628      return SplitUsers.contains(UI);
1632    if (
I->use_empty()) {
1633      I->eraseFromParent();
// Remaining (unsplit) users get a re-assembled struct built from the parts.
1636    IRB.SetInsertPoint(*
I->getInsertionPointAfterDef());
1637    IRB.SetCurrentDebugLocation(
I->getDebugLoc());
1638    auto [Rsrc,
Off] = getPtrParts(
I);
1644    I->replaceAllUsesWith(
Struct);
1645    I->eraseFromParent();
// Records the pointer alignment as a param attribute on the buffer
// intrinsic's resource argument.
1649void SplitPtrStructs::setAlign(CallInst *Intr, Align
A,
unsigned RsrcArgIdx) {
1651  Intr->
addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx,
A));
// Pre-op fence: release-or-stronger orderings need a release fence before
// the buffer access (enclosing function header elided).
1657  case AtomicOrdering::Release:
1658  case AtomicOrdering::AcquireRelease:
1659  case AtomicOrdering::SequentiallyConsistent:
1660    IRB.CreateFence(AtomicOrdering::Release, SSID);
// Post-op fence: acquire-or-stronger orderings need an acquire fence after.
1670  case AtomicOrdering::Acquire:
1671  case AtomicOrdering::AcquireRelease:
1672  case AtomicOrdering::SequentiallyConsistent:
1673    IRB.CreateFence(AtomicOrdering::Acquire, SSID);
// Core lowering of load/store/atomicrmw through a fat pointer: split the
// pointer, pick the matching amdgcn.raw.ptr.buffer.* intrinsic, surround
// with fences per the atomic ordering, and RAUW the original.
1681                                       Type *Ty, Align Alignment,
1684  IRB.SetInsertPoint(
I);
1686  auto [Rsrc,
Off] = getPtrParts(
Ptr);
// Argument order: optional data operand, then rsrc, then offset.
1689    Args.push_back(Arg);
1690  Args.push_back(Rsrc);
1691  Args.push_back(Off);
1692  insertPreMemOpFence(Order, SSID);
1696  Args.push_back(IRB.getInt32(0));
1701  Args.push_back(IRB.getInt32(Aux));
// Loads select the atomic variant for non-NotAtomic orderings.
1705    IID = Order == AtomicOrdering::NotAtomic
1706              ? Intrinsic::amdgcn_raw_ptr_buffer_load
1707              : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;
1709    IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
// atomicrmw operations map 1:1 onto buffer atomic intrinsics; unsupported
// ones (fsub/fmaximum/fminimum/nand) are reported below.
1711    switch (RMW->getOperation()) {
1713      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
1716      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
1719      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
1722      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
1725      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
1728      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
1731      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
1734      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
1737      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
1740      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
1743      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
1746      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
1749      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
1753                       "atomic floating point subtraction not supported for "
1754                       "buffer resources and should've been expanded away");
1759                       "atomic floating point fmaximum not supported for "
1760                       "buffer resources and should've been expanded away");
1765                       "atomic floating point fminimum not supported for "
1766                       "buffer resources and should've been expanded away");
1771                       "atomic nand not supported for buffer resources and "
1772                       "should've been expanded away");
1777                       "buffer resources and should've ben expanded away");
// Emit the call, propagate metadata/alignment, fence, and replace uses.
1787  auto *
Call = IRB.CreateIntrinsic(IID, Ty, Args);
1788  copyMetadata(
Call,
I);
1789  setAlign(
Call, Alignment, Arg ? 1 : 0);
1792  insertPostMemOpFence(Order, SSID);
1795  SplitUsers.insert(
I);
1796  I->replaceAllUsesWith(
Call);
// Default visitor: non-pointer-producing instructions yield no parts.
1800PtrParts SplitPtrStructs::visitInstruction(Instruction &
I) {
1801  return {
nullptr,
nullptr};
// Loads through fat pointers are lowered via handleMemoryInst (body elided).
1804PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
1806    return {
nullptr,
nullptr};
1810  return {
nullptr,
nullptr};
// Stores: forward value/pointer/align/ordering/volatility/scope.
1813PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
1815    return {
nullptr,
nullptr};
1816  Value *Arg =
SI.getValueOperand();
1817  handleMemoryInst(&SI, Arg,
SI.getPointerOperand(), Arg->
getType(),
1818                   SI.getAlign(),
SI.getOrdering(),
SI.isVolatile(),
1819                   SI.getSyncScopeID());
1820  return {
nullptr,
nullptr};
// atomicrmw: likewise routed through handleMemoryInst (body elided).
1823PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
1825    return {
nullptr,
nullptr};
1830    return {
nullptr,
nullptr};
// cmpxchg: lowered to amdgcn.raw.ptr.buffer.atomic.cmpswap with fences;
// the {value, success} result struct is rebuilt with insertvalue.
1835PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
1838    return {
nullptr,
nullptr};
1839  IRB.SetInsertPoint(&AI);
1844  bool IsNonTemporal = AI.
getMetadata(LLVMContext::MD_nontemporal);
1846  auto [Rsrc,
Off] = getPtrParts(
Ptr);
1847  insertPreMemOpFence(Order, SSID);
1855      IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
1857                           Off, IRB.getInt32(0), IRB.getInt32(Aux)});
1858  copyMetadata(
Call, &AI);
1861  insertPostMemOpFence(Order, SSID);
1864  Res = IRB.CreateInsertValue(Res,
Call, 0);
1867  Res = IRB.CreateInsertValue(Res, Succeeded, 1);
1869  SplitUsers.insert(&AI);
1871  return {
nullptr,
nullptr};
// GEP on a fat pointer: the resource passes through; the offset is the old
// offset plus the accumulated GEP offset, with nuw/nusw used to mark the
// add's wrap flags. Scalar-pointer GEPs with vector results broadcast both
// parts first.
1874PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &
GEP) {
1875  using namespace llvm::PatternMatch;
1878    return {
nullptr,
nullptr};
1879  IRB.SetInsertPoint(&
GEP);
1881  auto [Rsrc,
Off] = getPtrParts(
Ptr);
1882  const DataLayout &
DL =
GEP.getDataLayout();
1883  bool IsNUW =
GEP.hasNoUnsignedWrap();
1884  bool IsNUSW =
GEP.hasNoUnsignedSignedWrap();
1895    GEP.mutateType(FatPtrTy);
1897  GEP.mutateType(ResTy);
1899  if (BroadcastsPtr) {
1900    Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,
1902    Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,
1906    SplitUsers.insert(&
GEP);
// Constant non-negative offsets let nusw imply nuw on the add.
1910  bool HasNonNegativeOff =
false;
1912    HasNonNegativeOff = !CI->isNegative();
1918    NewOff = IRB.CreateAdd(Off, OffAccum,
"",
1919                           IsNUW || (IsNUSW && HasNonNegativeOff),
1922  copyMetadata(NewOff, &
GEP);
1924  SplitUsers.insert(&
GEP);
1925  return {Rsrc, NewOff};
// ptrtoint: for full-width results, recombine as (rsrc << shift) | off;
// narrow results take just the offset (interior lines elided).
1928PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
1931    return {
nullptr,
nullptr};
1932  IRB.SetInsertPoint(&PI);
1937  auto [Rsrc,
Off] = getPtrParts(
Ptr);
1943    Res = IRB.CreateIntCast(Off, ResTy,
false,
1946    Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.
getName() +
".rsrc");
1947    Value *Shl = IRB.CreateShl(
1950        "", Width >= FatPtrWidth, Width > FatPtrWidth);
1951    Value *OffCast = IRB.CreateIntCast(Off, ResTy,
false,
1953    Res = IRB.CreateOr(Shl, OffCast);
1956  copyMetadata(Res, &PI);
1958  SplitUsers.insert(&PI);
1960  return {
nullptr,
nullptr};
// ptrtoaddr: the address of a fat pointer is just its offset part.
1963PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) {
1966    return {
nullptr,
nullptr};
1967  IRB.SetInsertPoint(&PA);
1969  auto [Rsrc,
Off] = getPtrParts(
Ptr);
1970  Value *Res = IRB.CreateIntCast(Off, PA.
getType(),
false);
1971  copyMetadata(Res, &PA);
1973  SplitUsers.insert(&PA);
1975  return {
nullptr,
nullptr};
// inttoptr: split the integer into high (resource) and low (offset) fields,
// inverse of the ptrtoint lowering above.
1978PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
1980    return {
nullptr,
nullptr};
1981  IRB.SetInsertPoint(&IP);
1990  Type *RsrcTy = RetTy->getElementType(0);
1991  Type *OffTy = RetTy->getElementType(1);
1992  Value *RsrcPart = IRB.CreateLShr(
1995  Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy,
false);
1996  Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.
getName() +
".rsrc");
1998      IRB.CreateIntCast(
Int, OffTy,
false, IP.
getName() +
".off");
2000  copyMetadata(Rsrc, &IP);
2001  SplitUsers.insert(&IP);
// addrspacecast into addrspace(7): identity casts reuse the source parts;
// null/poison/undef get matching constant parts; otherwise (per the assert
// text) only addrspace(8) resources may be cast, paired with a zero offset.
2005PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &
I) {
2009    return {
nullptr,
nullptr};
2010  IRB.SetInsertPoint(&
I);
2013  if (
In->getType() ==
I.getType()) {
2014    auto [Rsrc,
Off] = getPtrParts(In);
2015    SplitUsers.insert(&
I);
2020  Type *RsrcTy = ResTy->getElementType(0);
2021  Type *OffTy = ResTy->getElementType(1);
2027  if (InConst && InConst->isNullValue()) {
2029    SplitUsers.insert(&
I);
2030    return {NullRsrc, ZeroOff};
2035    SplitUsers.insert(&
I);
2036    return {PoisonRsrc, PoisonOff};
2041    SplitUsers.insert(&
I);
2042    return {UndefRsrc, UndefOff};
2047         "only buffer resources (addrspace 8) and null/poison pointers can be "
2048         "cast to buffer fat pointers (addrspace 7)");
2049  SplitUsers.insert(&
I);
2050  return {
In, ZeroOff};
// icmp: only eq/ne are legal on fat pointers; compare both parts and
// combine with and (eq) or or (ne).
2053PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
2056    return {
nullptr,
nullptr};
2058  IRB.SetInsertPoint(&Cmp);
2059  ICmpInst::Predicate Pred =
Cmp.getPredicate();
2061  assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2062         "Pointer comparison is only equal or unequal");
2063  auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
2064  auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
2066      IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc,
Cmp.getName() +
".rsrc");
2067  copyMetadata(RsrcCmp, &Cmp);
2068  Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff,
Cmp.getName() +
".off");
2069  copyMetadata(OffCmp, &Cmp);
2071  Value *Res =
nullptr;
2072  if (Pred == ICmpInst::ICMP_EQ)
2073    Res = IRB.CreateAnd(RsrcCmp, OffCmp);
2074  else if (Pred == ICmpInst::ICMP_NE)
2075    Res = IRB.CreateOr(RsrcCmp, OffCmp);
2076  copyMetadata(Res, &Cmp);
2078  SplitUsers.insert(&Cmp);
2079  Cmp.replaceAllUsesWith(Res);
2080  return {
nullptr,
nullptr};
// freeze: applied independently to the resource and offset parts.
2083PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &
I) {
2085    return {
nullptr,
nullptr};
2086  IRB.SetInsertPoint(&
I);
2087  auto [Rsrc,
Off] = getPtrParts(
I.getOperand(0));
2089  Value *RsrcRes = IRB.CreateFreeze(Rsrc,
I.getName() +
".rsrc");
2090  copyMetadata(RsrcRes, &
I);
2091  Value *OffRes = IRB.CreateFreeze(Off,
I.getName() +
".off");
2092  copyMetadata(OffRes, &
I);
2093  SplitUsers.insert(&
I);
2094  return {RsrcRes, OffRes};
// extractelement: extract the same lane from both part vectors.
2097PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &
I) {
2099    return {
nullptr,
nullptr};
2100  IRB.SetInsertPoint(&
I);
2101  Value *Vec =
I.getVectorOperand();
2102  Value *Idx =
I.getIndexOperand();
2103  auto [Rsrc,
Off] = getPtrParts(Vec);
2105  Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx,
I.getName() +
".rsrc");
2106  copyMetadata(RsrcRes, &
I);
2107  Value *OffRes = IRB.CreateExtractElement(Off, Idx,
I.getName() +
".off");
2108  copyMetadata(OffRes, &
I);
2109  SplitUsers.insert(&
I);
2110  return {RsrcRes, OffRes};
// insertelement: insert the element's parts into the vector's parts.
2113PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &
I) {
2117    return {
nullptr,
nullptr};
2118  IRB.SetInsertPoint(&
I);
2119  Value *Vec =
I.getOperand(0);
2120  Value *Elem =
I.getOperand(1);
2121  Value *Idx =
I.getOperand(2);
2122  auto [VecRsrc, VecOff] = getPtrParts(Vec);
2123  auto [ElemRsrc, ElemOff] = getPtrParts(Elem);
2126      IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx,
I.getName() +
".rsrc");
2127  copyMetadata(RsrcRes, &
I);
2129      IRB.CreateInsertElement(VecOff, ElemOff, Idx,
I.getName() +
".off");
2130  copyMetadata(OffRes, &
I);
2131  SplitUsers.insert(&
I);
2132  return {RsrcRes, OffRes};
// shufflevector: shuffle both part vectors with the same mask.
2135PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &
I) {
2138    return {
nullptr,
nullptr};
2139  IRB.SetInsertPoint(&
I);
2141  Value *V1 =
I.getOperand(0);
2142  Value *V2 =
I.getOperand(1);
2143  ArrayRef<int>
Mask =
I.getShuffleMask();
2144  auto [V1Rsrc, V1Off] = getPtrParts(V1);
2145  auto [V2Rsrc, V2Off] = getPtrParts(V2);
2148      IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask,
I.getName() +
".rsrc");
2149  copyMetadata(RsrcRes, &
I);
2151      IRB.CreateShuffleVector(V1Off, V2Off, Mask,
I.getName() +
".off");
2152  copyMetadata(OffRes, &
I);
2153  SplitUsers.insert(&
I);
2154  return {RsrcRes, OffRes};
// PHIs: return temporary extractvalue parts and defer real splitting to
// processConditionals (PHI is queued in Conditionals).
2157PtrParts SplitPtrStructs::visitPHINode(PHINode &
PHI) {
2159    return {
nullptr,
nullptr};
2160  IRB.SetInsertPoint(*
PHI.getInsertionPointAfterDef());
2166  Value *TmpRsrc = IRB.CreateExtractValue(&
PHI, 0,
PHI.getName() +
".rsrc");
2167  Value *TmpOff = IRB.CreateExtractValue(&
PHI, 1,
PHI.getName() +
".off");
2168  Conditionals.push_back(&
PHI);
2169  SplitUsers.insert(&
PHI);
2170  return {TmpRsrc, TmpOff};
// select: select each part on the same condition; also queued in
// Conditionals for resource unification.
2173PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
2175    return {
nullptr,
nullptr};
2176  IRB.SetInsertPoint(&SI);
2179  Value *True =
SI.getTrueValue();
2180  Value *False =
SI.getFalseValue();
2181  auto [TrueRsrc, TrueOff] = getPtrParts(True);
2182  auto [FalseRsrc, FalseOff] = getPtrParts(False);
2185      IRB.CreateSelect(
Cond, TrueRsrc, FalseRsrc,
SI.getName() +
".rsrc", &SI);
2186  copyMetadata(RsrcRes, &SI);
2187  Conditionals.push_back(&SI);
2189      IRB.CreateSelect(
Cond, TrueOff, FalseOff,
SI.getName() +
".off", &SI);
2190  copyMetadata(OffRes, &SI);
2191  SplitUsers.insert(&SI);
2192  return {RsrcRes, OffRes};
2203 case Intrinsic::amdgcn_make_buffer_rsrc:
2204 case Intrinsic::ptrmask:
2205 case Intrinsic::invariant_start:
2206 case Intrinsic::invariant_end:
2207 case Intrinsic::launder_invariant_group:
2208 case Intrinsic::strip_invariant_group:
2209 case Intrinsic::memcpy:
2210 case Intrinsic::memcpy_inline:
2211 case Intrinsic::memmove:
2212 case Intrinsic::memset:
2213 case Intrinsic::memset_inline:
2214 case Intrinsic::experimental_memset_pattern:
2215 case Intrinsic::amdgcn_load_to_lds:
2220PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &
I) {
2225 case Intrinsic::amdgcn_make_buffer_rsrc: {
2227 return {
nullptr,
nullptr};
2229 Value *Stride =
I.getArgOperand(1);
2230 Value *NumRecords =
I.getArgOperand(2);
2233 Type *RsrcType = SplitType->getElementType(0);
2234 Type *OffType = SplitType->getElementType(1);
2235 IRB.SetInsertPoint(&
I);
2236 Value *Rsrc = IRB.CreateIntrinsic(IID, {RsrcType,
Base->getType()},
2238 copyMetadata(Rsrc, &
I);
2241 SplitUsers.insert(&
I);
2242 return {Rsrc,
Zero};
2244 case Intrinsic::ptrmask: {
2247 return {
nullptr,
nullptr};
2249 IRB.SetInsertPoint(&
I);
2250 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2251 if (
Mask->getType() !=
Off->getType())
2253 "pointer (data layout not set up correctly?)");
2254 Value *OffRes = IRB.CreateAnd(Off, Mask,
I.getName() +
".off");
2255 copyMetadata(OffRes, &
I);
2256 SplitUsers.insert(&
I);
2257 return {Rsrc, OffRes};
2261 case Intrinsic::invariant_start: {
2264 return {
nullptr,
nullptr};
2265 IRB.SetInsertPoint(&
I);
2266 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2268 auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {
I.getOperand(0), Rsrc});
2269 copyMetadata(NewRsrc, &
I);
2271 SplitUsers.insert(&
I);
2272 I.replaceAllUsesWith(NewRsrc);
2273 return {
nullptr,
nullptr};
2275 case Intrinsic::invariant_end: {
2276 Value *RealPtr =
I.getArgOperand(2);
2278 return {
nullptr,
nullptr};
2279 IRB.SetInsertPoint(&
I);
2280 Value *RealRsrc = getPtrParts(RealPtr).first;
2281 Value *InvPtr =
I.getArgOperand(0);
2283 Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->
getType()},
2284 {InvPtr,
Size, RealRsrc});
2285 copyMetadata(NewRsrc, &
I);
2287 SplitUsers.insert(&
I);
2288 I.replaceAllUsesWith(NewRsrc);
2289 return {
nullptr,
nullptr};
2291 case Intrinsic::launder_invariant_group:
2292 case Intrinsic::strip_invariant_group: {
2295 return {
nullptr,
nullptr};
2296 IRB.SetInsertPoint(&
I);
2297 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2298 Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->
getType()}, {Rsrc});
2299 copyMetadata(NewRsrc, &
I);
2301 SplitUsers.insert(&
I);
2302 return {NewRsrc,
Off};
2304 case Intrinsic::amdgcn_load_to_lds: {
2307 return {
nullptr,
nullptr};
2308 IRB.SetInsertPoint(&
I);
2309 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2310 Value *LDSPtr =
I.getArgOperand(1);
2311 Value *LoadSize =
I.getArgOperand(2);
2312 Value *ImmOff =
I.getArgOperand(3);
2313 Value *Aux =
I.getArgOperand(4);
2314 Value *SOffset = IRB.getInt32(0);
2316 Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},
2317 {Rsrc, LDSPtr, LoadSize,
Off, SOffset, ImmOff, Aux});
2318 copyMetadata(NewLoad, &
I);
2319 SplitUsers.insert(&
I);
2320 I.replaceAllUsesWith(NewLoad);
2321 return {
nullptr,
nullptr};
2324 return {
nullptr,
nullptr};
2327void SplitPtrStructs::processFunction(Function &
F) {
2331 LLVM_DEBUG(
dbgs() <<
"Splitting pointer structs in function: " <<
F.getName()
2333 for (Instruction *
I : Originals) {
2338 if (SplitUsers.contains(
I))
2341 assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
2342 "Can't have a resource but no offset");
2344 RsrcParts[
I] = Rsrc;
2348 processConditionals();
2349 killAndReplaceSplitInstructions(Originals);
2355 Conditionals.clear();
2356 ConditionalTemps.clear();
2360class AMDGPULowerBufferFatPointers :
public ModulePass {
2364 AMDGPULowerBufferFatPointers() : ModulePass(
ID) {}
2366 bool run(
Module &M,
const TargetMachine &TM);
2367 bool runOnModule(
Module &M)
override;
2369 void getAnalysisUsage(AnalysisUsage &AU)
const override;
2377 BufferFatPtrToStructTypeMap *TypeMap) {
2378 bool HasFatPointers =
false;
2381 HasFatPointers |= (
I.getType() != TypeMap->remapType(
I.getType()));
2383 for (
const Value *V :
I.operand_values())
2384 HasFatPointers |= (V->getType() != TypeMap->remapType(V->getType()));
2386 return HasFatPointers;
2390 BufferFatPtrToStructTypeMap *TypeMap) {
2391 Type *Ty =
F.getFunctionType();
2392 return Ty != TypeMap->remapType(Ty);
2408 while (!OldF->
empty()) {
2422 CloneMap[&NewArg] = &OldArg;
2423 NewArg.takeName(&OldArg);
2424 Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
2426 NewArg.mutateType(OldArgTy);
2427 OldArg.replaceAllUsesWith(&NewArg);
2428 NewArg.mutateType(NewArgTy);
2432 if (OldArgTy != NewArgTy && !IsIntrinsic)
2435 AttributeFuncs::typeIncompatible(NewArgTy, ArgAttr));
2442 AttributeFuncs::typeIncompatible(NewF->
getReturnType(), RetAttrs));
2444 NewF->
getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));
2452 CloneMap[&BB] = &BB;
2458bool AMDGPULowerBufferFatPointers::run(
Module &M,
const TargetMachine &TM) {
2460 const DataLayout &
DL =
M.getDataLayout();
2466 LLVMContext &Ctx =
M.getContext();
2468 BufferFatPtrToStructTypeMap StructTM(
DL);
2469 BufferFatPtrToIntTypeMap IntTM(
DL);
2470 for (
const GlobalVariable &GV :
M.globals()) {
2473 Ctx.
emitError(
"global variables with a buffer fat pointer address "
2474 "space (7) are not supported");
2478 Type *VT = GV.getValueType();
2479 if (VT != StructTM.remapType(VT)) {
2481 Ctx.
emitError(
"global variables that contain buffer fat pointers "
2482 "(address space 7 pointers) are unsupported. Use "
2483 "buffer resource pointers (address space 8) instead");
2491 for (Function &
F :
M.functions())
2498 SmallPtrSet<Constant *, 8> Visited;
2499 SetVector<Constant *> BufferFatPtrConsts;
2500 while (!Worklist.
empty()) {
2502 if (!Visited.
insert(
C).second)
2518 StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM,
DL,
2519 M.getContext(), &TM);
2520 LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(
DL,
2522 for (Function &
F :
M.functions()) {
2525 Changed |= MemOpsRewrite.processFunction(
F);
2526 if (InterfaceChange || BodyChanges) {
2527 NeedsRemap.
push_back(std::make_pair(&
F, InterfaceChange));
2528 Changed |= BufferContentsTypeRewrite.processFunction(
F);
2531 if (NeedsRemap.
empty())
2538 FatPtrConstMaterializer Materializer(&StructTM, CloneMap);
2540 ValueMapper LowerInFuncs(CloneMap,
RF_None, &StructTM, &Materializer);
2541 for (
auto [
F, InterfaceChange] : NeedsRemap) {
2543 if (InterfaceChange)
2549 LowerInFuncs.remapFunction(*NewF);
2554 if (InterfaceChange) {
2555 F->replaceAllUsesWith(NewF);
2556 F->eraseFromParent();
2564 SplitPtrStructs Splitter(
DL,
M.getContext(), &TM);
2565 for (Function *
F : NeedsPostProcess)
2566 Splitter.processFunction(*
F);
2567 for (Function *
F : Intrinsics) {
2571 F->eraseFromParent();
2575 F->replaceAllUsesWith(*NewF);
2581bool AMDGPULowerBufferFatPointers::runOnModule(
Module &M) {
2582 TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
2583 const TargetMachine &
TM = TPC.
getTM<TargetMachine>();
2587char AMDGPULowerBufferFatPointers::ID = 0;
2591void AMDGPULowerBufferFatPointers::getAnalysisUsage(
AnalysisUsage &AU)
const {
2595#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
2604 return new AMDGPULowerBufferFatPointers();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
static Function * moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy, ValueToValueMapTy &CloneMap)
Move the body of OldF into a new function, returning it.
static void makeCloneInPraceMap(Function *F, ValueToValueMapTy &CloneMap)
static bool isBufferFatPtrOrVector(Type *Ty)
static bool isSplitFatPtr(Type *Ty)
std::pair< Value *, Value * > PtrParts
static bool hasFatPointerInterface(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID)
Returns true if this intrinsic needs to be removed when it is applied to ptr addrspace(7) values.
static bool containsBufferFatPointers(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)
Returns true if there are values that have a buffer fat pointer in them, which means we'll need to pe...
static Value * rsrcPartRoot(Value *V)
Returns the instruction that defines the resource part of the value V.
static constexpr unsigned BufferOffsetWidth
static bool isBufferFatPtrConst(Constant *C)
static std::pair< Constant *, Constant * > splitLoweredFatBufferConst(Constant *C)
Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered buffer fat pointer const...
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
AMD GCN specific subclass of TargetSubtarget.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
Machine Check Debug Module
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
This class represents a conversion between pointers from one address space to another.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getNewValOperand()
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Value * getCompareOperand()
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet removeAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const
Remove the specified attributes from this set.
LLVM Basic Block Representation.
LLVM_ABI void removeFromParent()
Unlink 'this' from the containing function, but do not delete it.
LLVM_ABI void insertInto(Function *Parent, BasicBlock *InsertBefore=nullptr)
Insert unlinked basic block into a function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI void insertBefore(DbgRecord *InsertBefore)
LLVM_ABI void eraseFromParent()
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
void setExpression(DIExpression *NewExpr)
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns random concrete value if an operand is either a ...
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & front() const
iterator_range< arg_iterator > args()
AttributeList getAttributes() const
Return the attribute list for this Function.
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void updateAfterNameChange()
Update internal caches that depend on the function name (such as the intrinsic ID and libcall cache).
Type * getReturnType() const
Returns the type of the ret val.
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
static GEPNoWrapFlags noUnsignedWrap()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI void copyMetadata(const GlobalObject *Src, unsigned Offset)
Copy metadata from Src, adjusting offsets by Offset.
LinkageTypes getLinkage() const
void setDLLStorageClass(DLLStorageClassTypes C)
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
DLLStorageClassTypes getDLLStorageClass() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
Base class for instruction visitors.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This class represents a cast from an integer to a pointer.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
unsigned getDestAddressSpace() const
unsigned getSourceAddressSpace() const
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
This class represents the LLVM 'select' instruction.
ArrayRef< value_type > getArrayRef() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
MutableArrayRef< TypeSize > getMemberOffsets()
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
bool isLiteral() const
Return true if this type is uniqued by structural equivalence, false if it is a struct definition.
Type * getElementType(unsigned N) const
Primary interface to the complete machine description for the target machine.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const
Return a TargetTransformInfo for a given function.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getArrayElementType() const
ArrayRef< Type * > subtypes() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
This is a class that can be implemented by clients to remap types when cloning constants and instruct...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
iterator find(const KeyT &Val)
ValueMapIteratorImpl< MapT, const Value *, false > iterator
LLVM_ABI Constant * mapConstant(const Constant &C)
LLVM_ABI Value * mapValue(const Value &V)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
constexpr ScalarTy getFixedValue() const
self_iterator getIterator()
iterator insertAfter(iterator where, pointer New)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
bool match(Val *V, const Pattern &P)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
ModulePass * createAMDGPULowerBufferFatPointersPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet)
Expand MemSetPattern as a loop. MemSet is not deleted.
LLVM_ABI void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
char & AMDGPULowerBufferFatPointersID
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
ArrayRef(const T &OneElt) -> ArrayRef< T >
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.