24#define DEBUG_TYPE "lower-mem-intrinsics"
35 Value *OpSize,
unsigned OpSizeVal) {
38 return B.CreateAnd(Len, OpSizeVal - 1);
39 return B.CreateURem(Len, OpSize);
48 Value *RTLoopRemainder =
nullptr) {
51 return B.CreateSub(Len, RTLoopRemainder);
56struct LoopExpansionInfo {
61 Value *MainLoopIndex =
nullptr;
69 Value *ResidualLoopIndex =
nullptr;
72std::optional<uint64_t> getAverageMemOpLoopTripCount(
const MemIntrinsic &
I) {
75 if (std::optional<Function::ProfileCount> EC =
76 I.getFunction()->getEntryCount();
77 !EC || !
EC->getCount())
79 if (
const auto Len =
I.getLengthInBytes())
80 return Len->getZExtValue();
84 std::numeric_limits<uint32_t>::max(),
Total);
88 for (
const auto &
P : ProfData)
89 TripCount +=
P.Count *
P.Value;
90 return std::round(1.0 * TripCount /
Total);
127static LoopExpansionInfo
129 unsigned MainLoopStep,
unsigned ResidualLoopStep,
131 std::optional<uint64_t> AverageTripCount) {
132 assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
133 "ResidualLoopStep must divide MainLoopStep if specified");
134 assert(ResidualLoopStep <= MainLoopStep &&
135 "ResidualLoopStep cannot be larger than MainLoopStep");
136 assert(MainLoopStep > 0 &&
"MainLoopStep must be non-zero");
137 LoopExpansionInfo LEI;
140 InsertBefore, BBNamePrefix +
"-post-expansion");
149 Type *LenType = Len->getType();
151 ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
153 Value *LoopUnits = Len;
154 Value *ResidualUnits =
nullptr;
157 bool MustTakeMainLoop =
false;
158 if (MainLoopStep != 1) {
160 uint64_t TotalUnits = CLen->getZExtValue();
162 uint64_t ResidualCount = TotalUnits - LoopEndCount;
163 LoopUnits = ConstantInt::get(LenType, LoopEndCount);
164 ResidualUnits = ConstantInt::get(LenType, ResidualCount);
165 MustTakeMainLoop = LoopEndCount > 0;
173 CIMainLoopStep, MainLoopStep);
175 MainLoopStep, ResidualUnits);
178 MustTakeMainLoop = CLen->getZExtValue() > 0;
182 Ctx, BBNamePrefix +
"-expansion-main-body", ParentFunc, PostLoopBB);
187 LEI.MainLoopIndex = LoopIndex;
188 LoopIndex->
addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
191 LoopBuilder.
CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
198 if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
209 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
210 if (MustTakeMainLoop)
211 PreLoopBuilder.
CreateBr(MainLoopBB);
214 PreLoopBuilder.
CreateICmpNE(LoopUnits, Zero), MainLoopBB,
216 if (AverageTripCount.has_value()) {
219 {AverageTripCount.value() % MainLoopStep, 1},
230 MainLoopBB, ResidualCondBB);
236 ResLoopBB, PostLoopBB);
241 ResBuilder.
CreatePHI(LenType, 2,
"residual-loop-index");
246 Value *FullOffset = ResBuilder.
CreateAdd(LoopUnits, ResidualIndex);
247 LEI.ResidualLoopIndex = FullOffset;
250 ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
251 ResidualIndex->
addIncoming(ResNewIndex, ResLoopBB);
259 ResBuilder.
CreateICmpULT(ResNewIndex, ResidualUnits), ResLoopBB,
267 if (MustTakeMainLoop) {
268 PreLoopBuilder.
CreateBr(MainLoopBB);
270 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
273 MainLoopBB, PostLoopBB,
274 B.createLikelyBranchWeights());
279 LoopBuilder.
CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
280 if (AverageTripCount.has_value())
292 bool SrcIsVolatile,
bool DstIsVolatile,
295 std::optional<uint32_t> AtomicElementSize,
296 std::optional<uint64_t> AverageTripCount) {
314 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(
315 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
317 "Atomic memcpy lowering is not supported for vector operand type");
320 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
321 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
322 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
323 "Atomic memcpy lowering is not supported for selected operand size");
329 if (LoopEndCount != 0) {
330 LoopExpansionInfo LEI =
332 "static-memcpy", AverageTripCount);
346 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
349 Load->setMetadata(LLVMContext::MD_alias_scope,
355 Load, DstGEP, PartDstAlign, DstIsVolatile);
358 Store->setMetadata(LLVMContext::MD_noalias,
MDNode::get(Ctx, NewScope));
360 if (AtomicElementSize) {
364 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
365 "No residual loop was requested");
369 uint64_t BytesCopied = LoopEndCount;
371 if (RemainingBytes == 0)
376 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
377 SrcAS, DstAS, SrcAlign, DstAlign,
380 for (
auto *OpTy : RemainingOps) {
384 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
385 assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
386 "Atomic memcpy lowering is not supported for selected operand size");
389 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
394 Load->setMetadata(LLVMContext::MD_alias_scope,
398 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
403 Store->setMetadata(LLVMContext::MD_noalias,
MDNode::get(Ctx, NewScope));
405 if (AtomicElementSize) {
409 BytesCopied += OperandSize;
412 "Bytes copied should match size in the call!");
417 Align SrcAlign,
Align DstAlign,
bool SrcIsVolatile,
bool DstIsVolatile,
419 std::optional<uint32_t> AtomicElementSize,
420 std::optional<uint64_t> AverageTripCount) {
426 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(
"MemCopyDomain");
428 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
433 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(
434 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
436 "Atomic memcpy lowering is not supported for vector operand type");
437 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
438 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
439 "Atomic memcpy lowering is not supported for selected operand size");
443 Type *ResidualLoopOpType = AtomicElementSize
446 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
447 assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
448 "Store size is expected to match type size");
450 LoopExpansionInfo LEI =
452 "dynamic-memcpy", AverageTripCount);
464 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
465 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
466 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
469 Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
472 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
474 Load, DstGEP, PartDstAlign, DstIsVolatile);
479 if (AtomicElementSize) {
485 if (!LEI.ResidualLoopIP)
492 Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
493 LEI.ResidualLoopIndex);
494 LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
495 ResidualLoopOpType, ResSrcGEP, ResSrcAlign, SrcIsVolatile);
501 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
502 LEI.ResidualLoopIndex);
503 StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
504 ResLoad, ResDstGEP, ResDstAlign, DstIsVolatile);
509 if (AtomicElementSize) {
519static std::pair<Value *, Value *>
522 Value *ResAddr1 = Addr1;
523 Value *ResAddr2 = Addr2;
528 if (
TTI.isValidAddrSpaceCast(AS2, AS1))
529 ResAddr2 =
B.CreateAddrSpaceCast(Addr2, Addr1->
getType());
530 else if (
TTI.isValidAddrSpaceCast(AS1, AS2))
531 ResAddr1 =
B.CreateAddrSpaceCast(Addr1, Addr2->
getType());
534 "support addrspacecast");
536 return {ResAddr1, ResAddr2};
568 Align DstAlign,
bool SrcIsVolatile,
579 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
581 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
583 bool LoopOpIsInt8 = LoopOpType == Int8Type;
587 bool RequiresResidual = !LoopOpIsInt8;
589 Type *ResidualLoopOpType = Int8Type;
590 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
594 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
596 ConstantInt::get(ILengthType, ResidualLoopOpSize);
597 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
603 Value *RuntimeLoopBytes = CopyLen;
604 Value *RuntimeLoopRemainder =
nullptr;
605 Value *SkipResidualCondition =
nullptr;
606 if (RequiresResidual) {
607 RuntimeLoopRemainder =
610 LoopOpSize, RuntimeLoopRemainder);
611 SkipResidualCondition =
612 PLBuilder.
CreateICmpEQ(RuntimeLoopRemainder, Zero,
"skip_residual");
614 Value *SkipMainCondition =
615 PLBuilder.
CreateICmpEQ(RuntimeLoopBytes, Zero,
"skip_main");
626 auto [CmpSrcAddr, CmpDstAddr] =
629 PLBuilder.
CreateICmpULT(CmpSrcAddr, CmpDstAddr,
"compare_src_dst");
632 &ThenTerm, &ElseTerm);
659 CopyBackwardsBB->
setName(
"memmove_copy_backwards");
661 CopyForwardBB->
setName(
"memmove_copy_forward");
663 ExitBB->
setName(
"memmove_done");
676 F->getContext(),
"memmove_bwd_main_loop",
F, CopyForwardBB);
682 if (RequiresResidual) {
685 F->getContext(),
"memmove_bwd_residual_loop",
F, MainLoopBB);
690 ResidualLoopPhi, CIResidualLoopOpSize,
"bwd_residual_index");
698 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
703 ResidualDstAlign, DstIsVolatile);
707 F->getContext(),
"memmove_bwd_middle",
F, MainLoopBB);
713 ResidualLoopBuilder.
CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
714 IntermediateBB, ResidualLoopBB);
716 ResidualLoopPhi->
addIncoming(ResidualIndex, ResidualLoopBB);
717 ResidualLoopPhi->
addIncoming(CopyLen, CopyBackwardsBB);
726 PredBB = IntermediateBB;
734 MainLoopBuilder.
CreateSub(MainLoopPhi, CILoopOpSize,
"bwd_main_index");
738 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
746 MainLoopPhi->
addIncoming(RuntimeLoopBytes, PredBB);
751 ExitBB, MainLoopBB, SkipMainCondition, PredBBTerm->
getIterator());
764 MainLoopBuilder.
CreatePHI(ILengthType, 0,
"fwd_main_index");
768 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
773 Value *MainIndex = MainLoopBuilder.
CreateAdd(MainLoopPhi, CILoopOpSize);
779 if (RequiresResidual)
785 MainLoopBuilder.
CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
795 if (RequiresResidual) {
800 F->getContext(),
"memmove_fwd_residual_loop",
F, ExitBB);
801 IntermediateBuilder.
CreateCondBr(SkipResidualCondition, ExitBB,
808 ResidualLoopBuilder.
CreatePHI(ILengthType, 0,
"fwd_residual_index");
812 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
817 ResidualDstAlign, DstIsVolatile);
818 Value *ResidualIndex =
819 ResidualLoopBuilder.
CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
821 ResidualLoopBuilder.
CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
823 ResidualLoopPhi->
addIncoming(ResidualIndex, ResidualLoopBB);
824 ResidualLoopPhi->
addIncoming(RuntimeLoopBytes, IntermediateBB);
835 Align DstAlign,
bool SrcIsVolatile,
850 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
852 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
853 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
862 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
863 ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
864 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
870 auto [CmpSrcAddr, CmpDstAddr] =
873 PLBuilder.
CreateICmpULT(CmpSrcAddr, CmpDstAddr,
"compare_src_dst");
876 &ThenTerm, &ElseTerm);
881 ExitBB->
setName(
"memmove_done");
893 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
899 Value *SrcGEP = Builder.CreateInBoundsGEP(
900 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
902 Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
903 Value *DstGEP = Builder.CreateInBoundsGEP(
904 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
905 Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
906 BytesCopied += OperandSize;
910 if (RemainingBytes != 0) {
911 CopyBackwardsBB->
setName(
"memmove_bwd_residual");
912 uint64_t BytesCopied = BytesCopiedInLoop;
923 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
924 SrcAS, DstAS, PartSrcAlign,
926 for (
auto *OpTy : RemainingOps) {
930 GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
933 if (BytesCopiedInLoop != 0) {
936 if (RemainingBytes != 0) {
940 PredBB = CopyBackwardsBB;
942 CopyBackwardsBB->
setName(
"memmove_bwd_loop");
947 Value *Index = LoopBuilder.
CreateSub(LoopPhi, CILoopOpSize,
"bwd_index");
950 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
968 if (BytesCopiedInLoop != 0) {
969 CopyForwardBB->
setName(
"memmove_fwd_loop");
972 if (RemainingBytes != 0) {
975 "memmove_fwd_residual");
976 FwdResidualBB = SuccBB;
983 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
998 if (RemainingBytes != 0) {
999 uint64_t BytesCopied = BytesCopiedInLoop;
1006 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1007 SrcAS, DstAS, PartSrcAlign,
1009 for (
auto *OpTy : RemainingOps)
1010 GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
1018 TypeSize DstSize =
DL.getTypeStoreSize(DstType);
1020 TypeSize SetValueSize =
DL.getTypeStoreSize(SetValueType);
1021 assert(SetValueSize ==
DL.getTypeAllocSize(SetValueType) &&
1022 "Store size and alloc size of SetValue's type must match");
1023 assert(SetValueSize != 0 && DstSize % SetValueSize == 0 &&
1024 "DstType size must be a multiple of SetValue size");
1027 if (DstSize != SetValueSize) {
1036 B.CreateVectorSplat(DstSize / SetValueSize, Result,
"setvalue.splat");
1041 Result =
B.CreateBitCast(Result, DstType,
"setvalue.splat.cast");
1049 std::optional<uint64_t> AverageTripCount) {
1061 Type *TypeOfLen = Len->getType();
1065 Type *LoopOpType = Int8Type;
1069 LoopOpType =
TTI->getMemcpyLoopLoweringType(
1070 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1072 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
1073 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
1078 if (LoopEndCount != 0) {
1079 Value *SplatSetValue =
nullptr;
1089 InsertBefore, Len, LoopOpSize, 0,
"static-memset", AverageTripCount);
1101 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
1102 "No residual loop was requested");
1106 uint64_t RemainingBytes = Len->getZExtValue() - BytesSet;
1107 if (RemainingBytes == 0)
1112 assert(
TTI &&
"there cannot be a residual loop without TTI");
1114 TTI->getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1115 DstAS, DstAS, DstAlign, DstAlign,
1118 Type *PreviousOpTy =
nullptr;
1119 Value *SplatSetValue =
nullptr;
1120 for (
auto *OpTy : RemainingOps) {
1121 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
1123 "Operand types cannot be scalable vector types");
1128 if (OpTy != PreviousOpTy)
1132 Int8Type, DstAddr, ConstantInt::get(TypeOfLen, BytesSet));
1135 BytesSet += OperandSize;
1136 PreviousOpTy = OpTy;
1138 assert(BytesSet == Len->getZExtValue() &&
1139 "Bytes set should match size in the call!");
1146 std::optional<uint64_t> AverageTripCount) {
1157 Type *LoopOpType = Int8Type;
1159 LoopOpType =
TTI->getMemcpyLoopLoweringType(
1160 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1162 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
1163 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
1165 Type *ResidualLoopOpType = Int8Type;
1166 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
1174 LoopExpansionInfo LEI =
1176 "dynamic-memset", AverageTripCount);
1188 if (!LEI.ResidualLoopIP)
1196 LEI.ResidualLoopIndex);
1203 std::optional<uint64_t> AverageTripCount,
1219 Builder.SetCurrentDebugLocation(DbgLoc);
1221 auto *ToLoopBR = Builder.CreateCondBr(
1222 Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
1225 if (AverageTripCount.has_value())
1226 ToLoopBR->setMetadata(LLVMContext::MD_prof,
1239 LoopIndex->
addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
1244 PartAlign, IsVolatile);
1247 LoopBuilder.
CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
1251 LoopBuilder.
CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
1252 if (AverageTripCount.has_value())
1259template <
typename T>
1263 const SCEV *DestSCEV = SE->
getSCEV(Memcpy->getRawDest());
1274 auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
1314 bool DstIsVolatile = SrcIsVolatile;
1320 if (SrcAS != DstAS) {
1321 if (!
TTI.addrspacesMayAlias(SrcAS, DstAS)) {
1324 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
1327 Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1328 SrcIsVolatile, DstIsVolatile,
1329 false,
TTI, std::nullopt, AverageTripCount);
1332 Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
1333 DstAlign, SrcIsVolatile, DstIsVolatile,
1334 false,
TTI, std::nullopt, AverageTripCount);
1340 if (!(
TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
1341 TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
1346 dbgs() <<
"Do not know how to expand memmove between different "
1347 "address spaces\n");
1354 Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1355 SrcIsVolatile, DstIsVolatile,
TTI);
1358 Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
1359 SrcIsVolatile, DstIsVolatile,
TTI);
1366 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memset);
1401 getAverageMemOpLoopTripCount(*Memset),
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
static Value * createMemSetSplat(const DataLayout &DL, IRBuilderBase &B, Value *SetValue, Type *DstType)
Create a Value of DstType that consists of a sequence of copies of SetValue, using bitcasts and a vector splat.
static std::pair< Value *, Value * > tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2, const TargetTransformInfo &TTI)
static bool canOverlap(MemTransferBase< T > *Memcpy, ScalarEvolution *SE)
static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore, Value *Len, unsigned MainLoopStep, unsigned ResidualLoopStep, StringRef BBNamePrefix, std::optional< uint64_t > AverageTripCount)
Insert the control flow and loop counters for a memcpy/memset loop expansion.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoopUnknownSize(Instruction *InsertBefore, Value *DstAddr, Value *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal)
static void createMemSetLoopKnownSize(Instruction *InsertBefore, Value *DstAddr, ConstantInt *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal, Value *RTLoopRemainder=nullptr)
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, Align DstAlign, std::optional< uint64_t > AverageTripCount, bool IsVolatile)
This file contains the declarations for profiling metadata utility functions.
This class represents any memcpy intrinsic, i.e. llvm.element.unordered.atomic.memcpy and llvm.memcpy.
uint32_t getElementSizeInBytes() const
LLVM Basic Block Representation.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
A parsed version of the target data layout string, together with methods for querying it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Common base class shared among various IRBuilders.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
UnreachableInst * CreateUnreachable()
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the debug location of the next non-debug node.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.experimental.memset.pattern intrinsic.
Common base class for all memory transfer intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS, and RHS at the context instruction CtxI.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
FunctionAddr VTableAddr Value
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruction if the function is profiled (i.e. it has an entry count).
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet)
Expand MemSetPattern as a loop. MemSet is not deleted.
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and return it if Inst is annotated with value profile data.
LLVM_ABI void expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE)
Expand AtomicMemCpy as a loop. AtomicMemCpy is not deleted.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< unsigned > AtomicSize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.