24#define DEBUG_TYPE "lower-mem-intrinsics"
35 Value *OpSize,
unsigned OpSizeVal) {
38 return B.CreateAnd(Len, OpSizeVal - 1);
39 return B.CreateURem(Len, OpSize);
48 Value *RTLoopRemainder =
nullptr) {
51 return B.CreateSub(Len, RTLoopRemainder);
56struct LoopExpansionInfo {
61 Value *MainLoopIndex =
nullptr;
69 Value *ResidualLoopIndex =
nullptr;
72std::optional<uint64_t> getAverageMemOpLoopTripCount(
const MemIntrinsic &
I) {
75 if (std::optional<Function::ProfileCount> EC =
76 I.getFunction()->getEntryCount();
77 !EC || !
EC->getCount())
79 if (
const auto Len =
I.getLengthInBytes())
80 return Len->getZExtValue();
84 std::numeric_limits<uint32_t>::max(),
Total);
88 for (
const auto &
P : ProfData)
89 TripCount +=
P.Count *
P.Value;
90 return std::round(1.0 * TripCount /
Total);
130static LoopExpansionInfo
132 unsigned MainLoopStep,
unsigned ResidualLoopStep,
134 std::optional<uint64_t> ExpectedUnits) {
135 assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
136 "ResidualLoopStep must divide MainLoopStep if specified");
137 assert(ResidualLoopStep <= MainLoopStep &&
138 "ResidualLoopStep cannot be larger than MainLoopStep");
139 assert(MainLoopStep > 0 &&
"MainLoopStep must be non-zero");
140 LoopExpansionInfo LEI;
149 InsertBefore, BBNamePrefix +
"-post-expansion");
158 Type *LenType = Len->getType();
160 ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
161 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
169 bool MustTakeMainLoop =
false;
170 bool MayTakeMainLoop =
true;
171 bool MustTakeResidualLoop =
false;
172 bool MayTakeResidualLoop =
true;
174 Value *LoopUnits = Len;
175 Value *ResidualUnits =
nullptr;
176 if (MainLoopStep != 1) {
178 uint64_t TotalUnits = CLen->getZExtValue();
180 uint64_t ResidualCount = TotalUnits - LoopEndCount;
181 LoopUnits = ConstantInt::get(LenType, LoopEndCount);
182 ResidualUnits = ConstantInt::get(LenType, ResidualCount);
183 MustTakeMainLoop = LoopEndCount > 0;
184 MayTakeMainLoop = MustTakeMainLoop;
185 MustTakeResidualLoop = ResidualCount > 0;
186 MayTakeResidualLoop = MustTakeResidualLoop;
192 CIMainLoopStep, MainLoopStep);
194 MainLoopStep, ResidualUnits);
197 MustTakeMainLoop = CLen->getZExtValue() > 0;
198 MayTakeMainLoop = MustTakeMainLoop;
203 assert((MayTakeMainLoop || MayTakeResidualLoop) &&
204 "At least one of the loops must be generated");
210 if (MayTakeMainLoop) {
212 ParentFunc, PostLoopBB);
217 LEI.MainLoopIndex = LoopIndex;
218 LoopIndex->
addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
221 LoopIndex, ConstantInt::get(LenType, MainLoopStep));
231 LoopBuilder.
CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
233 if (ExpectedUnits.has_value()) {
234 uint64_t BackedgeTakenCount = ExpectedUnits.value() / MainLoopStep;
235 if (BackedgeTakenCount > 0)
236 BackedgeTakenCount -= 1;
247 bool ResidualLoopRequested =
248 ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep;
251 if (ResidualLoopRequested && MayTakeResidualLoop) {
265 if (MustTakeResidualLoop) {
267 PredOfResLoopBody = MainLoopBB;
279 ResidualLoopBB, PostLoopBB);
280 if (ExpectedUnits.has_value()) {
282 BR->setMetadata(LLVMContext::MD_prof,
289 PredOfResLoopBody = ResidualCondBB;
296 ResBuilder.
CreatePHI(LenType, 2,
"residual-loop-index");
297 ResidualIndex->
addIncoming(Zero, PredOfResLoopBody);
303 LEI.ResidualLoopIndex = ResBuilder.
CreateAdd(LoopUnits, ResidualIndex);
305 LEI.ResidualLoopIndex = ResidualIndex;
308 ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
309 ResidualIndex->
addIncoming(ResNewIndex, ResidualLoopBB);
317 ResBuilder.
CreateICmpULT(ResNewIndex, ResidualUnits), ResidualLoopBB,
320 if (ExpectedUnits.has_value()) {
322 (ExpectedUnits.value() % MainLoopStep) / ResidualLoopStep;
323 if (BackedgeTakenCount > 0)
324 BackedgeTakenCount -= 1;
334 if (MustTakeMainLoop) {
338 PreLoopBuilder.
CreateBr(MainLoopBB);
339 }
else if (!MainLoopBB && ResidualLoopBB) {
340 if (MustTakeResidualLoop) {
343 PreLoopBuilder.
CreateBr(ResidualLoopBB);
349 PreLoopBuilder.
CreateICmpNE(ResidualUnits, Zero), ResidualLoopBB,
351 if (ExpectedUnits.has_value()) {
362 if (ResidualCondBB) {
365 FalseBB = ResidualCondBB;
366 }
else if (ResidualLoopBB) {
370 assert(MustTakeResidualLoop);
371 FalseBB = ResidualLoopBB;
375 PreLoopBuilder.
CreateICmpNE(LoopUnits, Zero), MainLoopBB, FalseBB);
377 if (ExpectedUnits.has_value()) {
393 bool SrcIsVolatile,
bool DstIsVolatile,
396 std::optional<uint32_t> AtomicElementSize,
397 std::optional<uint64_t> AverageTripCount) {
415 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(
416 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
418 "Atomic memcpy lowering is not supported for vector operand type");
421 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
422 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
423 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
424 "Atomic memcpy lowering is not supported for selected operand size");
430 if (LoopEndCount != 0) {
431 LoopExpansionInfo LEI =
433 "static-memcpy", AverageTripCount);
434 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
435 "Main loop should be generated for non-zero loop count");
449 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
452 Load->setMetadata(LLVMContext::MD_alias_scope,
458 Load, DstGEP, PartDstAlign, DstIsVolatile);
461 Store->setMetadata(LLVMContext::MD_noalias,
MDNode::get(Ctx, NewScope));
463 if (AtomicElementSize) {
467 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
468 "No residual loop was requested");
472 uint64_t BytesCopied = LoopEndCount;
474 if (RemainingBytes == 0)
479 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
480 SrcAS, DstAS, SrcAlign, DstAlign,
483 for (
auto *OpTy : RemainingOps) {
487 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
488 assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
489 "Atomic memcpy lowering is not supported for selected operand size");
492 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
497 Load->setMetadata(LLVMContext::MD_alias_scope,
501 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
506 Store->setMetadata(LLVMContext::MD_noalias,
MDNode::get(Ctx, NewScope));
508 if (AtomicElementSize) {
512 BytesCopied += OperandSize;
515 "Bytes copied should match size in the call!");
520 Align SrcAlign,
Align DstAlign,
bool SrcIsVolatile,
bool DstIsVolatile,
522 std::optional<uint32_t> AtomicElementSize,
523 std::optional<uint64_t> AverageTripCount) {
529 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(
"MemCopyDomain");
531 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
536 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(
537 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
539 "Atomic memcpy lowering is not supported for vector operand type");
540 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
541 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
542 "Atomic memcpy lowering is not supported for selected operand size");
546 Type *ResidualLoopOpType = AtomicElementSize
549 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
550 assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
551 "Store size is expected to match type size");
553 LoopExpansionInfo LEI =
555 "dynamic-memcpy", AverageTripCount);
556 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
557 "Main loop should be generated for unknown size copy");
569 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
570 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
571 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
574 Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
577 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
579 Load, DstGEP, PartDstAlign, DstIsVolatile);
584 if (AtomicElementSize) {
590 if (!LEI.ResidualLoopIP)
597 Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
598 LEI.ResidualLoopIndex);
599 LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
600 ResidualLoopOpType, ResSrcGEP, ResSrcAlign, SrcIsVolatile);
606 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
607 LEI.ResidualLoopIndex);
608 StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
609 ResLoad, ResDstGEP, ResDstAlign, DstIsVolatile);
614 if (AtomicElementSize) {
624static std::pair<Value *, Value *>
627 Value *ResAddr1 = Addr1;
628 Value *ResAddr2 = Addr2;
633 if (
TTI.isValidAddrSpaceCast(AS2, AS1))
634 ResAddr2 =
B.CreateAddrSpaceCast(Addr2, Addr1->
getType());
635 else if (
TTI.isValidAddrSpaceCast(AS1, AS2))
636 ResAddr1 =
B.CreateAddrSpaceCast(Addr1, Addr2->
getType());
639 "support addrspacecast");
641 return {ResAddr1, ResAddr2};
673 Align DstAlign,
bool SrcIsVolatile,
684 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
686 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
688 bool LoopOpIsInt8 = LoopOpType == Int8Type;
692 bool RequiresResidual = !LoopOpIsInt8;
694 Type *ResidualLoopOpType = Int8Type;
695 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
699 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
701 ConstantInt::get(ILengthType, ResidualLoopOpSize);
702 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
708 Value *RuntimeLoopBytes = CopyLen;
709 Value *RuntimeLoopRemainder =
nullptr;
710 Value *SkipResidualCondition =
nullptr;
711 if (RequiresResidual) {
712 RuntimeLoopRemainder =
715 LoopOpSize, RuntimeLoopRemainder);
716 SkipResidualCondition =
717 PLBuilder.
CreateICmpEQ(RuntimeLoopRemainder, Zero,
"skip_residual");
719 Value *SkipMainCondition =
720 PLBuilder.
CreateICmpEQ(RuntimeLoopBytes, Zero,
"skip_main");
731 auto [CmpSrcAddr, CmpDstAddr] =
734 PLBuilder.
CreateICmpULT(CmpSrcAddr, CmpDstAddr,
"compare_src_dst");
737 &ThenTerm, &ElseTerm);
764 CopyBackwardsBB->
setName(
"memmove_copy_backwards");
766 CopyForwardBB->
setName(
"memmove_copy_forward");
768 ExitBB->
setName(
"memmove_done");
781 F->getContext(),
"memmove_bwd_main_loop",
F, CopyForwardBB);
787 if (RequiresResidual) {
790 F->getContext(),
"memmove_bwd_residual_loop",
F, MainLoopBB);
795 ResidualLoopPhi, CIResidualLoopOpSize,
"bwd_residual_index");
803 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
808 ResidualDstAlign, DstIsVolatile);
812 F->getContext(),
"memmove_bwd_middle",
F, MainLoopBB);
818 ResidualLoopBuilder.
CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
819 IntermediateBB, ResidualLoopBB);
821 ResidualLoopPhi->
addIncoming(ResidualIndex, ResidualLoopBB);
822 ResidualLoopPhi->
addIncoming(CopyLen, CopyBackwardsBB);
831 PredBB = IntermediateBB;
839 MainLoopBuilder.
CreateSub(MainLoopPhi, CILoopOpSize,
"bwd_main_index");
843 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
851 MainLoopPhi->
addIncoming(RuntimeLoopBytes, PredBB);
856 SkipMainCondition, ExitBB, MainLoopBB, PredBBTerm->
getIterator());
869 MainLoopBuilder.
CreatePHI(ILengthType, 0,
"fwd_main_index");
873 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
878 Value *MainIndex = MainLoopBuilder.
CreateAdd(MainLoopPhi, CILoopOpSize);
884 if (RequiresResidual)
890 MainLoopBuilder.
CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
900 if (RequiresResidual) {
905 F->getContext(),
"memmove_fwd_residual_loop",
F, ExitBB);
906 IntermediateBuilder.
CreateCondBr(SkipResidualCondition, ExitBB,
913 ResidualLoopBuilder.
CreatePHI(ILengthType, 0,
"fwd_residual_index");
917 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
922 ResidualDstAlign, DstIsVolatile);
923 Value *ResidualIndex =
924 ResidualLoopBuilder.
CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
926 ResidualLoopBuilder.
CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
928 ResidualLoopPhi->
addIncoming(ResidualIndex, ResidualLoopBB);
929 ResidualLoopPhi->
addIncoming(RuntimeLoopBytes, IntermediateBB);
940 Align DstAlign,
bool SrcIsVolatile,
955 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
957 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
958 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
967 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
968 ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
969 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
975 auto [CmpSrcAddr, CmpDstAddr] =
978 PLBuilder.
CreateICmpULT(CmpSrcAddr, CmpDstAddr,
"compare_src_dst");
981 &ThenTerm, &ElseTerm);
986 ExitBB->
setName(
"memmove_done");
998 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
1004 Value *SrcGEP = Builder.CreateInBoundsGEP(
1005 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
1007 Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
1008 Value *DstGEP = Builder.CreateInBoundsGEP(
1009 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
1010 Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
1011 BytesCopied += OperandSize;
1015 if (RemainingBytes != 0) {
1016 CopyBackwardsBB->
setName(
"memmove_bwd_residual");
1017 uint64_t BytesCopied = BytesCopiedInLoop;
1028 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1029 SrcAS, DstAS, PartSrcAlign,
1031 for (
auto *OpTy : RemainingOps) {
1035 GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
1038 if (BytesCopiedInLoop != 0) {
1041 if (RemainingBytes != 0) {
1045 PredBB = CopyBackwardsBB;
1047 CopyBackwardsBB->
setName(
"memmove_bwd_loop");
1052 Value *Index = LoopBuilder.
CreateSub(LoopPhi, CILoopOpSize,
"bwd_index");
1055 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
1073 if (BytesCopiedInLoop != 0) {
1074 CopyForwardBB->
setName(
"memmove_fwd_loop");
1077 if (RemainingBytes != 0) {
1080 "memmove_fwd_residual");
1081 FwdResidualBB = SuccBB;
1088 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
1103 if (RemainingBytes != 0) {
1104 uint64_t BytesCopied = BytesCopiedInLoop;
1111 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1112 SrcAS, DstAS, PartSrcAlign,
1114 for (
auto *OpTy : RemainingOps)
1115 GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
1123 TypeSize DstSize =
DL.getTypeStoreSize(DstType);
1125 TypeSize SetValueSize =
DL.getTypeStoreSize(SetValueType);
1126 assert(SetValueSize ==
DL.getTypeAllocSize(SetValueType) &&
1127 "Store size and alloc size of SetValue's type must match");
1128 assert(SetValueSize != 0 && DstSize % SetValueSize == 0 &&
1129 "DstType size must be a multiple of SetValue size");
1132 if (DstSize != SetValueSize) {
1141 B.CreateVectorSplat(DstSize / SetValueSize, Result,
"setvalue.splat");
1146 Result =
B.CreateBitCast(Result, DstType,
"setvalue.splat.cast");
1154 std::optional<uint64_t> AverageTripCount) {
1166 Type *TypeOfLen = Len->getType();
1170 Type *LoopOpType = Int8Type;
1174 LoopOpType =
TTI->getMemcpyLoopLoweringType(
1175 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1177 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
1178 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
1183 if (LoopEndCount != 0) {
1184 Value *SplatSetValue =
nullptr;
1194 InsertBefore, Len, LoopOpSize, 0,
"static-memset", AverageTripCount);
1195 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
1196 "Main loop should be generated for non-zero loop count");
1208 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
1209 "No residual loop was requested");
1213 uint64_t RemainingBytes = Len->getZExtValue() - BytesSet;
1214 if (RemainingBytes == 0)
1219 assert(
TTI &&
"there cannot be a residual loop without TTI");
1221 TTI->getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1222 DstAS, DstAS, DstAlign, DstAlign,
1225 Type *PreviousOpTy =
nullptr;
1226 Value *SplatSetValue =
nullptr;
1227 for (
auto *OpTy : RemainingOps) {
1228 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
1230 "Operand types cannot be scalable vector types");
1235 if (OpTy != PreviousOpTy)
1239 Int8Type, DstAddr, ConstantInt::get(TypeOfLen, BytesSet));
1242 BytesSet += OperandSize;
1243 PreviousOpTy = OpTy;
1245 assert(BytesSet == Len->getZExtValue() &&
1246 "Bytes set should match size in the call!");
1253 std::optional<uint64_t> AverageTripCount) {
1264 Type *LoopOpType = Int8Type;
1266 LoopOpType =
TTI->getMemcpyLoopLoweringType(
1267 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1269 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
1270 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
1272 Type *ResidualLoopOpType = Int8Type;
1273 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
1281 LoopExpansionInfo LEI =
1283 "dynamic-memset", AverageTripCount);
1284 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
1285 "Main loop should be generated for unknown size memset");
1297 if (!LEI.ResidualLoopIP)
1305 LEI.ResidualLoopIndex);
1314 std::optional<uint64_t> AverageTripCount) {
1329 PreferredLoopOpType =
TTI->getMemcpyLoopLoweringType(
1330 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1332 TypeSize PreferredLoopOpStoreSize =
DL.getTypeStoreSize(PreferredLoopOpType);
1334 "PreferredLoopOpType cannot be a scalable vector type");
1336 TypeSize PreferredLoopOpAllocSize =
DL.getTypeAllocSize(PreferredLoopOpType);
1339 TypeSize OriginalTypeStoreSize =
DL.getTypeStoreSize(OriginalType);
1340 TypeSize OriginalTypeAllocSize =
DL.getTypeAllocSize(OriginalType);
1351 unsigned MainLoopStep = 1;
1352 Type *MainLoopType = OriginalType;
1353 TypeSize MainLoopAllocSize = OriginalTypeAllocSize;
1354 unsigned ResidualLoopStep = 0;
1355 Type *ResidualLoopType =
nullptr;
1357 if (PreferredLoopOpStoreSize == PreferredLoopOpAllocSize &&
1358 OriginalTypeStoreSize == OriginalTypeAllocSize &&
1359 OriginalTypeStoreSize < PreferredLoopOpStoreSize &&
1360 PreferredLoopOpStoreSize % OriginalTypeStoreSize == 0) {
1363 MainLoopStep = PreferredLoopOpStoreSize / OriginalTypeStoreSize;
1364 MainLoopType = PreferredLoopOpType;
1365 MainLoopAllocSize = PreferredLoopOpStoreSize;
1367 ResidualLoopStep = 1;
1368 ResidualLoopType = OriginalType;
1373 LoopExpansionInfo LEI =
1375 "memset.pattern", AverageTripCount);
1379 if (LEI.MainLoopIP) {
1383 if (MainLoopType != OriginalType)
1395 if (!LEI.ResidualLoopIP)
1404 LEI.ResidualLoopIndex);
1409template <
typename T>
1413 const SCEV *DestSCEV = SE->
getSCEV(Memcpy->getRawDest());
1424 auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
1464 bool DstIsVolatile = SrcIsVolatile;
1470 if (SrcAS != DstAS) {
1471 if (!
TTI.addrspacesMayAlias(SrcAS, DstAS)) {
1474 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
1477 Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1478 SrcIsVolatile, DstIsVolatile,
1479 false,
TTI, std::nullopt, AverageTripCount);
1482 Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
1483 DstAlign, SrcIsVolatile, DstIsVolatile,
1484 false,
TTI, std::nullopt, AverageTripCount);
1490 if (!(
TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
1491 TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
1496 dbgs() <<
"Do not know how to expand memmove between different "
1497 "address spaces\n");
1504 Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1505 SrcIsVolatile, DstIsVolatile,
TTI);
1508 Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
1509 SrcIsVolatile, DstIsVolatile,
TTI);
1516 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memset);
1555 getAverageMemOpLoopTripCount(*Memset));
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
static Value * createMemSetSplat(const DataLayout &DL, IRBuilderBase &B, Value *SetValue, Type *DstType)
Create a Value of DstType that consists of a sequence of copies of SetValue, using bitcasts and a vec...
static std::pair< Value *, Value * > tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2, const TargetTransformInfo &TTI)
static void createMemSetPatternLoop(Instruction *InsertBefore, Value *DstAddr, Value *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static bool canOverlap(MemTransferBase< T > *Memcpy, ScalarEvolution *SE)
static void createMemMoveLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoopUnknownSize(Instruction *InsertBefore, Value *DstAddr, Value *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal)
static void createMemSetLoopKnownSize(Instruction *InsertBefore, Value *DstAddr, ConstantInt *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal, Value *RTLoopRemainder=nullptr)
static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore, Value *Len, unsigned MainLoopStep, unsigned ResidualLoopStep, StringRef BBNamePrefix, std::optional< uint64_t > ExpectedUnits)
Insert the control flow and loop counters for a memcpy/memset loop expansion.
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
This file contains the declarations for profiling metadata utility functions.
This class represents any memcpy intrinsic i.e.
uint32_t getElementSizeInBytes() const
LLVM Basic Block Representation.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
A parsed version of the target data layout string in and methods for querying it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Common base class shared among various IRBuilders.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
UnreachableInst * CreateUnreachable()
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.experimental.memset.pattern intrinsic.
Common base class for all memory transfer intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
FunctionAddr VTableAddr Value
cl::opt< bool > ProfcheckDisableMetadataFixes
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
LLVM_ABI void expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE)
Expand AtomicMemCpy as a loop. AtomicMemCpy is not deleted.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSetPattern as a loop.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< unsigned > AtomicSize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.