//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cmath>
#include <limits>
#include <optional>

24#define DEBUG_TYPE "lower-mem-intrinsics"
25
26using namespace llvm;
27
28namespace llvm {
30}
31
32/// \returns \p Len urem \p OpSize, checking for optimization opportunities.
33/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
35 Value *OpSize, unsigned OpSizeVal) {
36 // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
37 if (isPowerOf2_32(OpSizeVal))
38 return B.CreateAnd(Len, OpSizeVal - 1);
39 return B.CreateURem(Len, OpSize);
40}
41
42/// \returns (\p Len udiv \p OpSize) mul \p OpSize, checking for optimization
43/// opportunities.
44/// If \p RTLoopRemainder is provided, it must be the result of
45/// \c getRuntimeLoopRemainder() with the same arguments.
47 unsigned OpSizeVal,
48 Value *RTLoopRemainder = nullptr) {
49 if (!RTLoopRemainder)
50 RTLoopRemainder = getRuntimeLoopRemainder(B, Len, OpSize, OpSizeVal);
51 return B.CreateSub(Len, RTLoopRemainder);
52}
53
54namespace {
55/// Container for the return values of insertLoopExpansion.
56struct LoopExpansionInfo {
57 /// The instruction at the end of the main loop body.
58 Instruction *MainLoopIP = nullptr;
59
60 /// The unit index in the main loop body.
61 Value *MainLoopIndex = nullptr;
62
63 /// The instruction at the end of the residual loop body. Can be nullptr if no
64 /// residual is required.
65 Instruction *ResidualLoopIP = nullptr;
66
67 /// The unit index in the residual loop body. Can be nullptr if no residual is
68 /// required.
69 Value *ResidualLoopIndex = nullptr;
70};
71
72std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
74 return std::nullopt;
75 if (std::optional<Function::ProfileCount> EC =
76 I.getFunction()->getEntryCount();
77 !EC || !EC->getCount())
78 return std::nullopt;
79 if (const auto Len = I.getLengthInBytes())
80 return Len->getZExtValue();
81 uint64_t Total = 0;
83 getValueProfDataFromInst(I, InstrProfValueKind::IPVK_MemOPSize,
84 std::numeric_limits<uint32_t>::max(), Total);
85 if (!Total)
86 return std::nullopt;
87 uint64_t TripCount = 0;
88 for (const auto &P : ProfData)
89 TripCount += P.Count * P.Value;
90 return std::round(1.0 * TripCount / Total);
91}
92
93} // namespace
94
95/// Insert the control flow and loop counters for a memcpy/memset loop
96/// expansion.
97///
98/// This function inserts IR corresponding to the following C code before
99/// \p InsertBefore:
100/// \code
101/// LoopUnits = (Len / MainLoopStep) * MainLoopStep;
102/// ResidualUnits = Len - LoopUnits;
103/// MainLoopIndex = 0;
104/// if (LoopUnits > 0) {
105/// do {
106/// // MainLoopIP
107/// MainLoopIndex += MainLoopStep;
108/// } while (MainLoopIndex < LoopUnits);
109/// }
110/// for (size_t i = 0; i < ResidualUnits; i += ResidualLoopStep) {
111/// ResidualLoopIndex = LoopUnits + i;
112/// // ResidualLoopIP
113/// }
114/// \endcode
115///
116/// \p MainLoopStep and \p ResidualLoopStep determine by how many "units" the
117/// loop index is increased in each iteration of the main and residual loops,
118/// respectively. In most cases, the "unit" will be bytes, but larger units are
119/// useful for lowering memset.pattern.
120///
121/// The computation of \c LoopUnits and \c ResidualUnits is performed at compile
122/// time if \p Len is a \c ConstantInt.
123/// The second (residual) loop is omitted if \p ResidualLoopStep is 0 or equal
124/// to \p MainLoopStep.
125/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
126/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
127static LoopExpansionInfo
129 unsigned MainLoopStep, unsigned ResidualLoopStep,
130 StringRef BBNamePrefix,
131 std::optional<uint64_t> AverageTripCount) {
132 assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
133 "ResidualLoopStep must divide MainLoopStep if specified");
134 assert(ResidualLoopStep <= MainLoopStep &&
135 "ResidualLoopStep cannot be larger than MainLoopStep");
136 assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
137 LoopExpansionInfo LEI;
138 BasicBlock *PreLoopBB = InsertBefore->getParent();
139 BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
140 InsertBefore, BBNamePrefix + "-post-expansion");
141 Function *ParentFunc = PreLoopBB->getParent();
142 LLVMContext &Ctx = PreLoopBB->getContext();
143 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
144 IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
145 PreLoopBuilder.SetCurrentDebugLocation(DbgLoc);
146
147 // Calculate the main loop trip count and remaining units to cover after the
148 // loop.
149 Type *LenType = Len->getType();
150 IntegerType *ILenType = cast<IntegerType>(LenType);
151 ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
152
153 Value *LoopUnits = Len;
154 Value *ResidualUnits = nullptr;
155 // We can make a conditional branch unconditional if we know that the
156 // MainLoop must be executed at least once.
157 bool MustTakeMainLoop = false;
158 if (MainLoopStep != 1) {
159 if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
160 uint64_t TotalUnits = CLen->getZExtValue();
161 uint64_t LoopEndCount = alignDown(TotalUnits, MainLoopStep);
162 uint64_t ResidualCount = TotalUnits - LoopEndCount;
163 LoopUnits = ConstantInt::get(LenType, LoopEndCount);
164 ResidualUnits = ConstantInt::get(LenType, ResidualCount);
165 MustTakeMainLoop = LoopEndCount > 0;
166 // As an optimization, we could skip generating the residual loop if
167 // ResidualCount is known to be 0. However, current uses of this function
168 // don't request a residual loop if the length is constant (they generate
169 // a (potentially empty) sequence of loads and stores instead), so this
170 // optimization would have no effect here.
171 } else {
172 ResidualUnits = getRuntimeLoopRemainder(PreLoopBuilder, Len,
173 CIMainLoopStep, MainLoopStep);
174 LoopUnits = getRuntimeLoopUnits(PreLoopBuilder, Len, CIMainLoopStep,
175 MainLoopStep, ResidualUnits);
176 }
177 } else if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
178 MustTakeMainLoop = CLen->getZExtValue() > 0;
179 }
180
181 BasicBlock *MainLoopBB = BasicBlock::Create(
182 Ctx, BBNamePrefix + "-expansion-main-body", ParentFunc, PostLoopBB);
183 IRBuilder<> LoopBuilder(MainLoopBB);
184 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
185
186 PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
187 LEI.MainLoopIndex = LoopIndex;
188 LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
189
190 Value *NewIndex =
191 LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
192 LoopIndex->addIncoming(NewIndex, MainLoopBB);
193
194 // One argument of the addition is a loop-variant PHI, so it must be an
195 // Instruction (i.e., it cannot be a Constant).
196 LEI.MainLoopIP = cast<Instruction>(NewIndex);
197
198 if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
199 // Loop body for the residual accesses.
200 BasicBlock *ResLoopBB =
201 BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-body",
202 PreLoopBB->getParent(), PostLoopBB);
203 // BB to check if the residual loop is needed.
204 BasicBlock *ResidualCondBB =
205 BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-cond",
206 PreLoopBB->getParent(), ResLoopBB);
207
208 // Enter the MainLoop unless no main loop iteration is required.
209 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
210 if (MustTakeMainLoop)
211 PreLoopBuilder.CreateBr(MainLoopBB);
212 else {
213 auto *BR = PreLoopBuilder.CreateCondBr(
214 PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
215 ResidualCondBB);
216 if (AverageTripCount.has_value()) {
217 MDBuilder MDB(ParentFunc->getContext());
219 {AverageTripCount.value() % MainLoopStep, 1},
220 /*IsExpected=*/false);
221 } else {
223 }
224 }
225 PreLoopBB->getTerminator()->eraseFromParent();
226
227 // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
228 // the residual condition BB.
229 LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
230 MainLoopBB, ResidualCondBB);
231
232 // Determine if we need to branch to the residual loop or bypass it.
233 IRBuilder<> RCBuilder(ResidualCondBB);
234 RCBuilder.SetCurrentDebugLocation(DbgLoc);
235 RCBuilder.CreateCondBr(RCBuilder.CreateICmpNE(ResidualUnits, Zero),
236 ResLoopBB, PostLoopBB);
237
238 IRBuilder<> ResBuilder(ResLoopBB);
239 ResBuilder.SetCurrentDebugLocation(DbgLoc);
240 PHINode *ResidualIndex =
241 ResBuilder.CreatePHI(LenType, 2, "residual-loop-index");
242 ResidualIndex->addIncoming(Zero, ResidualCondBB);
243
244 // Add the offset at the end of the main loop to the loop counter of the
245 // residual loop to get the proper index.
246 Value *FullOffset = ResBuilder.CreateAdd(LoopUnits, ResidualIndex);
247 LEI.ResidualLoopIndex = FullOffset;
248
249 Value *ResNewIndex = ResBuilder.CreateAdd(
250 ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
251 ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
252
253 // One argument of the addition is a loop-variant PHI, so it must be an
254 // Instruction (i.e., it cannot be a Constant).
255 LEI.ResidualLoopIP = cast<Instruction>(ResNewIndex);
256
257 // Stay in the residual loop until all ResidualUnits are handled.
258 ResBuilder.CreateCondBr(
259 ResBuilder.CreateICmpULT(ResNewIndex, ResidualUnits), ResLoopBB,
260 PostLoopBB);
261 } else {
262 // There is no need for a residual loop after the main loop. We do however
263 // need to patch up the control flow by creating the terminators for the
264 // preloop block and the main loop.
265
266 // Enter the MainLoop unless no main loop iteration is required.
267 if (MustTakeMainLoop) {
268 PreLoopBuilder.CreateBr(MainLoopBB);
269 } else {
270 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
271 MDBuilder B(ParentFunc->getContext());
272 PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
273 MainLoopBB, PostLoopBB,
274 B.createLikelyBranchWeights());
275 }
276 PreLoopBB->getTerminator()->eraseFromParent();
277 // Stay in the MainLoop until we have handled all the LoopUnits.
278 auto *Br = LoopBuilder.CreateCondBr(
279 LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
280 if (AverageTripCount.has_value())
281 setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
282 /*IsExpected=*/false);
283 else
285 }
286 return LEI;
287}
288
290 Value *DstAddr, ConstantInt *CopyLen,
291 Align SrcAlign, Align DstAlign,
292 bool SrcIsVolatile, bool DstIsVolatile,
293 bool CanOverlap,
295 std::optional<uint32_t> AtomicElementSize,
296 std::optional<uint64_t> AverageTripCount) {
297 // No need to expand zero length copies.
298 if (CopyLen->isZero())
299 return;
300
301 BasicBlock *PreLoopBB = InsertBefore->getParent();
302 Function *ParentFunc = PreLoopBB->getParent();
303 LLVMContext &Ctx = PreLoopBB->getContext();
304 const DataLayout &DL = ParentFunc->getDataLayout();
305 MDBuilder MDB(Ctx);
306 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
307 StringRef Name = "MemCopyAliasScope";
308 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
309
310 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
311 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
312
313 Type *TypeOfCopyLen = CopyLen->getType();
314 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
315 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
316 assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
317 "Atomic memcpy lowering is not supported for vector operand type");
318
319 Type *Int8Type = Type::getInt8Ty(Ctx);
320 TypeSize LoopOpSize = DL.getTypeStoreSize(LoopOpType);
321 assert(LoopOpSize.isFixed() && "LoopOpType cannot be a scalable vector type");
322 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
323 "Atomic memcpy lowering is not supported for selected operand size");
324
325 uint64_t LoopEndCount =
326 alignDown(CopyLen->getZExtValue(), LoopOpSize.getFixedValue());
327
328 // Skip the loop expansion entirely if the loop would never be taken.
329 if (LoopEndCount != 0) {
330 LoopExpansionInfo LEI =
331 insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
332 "static-memcpy", AverageTripCount);
333
334 // Fill MainLoopBB
335 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
336 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
337 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
338
339 // If we used LoopOpType as GEP element type, we would iterate over the
340 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
341 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
342 // byte offsets computed from the TypeStoreSize.
343 Value *SrcGEP =
344 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
345 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
346 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
347 if (!CanOverlap) {
348 // Set alias scope for loads.
349 Load->setMetadata(LLVMContext::MD_alias_scope,
350 MDNode::get(Ctx, NewScope));
351 }
352 Value *DstGEP =
353 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
354 StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
355 Load, DstGEP, PartDstAlign, DstIsVolatile);
356 if (!CanOverlap) {
357 // Indicate that stores don't overlap loads.
358 Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
359 }
360 if (AtomicElementSize) {
361 Load->setAtomic(AtomicOrdering::Unordered);
362 Store->setAtomic(AtomicOrdering::Unordered);
363 }
364 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
365 "No residual loop was requested");
366 }
367
368 // Copy the remaining bytes with straight-line code.
369 uint64_t BytesCopied = LoopEndCount;
370 uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
371 if (RemainingBytes == 0)
372 return;
373
374 IRBuilder<> RBuilder(InsertBefore);
375 SmallVector<Type *, 5> RemainingOps;
376 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
377 SrcAS, DstAS, SrcAlign, DstAlign,
378 AtomicElementSize);
379
380 for (auto *OpTy : RemainingOps) {
381 Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
382 Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));
383
384 TypeSize OperandSize = DL.getTypeStoreSize(OpTy);
385 assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
386 "Atomic memcpy lowering is not supported for selected operand size");
387
388 Value *SrcGEP = RBuilder.CreateInBoundsGEP(
389 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
390 LoadInst *Load =
391 RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
392 if (!CanOverlap) {
393 // Set alias scope for loads.
394 Load->setMetadata(LLVMContext::MD_alias_scope,
395 MDNode::get(Ctx, NewScope));
396 }
397 Value *DstGEP = RBuilder.CreateInBoundsGEP(
398 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
399 StoreInst *Store =
400 RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
401 if (!CanOverlap) {
402 // Indicate that stores don't overlap loads.
403 Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
404 }
405 if (AtomicElementSize) {
406 Load->setAtomic(AtomicOrdering::Unordered);
407 Store->setAtomic(AtomicOrdering::Unordered);
408 }
409 BytesCopied += OperandSize;
410 }
411 assert(BytesCopied == CopyLen->getZExtValue() &&
412 "Bytes copied should match size in the call!");
413}
414
416 Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
417 Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
418 bool CanOverlap, const TargetTransformInfo &TTI,
419 std::optional<uint32_t> AtomicElementSize,
420 std::optional<uint64_t> AverageTripCount) {
421 BasicBlock *PreLoopBB = InsertBefore->getParent();
422 Function *ParentFunc = PreLoopBB->getParent();
423 const DataLayout &DL = ParentFunc->getDataLayout();
424 LLVMContext &Ctx = PreLoopBB->getContext();
425 MDBuilder MDB(Ctx);
426 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
427 StringRef Name = "MemCopyAliasScope";
428 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
429
430 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
431 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
432
433 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
434 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
435 assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
436 "Atomic memcpy lowering is not supported for vector operand type");
437 TypeSize LoopOpSize = DL.getTypeStoreSize(LoopOpType);
438 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
439 "Atomic memcpy lowering is not supported for selected operand size");
440
441 Type *Int8Type = Type::getInt8Ty(Ctx);
442
443 Type *ResidualLoopOpType = AtomicElementSize
444 ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
445 : Int8Type;
446 TypeSize ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);
447 assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
448 "Store size is expected to match type size");
449
450 LoopExpansionInfo LEI =
451 insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
452 "dynamic-memcpy", AverageTripCount);
453
454 // Fill MainLoopBB
455 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
456 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
457 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
458
459 // If we used LoopOpType as GEP element type, we would iterate over the
460 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
461 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use byte
462 // offsets computed from the TypeStoreSize.
463 Value *SrcGEP =
464 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
465 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
466 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
467 if (!CanOverlap) {
468 // Set alias scope for loads.
469 Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
470 }
471 Value *DstGEP =
472 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
473 StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
474 Load, DstGEP, PartDstAlign, DstIsVolatile);
475 if (!CanOverlap) {
476 // Indicate that stores don't overlap loads.
477 Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
478 }
479 if (AtomicElementSize) {
482 }
483
484 // Fill ResidualLoopBB.
485 if (!LEI.ResidualLoopIP)
486 return;
487
488 Align ResSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
489 Align ResDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));
490
491 IRBuilder<> ResLoopBuilder(LEI.ResidualLoopIP);
492 Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
493 LEI.ResidualLoopIndex);
494 LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
495 ResidualLoopOpType, ResSrcGEP, ResSrcAlign, SrcIsVolatile);
496 if (!CanOverlap) {
497 // Set alias scope for loads.
498 ResLoad->setMetadata(LLVMContext::MD_alias_scope,
499 MDNode::get(Ctx, NewScope));
500 }
501 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
502 LEI.ResidualLoopIndex);
503 StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
504 ResLoad, ResDstGEP, ResDstAlign, DstIsVolatile);
505 if (!CanOverlap) {
506 // Indicate that stores don't overlap loads.
507 ResStore->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
508 }
509 if (AtomicElementSize) {
512 }
513}
514
515// If \p Addr1 and \p Addr2 are pointers to different address spaces, create an
516// addresspacecast to obtain a pair of pointers in the same addressspace. The
517// caller needs to ensure that addrspacecasting is possible.
518// No-op if the pointers are in the same address space.
519static std::pair<Value *, Value *>
521 const TargetTransformInfo &TTI) {
522 Value *ResAddr1 = Addr1;
523 Value *ResAddr2 = Addr2;
524
525 unsigned AS1 = cast<PointerType>(Addr1->getType())->getAddressSpace();
526 unsigned AS2 = cast<PointerType>(Addr2->getType())->getAddressSpace();
527 if (AS1 != AS2) {
528 if (TTI.isValidAddrSpaceCast(AS2, AS1))
529 ResAddr2 = B.CreateAddrSpaceCast(Addr2, Addr1->getType());
530 else if (TTI.isValidAddrSpaceCast(AS1, AS2))
531 ResAddr1 = B.CreateAddrSpaceCast(Addr1, Addr2->getType());
532 else
533 llvm_unreachable("Can only lower memmove between address spaces if they "
534 "support addrspacecast");
535 }
536 return {ResAddr1, ResAddr2};
537}
538
539// Lower memmove to IR. memmove is required to correctly copy overlapping memory
540// regions; therefore, it has to check the relative positions of the source and
541// destination pointers and choose the copy direction accordingly.
542//
543// The code below is an IR rendition of this C function:
544//
545// void* memmove(void* dst, const void* src, size_t n) {
546// unsigned char* d = dst;
547// const unsigned char* s = src;
548// if (s < d) {
549// // copy backwards
550// while (n--) {
551// d[n] = s[n];
552// }
553// } else {
554// // copy forward
555// for (size_t i = 0; i < n; ++i) {
556// d[i] = s[i];
557// }
558// }
559// return dst;
560// }
561//
562// If the TargetTransformInfo specifies a wider MemcpyLoopLoweringType, it is
563// used for the memory accesses in the loops. Then, additional loops with
564// byte-wise accesses are added for the remaining bytes.
566 Value *SrcAddr, Value *DstAddr,
567 Value *CopyLen, Align SrcAlign,
568 Align DstAlign, bool SrcIsVolatile,
569 bool DstIsVolatile,
570 const TargetTransformInfo &TTI) {
571 Type *TypeOfCopyLen = CopyLen->getType();
572 BasicBlock *OrigBB = InsertBefore->getParent();
573 Function *F = OrigBB->getParent();
574 const DataLayout &DL = F->getDataLayout();
575 LLVMContext &Ctx = OrigBB->getContext();
576 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
577 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
578
579 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
580 SrcAlign, DstAlign);
581 TypeSize LoopOpSize = DL.getTypeStoreSize(LoopOpType);
582 Type *Int8Type = Type::getInt8Ty(Ctx);
583 bool LoopOpIsInt8 = LoopOpType == Int8Type;
584
585 // If the memory accesses are wider than one byte, residual loops with
586 // i8-accesses are required to move remaining bytes.
587 bool RequiresResidual = !LoopOpIsInt8;
588
589 Type *ResidualLoopOpType = Int8Type;
590 TypeSize ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);
591
592 // Calculate the loop trip count and remaining bytes to copy after the loop.
593 IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
594 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
595 ConstantInt *CIResidualLoopOpSize =
596 ConstantInt::get(ILengthType, ResidualLoopOpSize);
597 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
598
599 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
600 IRBuilder<> PLBuilder(InsertBefore);
601 PLBuilder.SetCurrentDebugLocation(DbgLoc);
602
603 Value *RuntimeLoopBytes = CopyLen;
604 Value *RuntimeLoopRemainder = nullptr;
605 Value *SkipResidualCondition = nullptr;
606 if (RequiresResidual) {
607 RuntimeLoopRemainder =
608 getRuntimeLoopRemainder(PLBuilder, CopyLen, CILoopOpSize, LoopOpSize);
609 RuntimeLoopBytes = getRuntimeLoopUnits(PLBuilder, CopyLen, CILoopOpSize,
610 LoopOpSize, RuntimeLoopRemainder);
611 SkipResidualCondition =
612 PLBuilder.CreateICmpEQ(RuntimeLoopRemainder, Zero, "skip_residual");
613 }
614 Value *SkipMainCondition =
615 PLBuilder.CreateICmpEQ(RuntimeLoopBytes, Zero, "skip_main");
616
617 // Create the a comparison of src and dst, based on which we jump to either
618 // the forward-copy part of the function (if src >= dst) or the backwards-copy
619 // part (if src < dst).
620 // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
621 // structure. Its block terminators (unconditional branches) are replaced by
622 // the appropriate conditional branches when the loop is built.
623 // If the pointers are in different address spaces, they need to be converted
624 // to a compatible one. Cases where memory ranges in the different address
625 // spaces cannot overlap are lowered as memcpy and not handled here.
626 auto [CmpSrcAddr, CmpDstAddr] =
627 tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
628 Value *PtrCompare =
629 PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
630 Instruction *ThenTerm, *ElseTerm;
631 SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
632 &ThenTerm, &ElseTerm);
633
634 // If the LoopOpSize is greater than 1, each part of the function consists of
635 // four blocks:
636 // memmove_copy_backwards:
637 // skip the residual loop when 0 iterations are required
638 // memmove_bwd_residual_loop:
639 // copy the last few bytes individually so that the remaining length is
640 // a multiple of the LoopOpSize
641 // memmove_bwd_middle: skip the main loop when 0 iterations are required
642 // memmove_bwd_main_loop: the actual backwards loop BB with wide accesses
643 // memmove_copy_forward: skip the main loop when 0 iterations are required
644 // memmove_fwd_main_loop: the actual forward loop BB with wide accesses
645 // memmove_fwd_middle: skip the residual loop when 0 iterations are required
646 // memmove_fwd_residual_loop: copy the last few bytes individually
647 //
648 // The main and residual loop are switched between copying forward and
649 // backward so that the residual loop always operates on the end of the moved
650 // range. This is based on the assumption that buffers whose start is aligned
651 // with the LoopOpSize are more common than buffers whose end is.
652 //
653 // If the LoopOpSize is 1, each part of the function consists of two blocks:
654 // memmove_copy_backwards: skip the loop when 0 iterations are required
655 // memmove_bwd_main_loop: the actual backwards loop BB
656 // memmove_copy_forward: skip the loop when 0 iterations are required
657 // memmove_fwd_main_loop: the actual forward loop BB
658 BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
659 CopyBackwardsBB->setName("memmove_copy_backwards");
660 BasicBlock *CopyForwardBB = ElseTerm->getParent();
661 CopyForwardBB->setName("memmove_copy_forward");
662 BasicBlock *ExitBB = InsertBefore->getParent();
663 ExitBB->setName("memmove_done");
664
665 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
666 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
667
668 // Accesses in the residual loops do not share the same alignment as those in
669 // the main loops.
670 Align ResidualSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
671 Align ResidualDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));
672
673 // Copying backwards.
674 {
675 BasicBlock *MainLoopBB = BasicBlock::Create(
676 F->getContext(), "memmove_bwd_main_loop", F, CopyForwardBB);
677
678 // The predecessor of the memmove_bwd_main_loop. Updated in the
679 // following if a residual loop is emitted first.
680 BasicBlock *PredBB = CopyBackwardsBB;
681
682 if (RequiresResidual) {
683 // backwards residual loop
684 BasicBlock *ResidualLoopBB = BasicBlock::Create(
685 F->getContext(), "memmove_bwd_residual_loop", F, MainLoopBB);
686 IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
687 ResidualLoopBuilder.SetCurrentDebugLocation(DbgLoc);
688 PHINode *ResidualLoopPhi = ResidualLoopBuilder.CreatePHI(ILengthType, 0);
689 Value *ResidualIndex = ResidualLoopBuilder.CreateSub(
690 ResidualLoopPhi, CIResidualLoopOpSize, "bwd_residual_index");
691 // If we used LoopOpType as GEP element type, we would iterate over the
692 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
693 // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore,
694 // use byte offsets computed from the TypeStoreSize.
695 Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
696 ResidualIndex);
697 Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
698 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
699 "element");
700 Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
701 ResidualIndex);
702 ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
703 ResidualDstAlign, DstIsVolatile);
704
705 // After the residual loop, go to an intermediate block.
706 BasicBlock *IntermediateBB = BasicBlock::Create(
707 F->getContext(), "memmove_bwd_middle", F, MainLoopBB);
708 // Later code expects a terminator in the PredBB.
709 IRBuilder<> IntermediateBuilder(IntermediateBB);
710 IntermediateBuilder.SetCurrentDebugLocation(DbgLoc);
711 IntermediateBuilder.CreateUnreachable();
712 ResidualLoopBuilder.CreateCondBr(
713 ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
714 IntermediateBB, ResidualLoopBB);
715
716 ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
717 ResidualLoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
718
719 // How to get to the residual:
720 BranchInst *BrInst =
721 BranchInst::Create(IntermediateBB, ResidualLoopBB,
722 SkipResidualCondition, ThenTerm->getIterator());
723 BrInst->setDebugLoc(DbgLoc);
724 ThenTerm->eraseFromParent();
725
726 PredBB = IntermediateBB;
727 }
728
729 // main loop
730 IRBuilder<> MainLoopBuilder(MainLoopBB);
731 MainLoopBuilder.SetCurrentDebugLocation(DbgLoc);
732 PHINode *MainLoopPhi = MainLoopBuilder.CreatePHI(ILengthType, 0);
733 Value *MainIndex =
734 MainLoopBuilder.CreateSub(MainLoopPhi, CILoopOpSize, "bwd_main_index");
735 Value *LoadGEP =
736 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainIndex);
737 Value *Element = MainLoopBuilder.CreateAlignedLoad(
738 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
739 Value *StoreGEP =
740 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainIndex);
741 MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
742 DstIsVolatile);
743 MainLoopBuilder.CreateCondBr(MainLoopBuilder.CreateICmpEQ(MainIndex, Zero),
744 ExitBB, MainLoopBB);
745 MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
746 MainLoopPhi->addIncoming(RuntimeLoopBytes, PredBB);
747
748 // How to get to the main loop:
749 Instruction *PredBBTerm = PredBB->getTerminator();
751 ExitBB, MainLoopBB, SkipMainCondition, PredBBTerm->getIterator());
752 BrInst->setDebugLoc(DbgLoc);
753 PredBBTerm->eraseFromParent();
754 }
755
756 // Copying forward.
757 // main loop
758 {
759 BasicBlock *MainLoopBB =
760 BasicBlock::Create(F->getContext(), "memmove_fwd_main_loop", F, ExitBB);
761 IRBuilder<> MainLoopBuilder(MainLoopBB);
762 MainLoopBuilder.SetCurrentDebugLocation(DbgLoc);
763 PHINode *MainLoopPhi =
764 MainLoopBuilder.CreatePHI(ILengthType, 0, "fwd_main_index");
765 Value *LoadGEP =
766 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainLoopPhi);
767 Value *Element = MainLoopBuilder.CreateAlignedLoad(
768 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
769 Value *StoreGEP =
770 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainLoopPhi);
771 MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
772 DstIsVolatile);
773 Value *MainIndex = MainLoopBuilder.CreateAdd(MainLoopPhi, CILoopOpSize);
774 MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
775 MainLoopPhi->addIncoming(Zero, CopyForwardBB);
776
777 Instruction *CopyFwdBBTerm = CopyForwardBB->getTerminator();
778 BasicBlock *SuccessorBB = ExitBB;
779 if (RequiresResidual)
780 SuccessorBB =
781 BasicBlock::Create(F->getContext(), "memmove_fwd_middle", F, ExitBB);
782
783 // leaving or staying in the main loop
784 MainLoopBuilder.CreateCondBr(
785 MainLoopBuilder.CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
786 MainLoopBB);
787
788 // getting in or skipping the main loop
789 BranchInst *BrInst =
790 BranchInst::Create(SuccessorBB, MainLoopBB, SkipMainCondition,
791 CopyFwdBBTerm->getIterator());
792 BrInst->setDebugLoc(DbgLoc);
793 CopyFwdBBTerm->eraseFromParent();
794
795 if (RequiresResidual) {
796 BasicBlock *IntermediateBB = SuccessorBB;
797 IRBuilder<> IntermediateBuilder(IntermediateBB);
798 IntermediateBuilder.SetCurrentDebugLocation(DbgLoc);
799 BasicBlock *ResidualLoopBB = BasicBlock::Create(
800 F->getContext(), "memmove_fwd_residual_loop", F, ExitBB);
801 IntermediateBuilder.CreateCondBr(SkipResidualCondition, ExitBB,
802 ResidualLoopBB);
803
804 // Residual loop
805 IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
806 ResidualLoopBuilder.SetCurrentDebugLocation(DbgLoc);
807 PHINode *ResidualLoopPhi =
808 ResidualLoopBuilder.CreatePHI(ILengthType, 0, "fwd_residual_index");
809 Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
810 ResidualLoopPhi);
811 Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
812 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
813 "element");
814 Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
815 ResidualLoopPhi);
816 ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
817 ResidualDstAlign, DstIsVolatile);
818 Value *ResidualIndex =
819 ResidualLoopBuilder.CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
820 ResidualLoopBuilder.CreateCondBr(
821 ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
822 ResidualLoopBB);
823 ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
824 ResidualLoopPhi->addIncoming(RuntimeLoopBytes, IntermediateBB);
825 }
826 }
827}
828
829// Similar to createMemMoveLoopUnknownSize, only the trip counts are computed at
830// compile time, obsolete loops and branches are omitted, and the residual code
831// is straight-line code instead of a loop.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
                                       Value *SrcAddr, Value *DstAddr,
                                       ConstantInt *CopyLen, Align SrcAlign,
                                       Align DstAlign, bool SrcIsVolatile,
                                       bool DstIsVolatile,
                                       const TargetTransformInfo &TTI) {
  // No need to expand zero length moves.
  if (CopyLen->isZero())
    return;

  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  LLVMContext &Ctx = OrigBB->getContext();
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  // Let the target pick the widest profitable access type for the main loop.
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                   SrcAlign, DstAlign);
  TypeSize LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert(LoopOpSize.isFixed() && "LoopOpType cannot be a scalable vector type");
  Type *Int8Type = Type::getInt8Ty(Ctx);

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  uint64_t BytesCopiedInLoop =
      alignDown(CopyLen->getZExtValue(), LoopOpSize.getFixedValue());
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopiedInLoop;

  IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
  ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
  ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);

  const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
  IRBuilder<> PLBuilder(InsertBefore);
  PLBuilder.SetCurrentDebugLocation(DbgLoc);

  // Decide the copy direction at run time: if Src is (unsigned) below Dst,
  // branch to the backward-copying blocks, otherwise to the forward ones.
  auto [CmpSrcAddr, CmpDstAddr] =
      tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
  Value *PtrCompare =
      PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  // Conservative alignment that holds for every loop-stride access.
  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // Helper function to generate a load/store pair of a given type in the
  // residual. Used in the forward and backward branches.
  auto GenerateResidualLdStPair = [&](Type *OpTy, IRBuilderBase &Builder,
                                      uint64_t &BytesCopied) {
    Align ResSrcAlign(commonAlignment(SrcAlign, BytesCopied));
    Align ResDstAlign(commonAlignment(DstAlign, BytesCopied));

    TypeSize OperandSize = DL.getTypeStoreSize(OpTy);

    // If we used LoopOpType as GEP element type, we would iterate over the
    // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
    // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
    // byte offsets computed from the TypeStoreSize.
    Value *SrcGEP = Builder.CreateInBoundsGEP(
        Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    LoadInst *Load =
        Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
    Value *DstGEP = Builder.CreateInBoundsGEP(
        Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
    BytesCopied += OperandSize;
  };

  // Copying backwards. The residual runs first (it covers the highest
  // addresses), then the main loop counts down to zero.
  if (RemainingBytes != 0) {
    CopyBackwardsBB->setName("memmove_bwd_residual");
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. We need the same
    // instructions as in the forward case, only in reverse. So we generate code
    // the same way, except that we change the IRBuilder insert point for each
    // load/store pair so that each one is inserted before the previous one
    // instead of after it.
    IRBuilder<> BwdResBuilder(CopyBackwardsBB,
                              CopyBackwardsBB->getFirstNonPHIIt());
    BwdResBuilder.SetCurrentDebugLocation(DbgLoc);
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps) {
      // reverse the order of the emitted operations
      BwdResBuilder.SetInsertPoint(CopyBackwardsBB,
                                   CopyBackwardsBB->getFirstNonPHIIt());
      GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
    }
  }
  if (BytesCopiedInLoop != 0) {
    BasicBlock *LoopBB = CopyBackwardsBB;
    BasicBlock *PredBB = OrigBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      LoopBB = CopyBackwardsBB->splitBasicBlock(
          CopyBackwardsBB->getTerminator(), "memmove_bwd_loop");
      PredBB = CopyBackwardsBB;
    } else {
      CopyBackwardsBB->setName("memmove_bwd_loop");
    }
    // Backward main loop: the PHI starts at LoopBound and is decremented by
    // the loop-op size each iteration; the loop exits when the index hits 0.
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    LoopBuilder.SetCurrentDebugLocation(DbgLoc);
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0);
    Value *Index = LoopBuilder.CreateSub(LoopPhi, CILoopOpSize, "bwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, Index);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, Index);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);

    // Replace the unconditional branch introduced by
    // SplitBlockAndInsertIfThenElse to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, Zero), ExitBB,
                             LoopBB);
    UncondTerm->eraseFromParent();

    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(LoopBound, PredBB);
  }

  // Copying forward. The main loop counts up from 0 to LoopBound; the
  // residual (if any) follows it.
  BasicBlock *FwdResidualBB = CopyForwardBB;
  if (BytesCopiedInLoop != 0) {
    CopyForwardBB->setName("memmove_fwd_loop");
    BasicBlock *LoopBB = CopyForwardBB;
    BasicBlock *SuccBB = ExitBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      SuccBB = CopyForwardBB->splitBasicBlock(CopyForwardBB->getTerminator(),
                                              "memmove_fwd_residual");
      FwdResidualBB = SuccBB;
    }
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    LoopBuilder.SetCurrentDebugLocation(DbgLoc);
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0, "fwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LoopPhi);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LoopPhi);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);
    Value *Index = LoopBuilder.CreateAdd(LoopPhi, CILoopOpSize);
    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(Zero, OrigBB);

    // Replace the unconditional branch to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, LoopBound), SuccBB,
                             LoopBB);
    UncondTerm->eraseFromParent();
  }

  if (RemainingBytes != 0) {
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. In the forward
    // case, we emit it in the normal order.
    IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
    FwdResBuilder.SetCurrentDebugLocation(DbgLoc);
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps)
      GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
  }
}
1013
1014/// Create a Value of \p DstType that consists of a sequence of copies of
1015/// \p SetValue, using bitcasts and a vector splat.
1017 Value *SetValue, Type *DstType) {
1018 TypeSize DstSize = DL.getTypeStoreSize(DstType);
1019 Type *SetValueType = SetValue->getType();
1020 TypeSize SetValueSize = DL.getTypeStoreSize(SetValueType);
1021 assert(SetValueSize == DL.getTypeAllocSize(SetValueType) &&
1022 "Store size and alloc size of SetValue's type must match");
1023 assert(SetValueSize != 0 && DstSize % SetValueSize == 0 &&
1024 "DstType size must be a multiple of SetValue size");
1025
1026 Value *Result = SetValue;
1027 if (DstSize != SetValueSize) {
1028 if (!SetValueType->isIntegerTy() && !SetValueType->isFloatingPointTy()) {
1029 // If the type cannot be put into a vector, bitcast to iN first.
1030 LLVMContext &Ctx = SetValue->getContext();
1031 Result = B.CreateBitCast(Result, Type::getIntNTy(Ctx, SetValueSize * 8),
1032 "setvalue.toint");
1033 }
1034 // Form a sufficiently large vector consisting of SetValue, repeated.
1035 Result =
1036 B.CreateVectorSplat(DstSize / SetValueSize, Result, "setvalue.splat");
1037 }
1038
1039 // The value has the right size, but we might have to bitcast it to the right
1040 // type.
1041 Result = B.CreateBitCast(Result, DstType, "setvalue.splat.cast");
1042 return Result;
1043}
1044
1045static void
1047 ConstantInt *Len, Value *SetValue, Align DstAlign,
1048 bool IsVolatile, const TargetTransformInfo *TTI,
1049 std::optional<uint64_t> AverageTripCount) {
1050 // No need to expand zero length memsets.
1051 if (Len->isZero())
1052 return;
1053
1054 BasicBlock *PreLoopBB = InsertBefore->getParent();
1055 Function *ParentFunc = PreLoopBB->getParent();
1056 const DataLayout &DL = ParentFunc->getDataLayout();
1057 LLVMContext &Ctx = PreLoopBB->getContext();
1058
1059 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
1060
1061 Type *TypeOfLen = Len->getType();
1062 Type *Int8Type = Type::getInt8Ty(Ctx);
1063 assert(SetValue->getType() == Int8Type && "Can only set bytes");
1064
1065 Type *LoopOpType = Int8Type;
1066 if (TTI) {
1067 // Use the same memory access type as for a memcpy with the same Dst and Src
1068 // alignment and address space.
1069 LoopOpType = TTI->getMemcpyLoopLoweringType(
1070 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1071 }
1072 TypeSize LoopOpSize = DL.getTypeStoreSize(LoopOpType);
1073 assert(LoopOpSize.isFixed() && "LoopOpType cannot be a scalable vector type");
1074
1075 uint64_t LoopEndCount =
1076 alignDown(Len->getZExtValue(), LoopOpSize.getFixedValue());
1077
1078 if (LoopEndCount != 0) {
1079 Value *SplatSetValue = nullptr;
1080 {
1081 IRBuilder<> PreLoopBuilder(InsertBefore);
1082 SplatSetValue =
1083 createMemSetSplat(DL, PreLoopBuilder, SetValue, LoopOpType);
1084 }
1085
1086 // Don't generate a residual loop, the remaining bytes are set with
1087 // straight-line code.
1088 LoopExpansionInfo LEI = insertLoopExpansion(
1089 InsertBefore, Len, LoopOpSize, 0, "static-memset", AverageTripCount);
1090
1091 // Fill MainLoopBB
1092 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
1093 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
1094
1095 Value *DstGEP =
1096 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
1097
1098 MainLoopBuilder.CreateAlignedStore(SplatSetValue, DstGEP, PartDstAlign,
1099 IsVolatile);
1100
1101 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
1102 "No residual loop was requested");
1103 }
1104
1105 uint64_t BytesSet = LoopEndCount;
1106 uint64_t RemainingBytes = Len->getZExtValue() - BytesSet;
1107 if (RemainingBytes == 0)
1108 return;
1109
1110 IRBuilder<> RBuilder(InsertBefore);
1111
1112 assert(TTI && "there cannot be a residual loop without TTI");
1113 SmallVector<Type *, 5> RemainingOps;
1114 TTI->getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1115 DstAS, DstAS, DstAlign, DstAlign,
1116 std::nullopt);
1117
1118 Type *PreviousOpTy = nullptr;
1119 Value *SplatSetValue = nullptr;
1120 for (auto *OpTy : RemainingOps) {
1121 TypeSize OperandSize = DL.getTypeStoreSize(OpTy);
1122 assert(OperandSize.isFixed() &&
1123 "Operand types cannot be scalable vector types");
1124 Align PartDstAlign(commonAlignment(DstAlign, BytesSet));
1125
1126 // Avoid recomputing the splat SetValue if it's the same as for the last
1127 // iteration.
1128 if (OpTy != PreviousOpTy)
1129 SplatSetValue = createMemSetSplat(DL, RBuilder, SetValue, OpTy);
1130
1131 Value *DstGEP = RBuilder.CreateInBoundsGEP(
1132 Int8Type, DstAddr, ConstantInt::get(TypeOfLen, BytesSet));
1133 RBuilder.CreateAlignedStore(SplatSetValue, DstGEP, PartDstAlign,
1134 IsVolatile);
1135 BytesSet += OperandSize;
1136 PreviousOpTy = OpTy;
1137 }
1138 assert(BytesSet == Len->getZExtValue() &&
1139 "Bytes set should match size in the call!");
1140}
1141
1142static void
1144 Value *Len, Value *SetValue, Align DstAlign,
1145 bool IsVolatile, const TargetTransformInfo *TTI,
1146 std::optional<uint64_t> AverageTripCount) {
1147 BasicBlock *PreLoopBB = InsertBefore->getParent();
1148 Function *ParentFunc = PreLoopBB->getParent();
1149 const DataLayout &DL = ParentFunc->getDataLayout();
1150 LLVMContext &Ctx = PreLoopBB->getContext();
1151
1152 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
1153
1154 Type *Int8Type = Type::getInt8Ty(Ctx);
1155 assert(SetValue->getType() == Int8Type && "Can only set bytes");
1156
1157 Type *LoopOpType = Int8Type;
1158 if (TTI) {
1159 LoopOpType = TTI->getMemcpyLoopLoweringType(
1160 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1161 }
1162 TypeSize LoopOpSize = DL.getTypeStoreSize(LoopOpType);
1163 assert(LoopOpSize.isFixed() && "LoopOpType cannot be a scalable vector type");
1164
1165 Type *ResidualLoopOpType = Int8Type;
1166 TypeSize ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);
1167
1168 Value *SplatSetValue = SetValue;
1169 {
1170 IRBuilder<> PreLoopBuilder(InsertBefore);
1171 SplatSetValue = createMemSetSplat(DL, PreLoopBuilder, SetValue, LoopOpType);
1172 }
1173
1174 LoopExpansionInfo LEI =
1175 insertLoopExpansion(InsertBefore, Len, LoopOpSize, ResidualLoopOpSize,
1176 "dynamic-memset", AverageTripCount);
1177
1178 // Fill MainLoopBB
1179 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
1180 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
1181
1182 Value *DstGEP =
1183 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
1184 MainLoopBuilder.CreateAlignedStore(SplatSetValue, DstGEP, PartDstAlign,
1185 IsVolatile);
1186
1187 // Fill ResidualLoopBB
1188 if (!LEI.ResidualLoopIP)
1189 return;
1190
1191 Align ResDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));
1192
1193 IRBuilder<> ResLoopBuilder(LEI.ResidualLoopIP);
1194
1195 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
1196 LEI.ResidualLoopIndex);
1197 ResLoopBuilder.CreateAlignedStore(SetValue, ResDstGEP, ResDstAlign,
1198 IsVolatile);
1199}
1200
// Emit a simple one-element-per-iteration store loop that writes SetValue to
// [DstAddr, DstAddr + CopyLen * sizeof(SetValue's type)). The loop is skipped
// entirely when CopyLen is 0.
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             std::optional<uint64_t> AverageTripCount,
                             bool IsVolatile) {
  // Currently no longer used for memset, only for memset.pattern.
  // TODO: Update the memset.pattern lowering to also use the loop expansion
  // framework and remove this function.
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  // Split off the code after the memset into NewBB; LoopBB sits between the
  // pre-loop block and NewBB.
  BasicBlock *NewBB =
      OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB
    = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
  IRBuilder<> Builder(OrigBB->getTerminator());
  Builder.SetCurrentDebugLocation(DbgLoc);

  // Guard branch: skip the loop when CopyLen == 0.
  auto *ToLoopBR = Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  MDBuilder MDB(F->getContext());
  // NOTE(review): the second setMetadata argument and the else-branch body
  // appear to have been lost in extraction (presumably branch-weight metadata
  // derived from AverageTripCount) — confirm against upstream LLVM.
  if (AverageTripCount.has_value())
    ToLoopBR->setMetadata(LLVMContext::MD_prof,
  else

  OrigBB->getTerminator()->eraseFromParent();

  TypeSize PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  LoopBuilder.SetCurrentDebugLocation(DbgLoc);
  // Element index, counted in units of SetValue's type (GEP element type
  // below), starting at 0.
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  // Back edge while NewIndex < CopyLen, otherwise fall through to NewBB.
  // NOTE(review): the else-branch body below was lost in extraction — confirm
  // against upstream LLVM.
  auto *LoopBR = LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
  if (AverageTripCount.has_value())
    setFittedBranchWeights(*LoopBR, {AverageTripCount.value(), 1},
                           /*IsExpected=*/false);
  else
}
1258
1259template <typename T>
1261 if (SE) {
1262 const SCEV *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
1263 const SCEV *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
1264 if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
1265 return false;
1266 }
1267 return true;
1268}
1269
1271 const TargetTransformInfo &TTI,
1272 ScalarEvolution *SE) {
1273 bool CanOverlap = canOverlap(Memcpy, SE);
1274 auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
1275 if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
1277 /*InsertBefore=*/Memcpy,
1278 /*SrcAddr=*/Memcpy->getRawSource(),
1279 /*DstAddr=*/Memcpy->getRawDest(),
1280 /*CopyLen=*/CI,
1281 /*SrcAlign=*/Memcpy->getSourceAlign().valueOrOne(),
1282 /*DstAlign=*/Memcpy->getDestAlign().valueOrOne(),
1283 /*SrcIsVolatile=*/Memcpy->isVolatile(),
1284 /*DstIsVolatile=*/Memcpy->isVolatile(),
1285 /*CanOverlap=*/CanOverlap,
1286 /*TTI=*/TTI,
1287 /*AtomicElementSize=*/std::nullopt,
1288 /*AverageTripCount=*/TripCount);
1289 } else {
1291 /*InsertBefore=*/Memcpy,
1292 /*SrcAddr=*/Memcpy->getRawSource(),
1293 /*DstAddr=*/Memcpy->getRawDest(),
1294 /*CopyLen=*/Memcpy->getLength(),
1295 /*SrcAlign=*/Memcpy->getSourceAlign().valueOrOne(),
1296 /*DstAlign=*/Memcpy->getDestAlign().valueOrOne(),
1297 /*SrcIsVolatile=*/Memcpy->isVolatile(),
1298 /*DstIsVolatile=*/Memcpy->isVolatile(),
1299 /*CanOverlap=*/CanOverlap,
1300 /*TTI=*/TTI,
1301 /*AtomicElementSize=*/std::nullopt,
1302 /*AverageTripCount=*/TripCount);
1303 }
1304}
1305
1307 const TargetTransformInfo &TTI) {
1308 Value *CopyLen = Memmove->getLength();
1309 Value *SrcAddr = Memmove->getRawSource();
1310 Value *DstAddr = Memmove->getRawDest();
1311 Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
1312 Align DstAlign = Memmove->getDestAlign().valueOrOne();
1313 bool SrcIsVolatile = Memmove->isVolatile();
1314 bool DstIsVolatile = SrcIsVolatile;
1315 IRBuilder<> CastBuilder(Memmove);
1316 CastBuilder.SetCurrentDebugLocation(Memmove->getStableDebugLoc());
1317
1318 unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
1319 unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
1320 if (SrcAS != DstAS) {
1321 if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
1322 // We may not be able to emit a pointer comparison, but we don't have
1323 // to. Expand as memcpy.
1324 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
1325 if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
1327 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1328 SrcIsVolatile, DstIsVolatile,
1329 /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
1330 } else {
1332 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
1333 DstAlign, SrcIsVolatile, DstIsVolatile,
1334 /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
1335 }
1336
1337 return true;
1338 }
1339
1340 if (!(TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
1341 TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
1342 // We don't know generically if it's legal to introduce an
1343 // addrspacecast. We need to know either if it's legal to insert an
1344 // addrspacecast, or if the address spaces cannot alias.
1345 LLVM_DEBUG(
1346 dbgs() << "Do not know how to expand memmove between different "
1347 "address spaces\n");
1348 return false;
1349 }
1350 }
1351
1352 if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
1354 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1355 SrcIsVolatile, DstIsVolatile, TTI);
1356 } else {
1358 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
1359 SrcIsVolatile, DstIsVolatile, TTI);
1360 }
1361 return true;
1362}
1363
1365 const TargetTransformInfo *TTI) {
1366 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memset);
1367 if (ConstantInt *CI = dyn_cast<ConstantInt>(Memset->getLength())) {
1369 /*InsertBefore=*/Memset,
1370 /*DstAddr=*/Memset->getRawDest(),
1371 /*Len=*/CI,
1372 /*SetValue=*/Memset->getValue(),
1373 /*DstAlign=*/Memset->getDestAlign().valueOrOne(),
1374 /*IsVolatile=*/Memset->isVolatile(),
1375 /*TTI=*/TTI,
1376 /*AverageTripCount=*/AverageTripCount);
1377 } else {
1379 /*InsertBefore=*/Memset,
1380 /*DstAddr=*/Memset->getRawDest(),
1381 /*Len=*/Memset->getLength(),
1382 /*SetValue=*/Memset->getValue(),
1383 /*DstAlign=*/Memset->getDestAlign().valueOrOne(),
1384 /*IsVolatile=*/Memset->isVolatile(),
1385 /*TTI=*/TTI,
1386 /*AverageTripCount=*/AverageTripCount);
1387 }
1388}
1389
1391 const TargetTransformInfo &TTI) {
1392 expandMemSetAsLoop(MemSet, &TTI);
1393}
1394
1396 createMemSetLoop(/*InsertBefore=*/Memset,
1397 /*DstAddr=*/Memset->getRawDest(),
1398 /*CopyLen=*/Memset->getLength(),
1399 /*SetValue=*/Memset->getValue(),
1400 /*DstAlign=*/Memset->getDestAlign().valueOrOne(),
1401 /*AverageTripCount=*/getAverageMemOpLoopTripCount(*Memset),
1402 /*IsVolatile=*/Memset->isVolatile());
1403}
1404
1406 const TargetTransformInfo &TTI,
1407 ScalarEvolution *SE) {
1408 assert(AtomicMemcpy->isAtomic());
1409 if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
1411 /*InsertBefore=*/AtomicMemcpy,
1412 /*SrcAddr=*/AtomicMemcpy->getRawSource(),
1413 /*DstAddr=*/AtomicMemcpy->getRawDest(),
1414 /*CopyLen=*/CI,
1415 /*SrcAlign=*/AtomicMemcpy->getSourceAlign().valueOrOne(),
1416 /*DstAlign=*/AtomicMemcpy->getDestAlign().valueOrOne(),
1417 /*SrcIsVolatile=*/AtomicMemcpy->isVolatile(),
1418 /*DstIsVolatile=*/AtomicMemcpy->isVolatile(),
1419 /*CanOverlap=*/false, // SrcAddr & DstAddr may not overlap by spec.
1420 /*TTI=*/TTI,
1421 /*AtomicElementSize=*/AtomicMemcpy->getElementSizeInBytes());
1422 } else {
1424 /*InsertBefore=*/AtomicMemcpy,
1425 /*SrcAddr=*/AtomicMemcpy->getRawSource(),
1426 /*DstAddr=*/AtomicMemcpy->getRawDest(),
1427 /*CopyLen=*/AtomicMemcpy->getLength(),
1428 /*SrcAlign=*/AtomicMemcpy->getSourceAlign().valueOrOne(),
1429 /*DstAlign=*/AtomicMemcpy->getDestAlign().valueOrOne(),
1430 /*SrcIsVolatile=*/AtomicMemcpy->isVolatile(),
1431 /*DstIsVolatile=*/AtomicMemcpy->isVolatile(),
1432 /*CanOverlap=*/false, // SrcAddr & DstAddr may not overlap by spec.
1433 /*TargetTransformInfo=*/TTI,
1434 /*AtomicElementSize=*/AtomicMemcpy->getElementSizeInBytes());
1435 }
1436}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
Definition Execution.cpp:41
#define DEBUG_TYPE
static Value * createMemSetSplat(const DataLayout &DL, IRBuilderBase &B, Value *SetValue, Type *DstType)
Create a Value of DstType that consists of a sequence of copies of SetValue, using bitcasts and a vec...
static std::pair< Value *, Value * > tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2, const TargetTransformInfo &TTI)
static bool canOverlap(MemTransferBase< T > *Memcpy, ScalarEvolution *SE)
static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore, Value *Len, unsigned MainLoopStep, unsigned ResidualLoopStep, StringRef BBNamePrefix, std::optional< uint64_t > AverageTripCount)
Insert the control flow and loop counters for a memcpy/memset loop expansion.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoopUnknownSize(Instruction *InsertBefore, Value *DstAddr, Value *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal)
static void createMemSetLoopKnownSize(Instruction *InsertBefore, Value *DstAddr, ConstantInt *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal, Value *RTLoopRemainder=nullptr)
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, Align DstAlign, std::optional< uint64_t > AverageTripCount, bool IsVolatile)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
This file contains the declarations for profiling metadata utility functions.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
This class represents any memcpy intrinsic i.e.
uint32_t getElementSizeInBytes() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
@ ICMP_NE
not equal
Definition InstrTypes.h:698
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2324
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1871
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1342
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1952
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2312
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2473
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2308
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1423
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1200
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1406
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1194
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1890
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition IRBuilder.h:2787
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the debug location of the next non-debug node.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
Definition MDBuilder.h:195
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
Definition MDBuilder.cpp:43
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
Definition MDBuilder.h:188
Metadata node.
Definition Metadata.h:1080
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
bool isVolatile() const
This class wraps the llvm.memmove intrinsic.
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.experimental.memset.pattern intrinsic.
Common base class for all memory transfer intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruction if the enclosing function is profiled.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet)
Expand MemSetPattern as a loop. MemSet is not deleted.
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extracts the value profile data from Inst and returns it if Inst is annotated with value profile data.
TargetTransformInfo TTI
LLVM_ABI void expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE)
Expand AtomicMemCpy as a loop. AtomicMemCpy is not deleted.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
Definition Metadata.cpp:64
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< unsigned > AtomicSize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130