//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cmath>
#include <limits>
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
} // namespace llvm

/// \returns \p Len urem \p OpSize, checking for optimization opportunities.
/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
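/// For example, a power-of-two \p OpSize of 8 lowers to an 'and' of \p Len
/// with 7 instead of a urem.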
static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
                                      Value *OpSize, unsigned OpSizeVal) {
  // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateAnd(Len, OpSizeVal - 1);
  return B.CreateURem(Len, OpSize);
}

/// \returns (\p Len udiv \p OpSize) mul \p OpSize, checking for optimization
/// opportunities.
/// If \p RTLoopRemainder is provided, it must be the result of
/// \c getRuntimeLoopRemainder() with the same arguments.
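/// For example, \p Len = 23 with \p OpSize = 8 yields 16.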
static Value *getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize,
                                  unsigned OpSizeVal,
                                  Value *RTLoopRemainder = nullptr) {
  if (!RTLoopRemainder)
    RTLoopRemainder = getRuntimeLoopRemainder(B, Len, OpSize, OpSizeVal);
  return B.CreateSub(Len, RTLoopRemainder);
}

namespace {
/// Container for the return values of insertLoopExpansion.
struct LoopExpansionInfo {
  /// The instruction at the end of the main loop body.
  Instruction *MainLoopIP = nullptr;

  /// The unit index in the main loop body.
  Value *MainLoopIndex = nullptr;

  /// The instruction at the end of the residual loop body. Can be nullptr if
  /// no residual is required.
  Instruction *ResidualLoopIP = nullptr;

  /// The unit index in the residual loop body. Can be nullptr if no residual
  /// is required.
  Value *ResidualLoopIndex = nullptr;
};

std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
  if (ProfcheckDisableMetadataFixes)
    return std::nullopt;
  if (std::optional<Function::ProfileCount> EC =
          I.getFunction()->getEntryCount();
      !EC || !EC->getCount())
    return std::nullopt;
  if (const auto Len = I.getLengthInBytes())
    return Len->getZExtValue();
  uint64_t Total = 0;
  SmallVector<InstrProfValueData, 4> ProfData =
      getValueProfDataFromInst(I, InstrProfValueKind::IPVK_MemOPSize,
                               std::numeric_limits<uint32_t>::max(), Total);
  if (!Total)
    return std::nullopt;
  uint64_t TripCount = 0;
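  // Each profiled entry pairs a memop size (Value) with how often it was
  // observed (Count), so the count-weighted mean gives the average size in
  // units, e.g., {size 8 seen 90 times, size 64 seen 10 times} yields
  // round((8 * 90 + 64 * 10) / 100) = 14.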
  for (const auto &P : ProfData)
    TripCount += P.Count * P.Value;
  return std::round(1.0 * TripCount / Total);
}

} // namespace

/// Insert the control flow and loop counters for a memcpy/memset loop
/// expansion.
///
/// This function inserts IR corresponding to the following C code before
/// \p InsertBefore:
/// \code
/// LoopUnits = (Len / MainLoopStep) * MainLoopStep;
/// ResidualUnits = Len - LoopUnits;
/// MainLoopIndex = 0;
/// if (LoopUnits > 0) {
///   do {
///     // MainLoopIP
///     MainLoopIndex += MainLoopStep;
///   } while (MainLoopIndex < LoopUnits);
/// }
/// for (size_t i = 0; i < ResidualUnits; i += ResidualLoopStep) {
///   ResidualLoopIndex = LoopUnits + i;
///   // ResidualLoopIP
/// }
/// \endcode
///
/// \p MainLoopStep and \p ResidualLoopStep determine by how many "units" the
/// loop index is increased in each iteration of the main and residual loops,
/// respectively. In most cases, the "unit" will be bytes, but larger units are
/// useful for lowering memset.pattern.
///
/// The computation of \c LoopUnits and \c ResidualUnits is performed at
/// compile time if \p Len is a \c ConstantInt.
/// The second (residual) loop is omitted if \p ResidualLoopStep is 0 or equal
/// to \p MainLoopStep.
/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
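/// For example, a byte-wise memcpy lowered with an i64 loop operand type uses
/// a \p MainLoopStep of 8 and a \p ResidualLoopStep of 1 (both in byte units).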
static LoopExpansionInfo
insertLoopExpansion(Instruction *InsertBefore, Value *Len,
                    unsigned MainLoopStep, unsigned ResidualLoopStep,
                    StringRef BBNamePrefix,
                    std::optional<uint64_t> AverageTripCount) {
  assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
         "ResidualLoopStep must divide MainLoopStep if specified");
  assert(ResidualLoopStep <= MainLoopStep &&
         "ResidualLoopStep cannot be larger than MainLoopStep");
  assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
  LoopExpansionInfo LEI;
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
      InsertBefore, BBNamePrefix + "-post-expansion");
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());

  // Calculate the main loop trip count and remaining units to cover after the
  // loop.
  Type *LenType = Len->getType();
  IntegerType *ILenType = cast<IntegerType>(LenType);
  ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);

  Value *LoopUnits = Len;
  Value *ResidualUnits = nullptr;
  // We can make a conditional branch unconditional if we know that the
  // MainLoop must be executed at least once.
  bool MustTakeMainLoop = false;
  if (MainLoopStep != 1) {
    if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
      uint64_t TotalUnits = CLen->getZExtValue();
      uint64_t LoopEndCount = alignDown(TotalUnits, MainLoopStep);
      uint64_t ResidualCount = TotalUnits - LoopEndCount;
      LoopUnits = ConstantInt::get(LenType, LoopEndCount);
      ResidualUnits = ConstantInt::get(LenType, ResidualCount);
      MustTakeMainLoop = LoopEndCount > 0;
      // As an optimization, we could skip generating the residual loop if
      // ResidualCount is known to be 0. However, current uses of this function
      // don't request a residual loop if the length is constant (they generate
      // a (potentially empty) sequence of loads and stores instead), so this
      // optimization would have no effect here.
    } else {
      ResidualUnits = getRuntimeLoopRemainder(PreLoopBuilder, Len,
                                              CIMainLoopStep, MainLoopStep);
      LoopUnits = getRuntimeLoopUnits(PreLoopBuilder, Len, CIMainLoopStep,
                                      MainLoopStep, ResidualUnits);
    }
  } else if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
    MustTakeMainLoop = CLen->getZExtValue() > 0;
  }

  BasicBlock *MainLoopBB = BasicBlock::Create(
      Ctx, BBNamePrefix + "-expansion-main-body", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(MainLoopBB);

  PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
  LEI.MainLoopIndex = LoopIndex;
  LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
  LoopIndex->addIncoming(NewIndex, MainLoopBB);

  // One argument of the addition is a loop-variant PHI, so it must be an
  // Instruction (i.e., it cannot be a Constant).
  LEI.MainLoopIP = cast<Instruction>(NewIndex);

  if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
    // Loop body for the residual accesses.
    BasicBlock *ResLoopBB =
        BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-body",
                           PreLoopBB->getParent(), PostLoopBB);
    // BB to check if the residual loop is needed.
    BasicBlock *ResidualCondBB =
        BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-cond",
                           PreLoopBB->getParent(), ResLoopBB);

    // Enter the MainLoop unless no main loop iteration is required.
    ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
    if (MustTakeMainLoop)
      PreLoopBuilder.CreateBr(MainLoopBB);
    else {
      auto *BR = PreLoopBuilder.CreateCondBr(
          PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
          ResidualCondBB);
      if (AverageTripCount.has_value()) {
        MDBuilder MDB(ParentFunc->getContext());
        setFittedBranchWeights(*BR,
                               {AverageTripCount.value() % MainLoopStep, 1},
                               /*IsExpected=*/false);
      } else {
        setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
      }
    }
    PreLoopBB->getTerminator()->eraseFromParent();

    // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
    // the residual condition BB.
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
                             MainLoopBB, ResidualCondBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RCBuilder(ResidualCondBB);
    RCBuilder.CreateCondBr(RCBuilder.CreateICmpNE(ResidualUnits, Zero),
                           ResLoopBB, PostLoopBB);

    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(LenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResidualCondBB);

    // Add the offset at the end of the main loop to the loop counter of the
    // residual loop to get the proper index.
    Value *FullOffset = ResBuilder.CreateAdd(LoopUnits, ResidualIndex);
    LEI.ResidualLoopIndex = FullOffset;

    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // One argument of the addition is a loop-variant PHI, so it must be an
    // Instruction (i.e., it cannot be a Constant).
    LEI.ResidualLoopIP = cast<Instruction>(ResNewIndex);

    // Stay in the residual loop until all ResidualUnits are handled.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, ResidualUnits), ResLoopBB,
        PostLoopBB);
  } else {
    // There is no need for a residual loop after the main loop. We do,
    // however, need to patch up the control flow by creating the terminators
    // for the preloop block and the main loop.

    // Enter the MainLoop unless no main loop iteration is required.
    if (MustTakeMainLoop) {
      PreLoopBuilder.CreateBr(MainLoopBB);
    } else {
      ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
      MDBuilder B(ParentFunc->getContext());
      PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
                                  MainLoopBB, PostLoopBB,
                                  B.createLikelyBranchWeights());
    }
    PreLoopBB->getTerminator()->eraseFromParent();
    // Stay in the MainLoop until we have handled all the LoopUnits.
    auto *Br = LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
    if (AverageTripCount.has_value())
      setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
                             /*IsExpected=*/false);
    else
      setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
  }
  return LEI;
}

void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                                     Value *DstAddr, ConstantInt *CopyLen,
                                     Align SrcAlign, Align DstAlign,
                                     bool SrcIsVolatile, bool DstIsVolatile,
                                     bool CanOverlap,
                                     const TargetTransformInfo &TTI,
                                     std::optional<uint32_t> AtomicElementSize,
                                     std::optional<uint64_t> AverageTripCount) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  Type *Int8Type = Type::getInt8Ty(Ctx);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

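  // For example, a 19-byte constant copy with a 4-byte LoopOpType covers 16
  // bytes in the loop and leaves 3 bytes for the straight-line residual below.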
  uint64_t LoopEndCount = alignDown(CopyLen->getZExtValue(), LoopOpSize);
320
321 // Skip the loop expansion entirely if the loop would never be taken.
322 if (LoopEndCount != 0) {
323 LoopExpansionInfo LEI =
324 insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
325 "static-memcpy", AverageTripCount);
326
327 // Fill MainLoopBB
328 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
329 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
330 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
331
332 // If we used LoopOpType as GEP element type, we would iterate over the
333 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
334 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
335 // byte offsets computed from the TypeStoreSize.
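    // (For example, a hypothetical i24 operand type would store 3 bytes but,
    // under a typical data layout, allocate 4.)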
    Value *SrcGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
    LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
        LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
    StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
           "No residual loop was requested");
  }

  // Copy the remaining bytes with straight-line code.
  uint64_t BytesCopied = LoopEndCount;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes == 0)
    return;

  IRBuilder<> RBuilder(InsertBefore);
  SmallVector<Type *, 5> RemainingOps;
  TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                        SrcAS, DstAS, SrcAlign, DstAlign,
                                        AtomicElementSize);

  for (auto *OpTy : RemainingOps) {
    Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
    Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

    unsigned OperandSize = DL.getTypeStoreSize(OpTy);
    assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
           "Atomic memcpy lowering is not supported for selected operand size");

    Value *SrcGEP = RBuilder.CreateInBoundsGEP(
        Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    LoadInst *Load =
        RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = RBuilder.CreateInBoundsGEP(
        Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    StoreInst *Store =
        RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    BytesCopied += OperandSize;
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize,
    std::optional<uint64_t> AverageTripCount) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  Type *Int8Type = Type::getInt8Ty(Ctx);

  Type *ResidualLoopOpType = AtomicElementSize
                                 ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                                 : Int8Type;
  unsigned ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);
  assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
         "Store size is expected to match type size");

  LoopExpansionInfo LEI =
      insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
                          "dynamic-memcpy", AverageTripCount);

  // Fill MainLoopBB
  IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // If we used LoopOpType as GEP element type, we would iterate over the
  // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
  // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use byte
  // offsets computed from the TypeStoreSize.
  Value *SrcGEP =
      MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
  LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
      LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP =
      MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
  StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
      Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }

  // Fill ResidualLoopBB.
  if (!LEI.ResidualLoopIP)
    return;

  Align ResSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
  Align ResDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));

  IRBuilder<> ResLoopBuilder(LEI.ResidualLoopIP);
  Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
                                                      LEI.ResidualLoopIndex);
  LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
      ResidualLoopOpType, ResSrcGEP, ResSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    ResLoad->setMetadata(LLVMContext::MD_alias_scope,
                         MDNode::get(Ctx, NewScope));
  }
  Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
                                                      LEI.ResidualLoopIndex);
  StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
      ResLoad, ResDstGEP, ResDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    ResStore->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    ResLoad->setAtomic(AtomicOrdering::Unordered);
    ResStore->setAtomic(AtomicOrdering::Unordered);
  }
}

// If \p Addr1 and \p Addr2 are pointers to different address spaces, create an
// addrspacecast to obtain a pair of pointers in the same address space. The
// caller needs to ensure that addrspacecasting is possible.
// No-op if the pointers are in the same address space.
static std::pair<Value *, Value *>
tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2,
                               const TargetTransformInfo &TTI) {
  Value *ResAddr1 = Addr1;
  Value *ResAddr2 = Addr2;

  unsigned AS1 = cast<PointerType>(Addr1->getType())->getAddressSpace();
  unsigned AS2 = cast<PointerType>(Addr2->getType())->getAddressSpace();
  if (AS1 != AS2) {
    if (TTI.isValidAddrSpaceCast(AS2, AS1))
      ResAddr2 = B.CreateAddrSpaceCast(Addr2, Addr1->getType());
    else if (TTI.isValidAddrSpaceCast(AS1, AS2))
      ResAddr1 = B.CreateAddrSpaceCast(Addr1, Addr2->getType());
    else
      llvm_unreachable("Can only lower memmove between address spaces if they "
                       "support addrspacecast");
  }
  return {ResAddr1, ResAddr2};
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
//
// If the TargetTransformInfo specifies a wider MemcpyLoopLoweringType, it is
// used for the memory accesses in the loops. Then, additional loops with
// byte-wise accesses are added for the remaining bytes.
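// For example, with an i64 loop type each main-loop iteration moves 8 bytes,
// and up to 7 trailing bytes are handled by the byte-wise residual loops.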
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
                                         Value *SrcAddr, Value *DstAddr,
                                         Value *CopyLen, Align SrcAlign,
                                         Align DstAlign, bool SrcIsVolatile,
                                         bool DstIsVolatile,
                                         const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  LLVMContext &Ctx = OrigBB->getContext();
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                   SrcAlign, DstAlign);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;

  // If the memory accesses are wider than one byte, residual loops with
  // i8-accesses are required to move remaining bytes.
  bool RequiresResidual = !LoopOpIsInt8;

  Type *ResidualLoopOpType = Int8Type;
  unsigned ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  ConstantInt *CIResidualLoopOpSize =
      ConstantInt::get(ILengthType, ResidualLoopOpSize);
  ConstantInt *Zero = ConstantInt::get(ILengthType, 0);

  IRBuilder<> PLBuilder(InsertBefore);

  Value *RuntimeLoopBytes = CopyLen;
  Value *RuntimeLoopRemainder = nullptr;
  Value *SkipResidualCondition = nullptr;
  if (RequiresResidual) {
    RuntimeLoopRemainder =
        getRuntimeLoopRemainder(PLBuilder, CopyLen, CILoopOpSize, LoopOpSize);
    RuntimeLoopBytes = getRuntimeLoopUnits(PLBuilder, CopyLen, CILoopOpSize,
                                           LoopOpSize, RuntimeLoopRemainder);
    SkipResidualCondition =
        PLBuilder.CreateICmpEQ(RuntimeLoopRemainder, Zero, "skip_residual");
  }
  Value *SkipMainCondition =
      PLBuilder.CreateICmpEQ(RuntimeLoopBytes, Zero, "skip_main");

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  // If the pointers are in different address spaces, they need to be converted
  // to a compatible one. Cases where memory ranges in the different address
  // spaces cannot overlap are lowered as memcpy and not handled here.
  auto [CmpSrcAddr, CmpDstAddr] =
      tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
  Value *PtrCompare =
      PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  // If the LoopOpSize is greater than 1, each part of the function consists of
  // four blocks:
  //   memmove_copy_backwards:
  //       skip the residual loop when 0 iterations are required
  //   memmove_bwd_residual_loop:
  //       copy the last few bytes individually so that the remaining length is
  //       a multiple of the LoopOpSize
  //   memmove_bwd_middle: skip the main loop when 0 iterations are required
  //   memmove_bwd_main_loop: the actual backwards loop BB with wide accesses
  //   memmove_copy_forward: skip the main loop when 0 iterations are required
  //   memmove_fwd_main_loop: the actual forward loop BB with wide accesses
  //   memmove_fwd_middle: skip the residual loop when 0 iterations are required
  //   memmove_fwd_residual_loop: copy the last few bytes individually
  //
  // The main and residual loop are switched between copying forward and
  // backward so that the residual loop always operates on the end of the moved
  // range. This is based on the assumption that buffers whose start is aligned
  // with the LoopOpSize are more common than buffers whose end is.
  //
  // If the LoopOpSize is 1, each part of the function consists of two blocks:
  //   memmove_copy_backwards: skip the loop when 0 iterations are required
  //   memmove_bwd_main_loop: the actual backwards loop BB
  //   memmove_copy_forward: skip the loop when 0 iterations are required
  //   memmove_fwd_main_loop: the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("memmove_copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("memmove_copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // Accesses in the residual loops do not share the same alignment as those in
  // the main loops.
  Align ResidualSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
  Align ResidualDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));

  // Copying backwards.
  {
    BasicBlock *MainLoopBB = BasicBlock::Create(
        F->getContext(), "memmove_bwd_main_loop", F, CopyForwardBB);

    // The predecessor of the memmove_bwd_main_loop. Updated in the
    // following if a residual loop is emitted first.
    BasicBlock *PredBB = CopyBackwardsBB;

    if (RequiresResidual) {
      // backwards residual loop
      BasicBlock *ResidualLoopBB = BasicBlock::Create(
          F->getContext(), "memmove_bwd_residual_loop", F, MainLoopBB);
      IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
      PHINode *ResidualLoopPhi = ResidualLoopBuilder.CreatePHI(ILengthType, 0);
      Value *ResidualIndex = ResidualLoopBuilder.CreateSub(
          ResidualLoopPhi, CIResidualLoopOpSize, "bwd_residual_index");
      // If we used LoopOpType as GEP element type, we would iterate over the
      // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
      // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore,
      // use byte offsets computed from the TypeStoreSize.
      Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
                                                             ResidualIndex);
      Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
          ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
          "element");
      Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
                                                              ResidualIndex);
      ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
                                             ResidualDstAlign, DstIsVolatile);

      // After the residual loop, go to an intermediate block.
      BasicBlock *IntermediateBB = BasicBlock::Create(
          F->getContext(), "memmove_bwd_middle", F, MainLoopBB);
      // Later code expects a terminator in the PredBB.
      IRBuilder<> IntermediateBuilder(IntermediateBB);
      IntermediateBuilder.CreateUnreachable();
      ResidualLoopBuilder.CreateCondBr(
          ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
          IntermediateBB, ResidualLoopBB);

      ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
      ResidualLoopPhi->addIncoming(CopyLen, CopyBackwardsBB);

      // How to get to the residual:
      BranchInst::Create(IntermediateBB, ResidualLoopBB, SkipResidualCondition,
                         ThenTerm->getIterator());
      ThenTerm->eraseFromParent();

      PredBB = IntermediateBB;
    }

    // main loop
    IRBuilder<> MainLoopBuilder(MainLoopBB);
    PHINode *MainLoopPhi = MainLoopBuilder.CreatePHI(ILengthType, 0);
    Value *MainIndex =
        MainLoopBuilder.CreateSub(MainLoopPhi, CILoopOpSize, "bwd_main_index");
    Value *LoadGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainIndex);
    Value *Element = MainLoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainIndex);
    MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                       DstIsVolatile);
    MainLoopBuilder.CreateCondBr(MainLoopBuilder.CreateICmpEQ(MainIndex, Zero),
                                 ExitBB, MainLoopBB);
    MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
    MainLoopPhi->addIncoming(RuntimeLoopBytes, PredBB);

    // How to get to the main loop:
    Instruction *PredBBTerm = PredBB->getTerminator();
    BranchInst::Create(ExitBB, MainLoopBB, SkipMainCondition,
                       PredBBTerm->getIterator());
    PredBBTerm->eraseFromParent();
  }

  // Copying forward.
  // main loop
  {
    BasicBlock *MainLoopBB =
        BasicBlock::Create(F->getContext(), "memmove_fwd_main_loop", F, ExitBB);
    IRBuilder<> MainLoopBuilder(MainLoopBB);
    PHINode *MainLoopPhi =
        MainLoopBuilder.CreatePHI(ILengthType, 0, "fwd_main_index");
    Value *LoadGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainLoopPhi);
    Value *Element = MainLoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainLoopPhi);
    MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                       DstIsVolatile);
    Value *MainIndex = MainLoopBuilder.CreateAdd(MainLoopPhi, CILoopOpSize);
    MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
    MainLoopPhi->addIncoming(Zero, CopyForwardBB);

    Instruction *CopyFwdBBTerm = CopyForwardBB->getTerminator();
    BasicBlock *SuccessorBB = ExitBB;
    if (RequiresResidual)
      SuccessorBB =
          BasicBlock::Create(F->getContext(), "memmove_fwd_middle", F, ExitBB);

    // leaving or staying in the main loop
    MainLoopBuilder.CreateCondBr(
        MainLoopBuilder.CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
        MainLoopBB);

    // getting in or skipping the main loop
    BranchInst::Create(SuccessorBB, MainLoopBB, SkipMainCondition,
                       CopyFwdBBTerm->getIterator());
    CopyFwdBBTerm->eraseFromParent();

    if (RequiresResidual) {
      BasicBlock *IntermediateBB = SuccessorBB;
      IRBuilder<> IntermediateBuilder(IntermediateBB);
      BasicBlock *ResidualLoopBB = BasicBlock::Create(
          F->getContext(), "memmove_fwd_residual_loop", F, ExitBB);
      IntermediateBuilder.CreateCondBr(SkipResidualCondition, ExitBB,
                                       ResidualLoopBB);

      // Residual loop
      IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
      PHINode *ResidualLoopPhi =
          ResidualLoopBuilder.CreatePHI(ILengthType, 0, "fwd_residual_index");
      Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
                                                             ResidualLoopPhi);
      Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
          ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
          "element");
      Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
                                                              ResidualLoopPhi);
      ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
                                             ResidualDstAlign, DstIsVolatile);
      Value *ResidualIndex =
          ResidualLoopBuilder.CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
      ResidualLoopBuilder.CreateCondBr(
          ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
          ResidualLoopBB);
      ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
      ResidualLoopPhi->addIncoming(RuntimeLoopBytes, IntermediateBB);
    }
  }
}

// Similar to createMemMoveLoopUnknownSize, except that the trip counts are
// computed at compile time, obsolete loops and branches are omitted, and the
// residual code is straight-line code instead of a loop.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
                                       Value *SrcAddr, Value *DstAddr,
                                       ConstantInt *CopyLen, Align SrcAlign,
                                       Align DstAlign, bool SrcIsVolatile,
                                       bool DstIsVolatile,
                                       const TargetTransformInfo &TTI) {
  // No need to expand zero length moves.
  if (CopyLen->isZero())
    return;

  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  LLVMContext &Ctx = OrigBB->getContext();
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                   SrcAlign, DstAlign);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  Type *Int8Type = Type::getInt8Ty(Ctx);

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  uint64_t BytesCopiedInLoop = alignDown(CopyLen->getZExtValue(), LoopOpSize);
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopiedInLoop;

  IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
  ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
  ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);

  IRBuilder<> PLBuilder(InsertBefore);

  auto [CmpSrcAddr, CmpDstAddr] =
      tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
  Value *PtrCompare =
      PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // Helper function to generate a load/store pair of a given type in the
  // residual. Used in the forward and backward branches.
  auto GenerateResidualLdStPair = [&](Type *OpTy, IRBuilderBase &Builder,
                                      uint64_t &BytesCopied) {
    Align ResSrcAlign(commonAlignment(SrcAlign, BytesCopied));
    Align ResDstAlign(commonAlignment(DstAlign, BytesCopied));

    unsigned OperandSize = DL.getTypeStoreSize(OpTy);

    // If we used LoopOpType as GEP element type, we would iterate over the
    // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
    // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
    // byte offsets computed from the TypeStoreSize.
    Value *SrcGEP = Builder.CreateInBoundsGEP(
        Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    LoadInst *Load =
        Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
    Value *DstGEP = Builder.CreateInBoundsGEP(
        Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
    BytesCopied += OperandSize;
  };

  // Copying backwards.
  if (RemainingBytes != 0) {
    CopyBackwardsBB->setName("memmove_bwd_residual");
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. We need the same
    // instructions as in the forward case, only in reverse. So we generate code
    // the same way, except that we change the IRBuilder insert point for each
    // load/store pair so that each one is inserted before the previous one
    // instead of after it.
    IRBuilder<> BwdResBuilder(CopyBackwardsBB,
                              CopyBackwardsBB->getFirstNonPHIIt());
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps) {
      // reverse the order of the emitted operations
      BwdResBuilder.SetInsertPoint(CopyBackwardsBB,
                                   CopyBackwardsBB->getFirstNonPHIIt());
      GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
    }
  }
  if (BytesCopiedInLoop != 0) {
    BasicBlock *LoopBB = CopyBackwardsBB;
    BasicBlock *PredBB = OrigBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      LoopBB = CopyBackwardsBB->splitBasicBlock(
          CopyBackwardsBB->getTerminator(), "memmove_bwd_loop");
      PredBB = CopyBackwardsBB;
    } else {
      CopyBackwardsBB->setName("memmove_bwd_loop");
    }
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0);
    Value *Index = LoopBuilder.CreateSub(LoopPhi, CILoopOpSize, "bwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, Index);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, Index);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);

    // Replace the unconditional branch introduced by
    // SplitBlockAndInsertIfThenElse to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, Zero), ExitBB,
                             LoopBB);
    UncondTerm->eraseFromParent();

    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(LoopBound, PredBB);
  }

  // Copying forward.
  BasicBlock *FwdResidualBB = CopyForwardBB;
  if (BytesCopiedInLoop != 0) {
    CopyForwardBB->setName("memmove_fwd_loop");
    BasicBlock *LoopBB = CopyForwardBB;
    BasicBlock *SuccBB = ExitBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      SuccBB = CopyForwardBB->splitBasicBlock(CopyForwardBB->getTerminator(),
                                              "memmove_fwd_residual");
      FwdResidualBB = SuccBB;
    }
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0, "fwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LoopPhi);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LoopPhi);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);
    Value *Index = LoopBuilder.CreateAdd(LoopPhi, CILoopOpSize);
    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(Zero, OrigBB);

    // Replace the unconditional branch to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, LoopBound), SuccBB,
                             LoopBB);
    UncondTerm->eraseFromParent();
  }

  if (RemainingBytes != 0) {
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. In the forward
    // case, we emit it in the normal order.
    IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps)
      GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
  }
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             std::optional<uint64_t> AverageTripCount,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  auto *ToLoopBR = Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  MDBuilder MDB(F->getContext());
  if (AverageTripCount.has_value())
    ToLoopBR->setMetadata(LLVMContext::MD_prof,
                          MDB.createUnlikelyBranchWeights());
  else
    setExplicitlyUnknownBranchWeightsIfProfiled(*ToLoopBR, DEBUG_TYPE);

  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));
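  // For plain memset the stored type is i8, so each iteration writes one byte;
  // for memset.pattern it is the pattern type, whose store size may be larger.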

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  auto *LoopBR = LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
  if (AverageTripCount.has_value())
    setFittedBranchWeights(*LoopBR, {AverageTripCount.value(), 1},
                           /*IsExpected=*/false);
  else
    setExplicitlyUnknownBranchWeightsIfProfiled(*LoopBR, DEBUG_TYPE);
}

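// Conservatively assume the accesses can overlap; return false only when
// ScalarEvolution proves the source and destination addresses are never equal.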
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    const SCEV *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    const SCEV *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI,
        /* AtomicElementSize */ std::nullopt,
        /* AverageTripCount */ TripCount);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI,
        /* AtomicElementSize */ std::nullopt,
        /* AverageTripCount */ TripCount);
  }
}

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(
            /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
            SrcIsVolatile, DstIsVolatile,
            /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
      } else {
        createMemCpyLoopUnknownSize(
            /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
            DstAlign, SrcIsVolatile, DstIsVolatile,
            /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
      }

      return true;
    }

    if (!(TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
          TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
    createMemMoveLoopKnownSize(
        /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
        SrcIsVolatile, DstIsVolatile, TTI);
  } else {
    createMemMoveLoopUnknownSize(
        /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
        SrcIsVolatile, DstIsVolatile, TTI);
  }
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
                   /* IsVolatile */ Memset->isVolatile());
}

void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
  createMemSetLoop(/* InsertBefore=*/Memset,
                   /* DstAddr=*/Memset->getRawDest(),
                   /* CopyLen=*/Memset->getLength(),
                   /* SetValue=*/Memset->getValue(),
                   /* Alignment=*/Memset->getDestAlign().valueOrOne(),
                   /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
                   /* IsVolatile */ Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  assert(AtomicMemcpy->isAtomic());
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
  }
}