LLVM 23.0.0git
NVPTXLowerAggrCopies.cpp
Go to the documentation of this file.
1//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
11// the size is large or is not a compile-time constant.
12//
13//===----------------------------------------------------------------------===//
14
16#include "NVPTX.h"
20#include "llvm/IR/Constants.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/LLVMContext.h"
28#include "llvm/IR/Module.h"
32
33#define DEBUG_TYPE "nvptx"
34
35using namespace llvm;
36
37namespace {
38
39// actual analysis class, which is a functionpass
40struct NVPTXLowerAggrCopies : public FunctionPass {
41 static char ID;
42
43 NVPTXLowerAggrCopies() : FunctionPass(ID) {}
44
45 void getAnalysisUsage(AnalysisUsage &AU) const override {
49 }
50
51 bool runOnFunction(Function &F) override;
52
53 static const unsigned MaxAggrCopySize = 128;
54
55 StringRef getPassName() const override {
56 return "Lower aggregate copies/intrinsics into loops";
57 }
58};
59
// Out-of-line definition of the static ID member that the constructor passes
// to FunctionPass.
60char NVPTXLowerAggrCopies::ID = 0;
61
// Lowers large aggregate copies and mem* intrinsics in F into loops.
//
// Phase 1 scans every instruction and collects (a) single-use loads of at
// least MaxAggrCopySize whose only user is a store of the loaded value, and
// (b) mem* intrinsic calls whose length is either not a constant or is a
// constant >= MaxAggrCopySize.
// Phase 2 replaces each collected load/store pair with either a directly
// emitted copy loop (when alias analysis proves no overlap) or a memmove
// intrinsic that is queued for expansion.
// Phase 3 expands every queued mem* intrinsic into a loop.
//
// Returns false when nothing was collected, true otherwise.
//
// NOTE(review): the declarations of the AggrLoads and MemCalls work lists
// (listing lines 63-64) are missing from this extract; both are only used via
// push_back/size/range-for below -- confirm their types against the real
// source.
62bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
65
66 const DataLayout &DL = F.getDataLayout();
67 LLVMContext &Context = F.getParent()->getContext();
// NOTE(review): the left-hand side of the next statement (listing line 68) is
// missing; later code passes a variable named TTI to the expansion helpers, so
// this is presumably its initializer -- confirm.
69 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
70 AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
71
72 // Collect all aggregate loads and mem* calls.
73 for (BasicBlock &BB : F) {
74 for (Instruction &I : BB) {
75 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
// Only loads with exactly one use are candidates.
76 if (!LI->hasOneUse())
77 continue;
78
// Loads smaller than the threshold are left untouched.
79 if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
80 continue;
81
// The single user must be a store, and the load must be the value being
// stored (operand 0); a store that uses the load in another operand
// position is skipped.
82 if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
83 if (SI->getOperand(0) != LI)
84 continue;
85 AggrLoads.push_back(LI);
86 }
87 } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
88 // Convert intrinsic calls with variable size or with constant size
89 // larger than the MaxAggrCopySize threshold.
// Note that the comparison is >=, so a length exactly equal to the threshold
// is also converted.
90 if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
91 if (LenCI->getZExtValue() >= MaxAggrCopySize) {
92 MemCalls.push_back(IntrCall);
93 }
94 } else {
95 MemCalls.push_back(IntrCall);
96 }
97 }
98 }
99 }
100
// Nothing to lower: report the function as unchanged.
101 if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
102 return false;
103 }
104
105 //
106 // Do the transformation of an aggr load/copy/set to a loop
107 //
108 for (LoadInst *LI : AggrLoads) {
// The collection phase verified that the load's only user is a store, so the
// cast cannot fail for loads recorded there.
109 auto *SI = cast<StoreInst>(*LI->user_begin());
110 Value *SrcAddr = LI->getOperand(0);
111 Value *DstAddr = SI->getOperand(1);
// Despite its name, NumLoads holds the store size of the loaded type; it is
// used as the copy length, materialized as an i32 constant.
112 unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
113 ConstantInt *CopyLen =
114 ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
115
// NOTE(review): the definition of Size (listing line 116) is missing from this
// extract; presumably a LocationSize derived from NumLoads -- confirm.
117 if (AA.isNoAlias(MemoryLocation(SrcAddr, Size),
118 MemoryLocation(DstAddr, Size))) {
119 // No overlap: emit a plain memcpy loop. Expand the loop here (rather
120 // than emitting a memcpy intrinsic and letting the code below expand it)
121 // so we can pass CanOverlap = false; expandMemCpyAsLoop would
122 // conservatively assume overlap.
123 createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
124 /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
125 /* CopyLen */ CopyLen,
126 /* SrcAlign */ LI->getAlign(),
127 /* DestAlign */ SI->getAlign(),
128 /* SrcIsVolatile */ LI->isVolatile(),
129 /* DstIsVolatile */ SI->isVolatile(),
130 /* CanOverlap */ false, TTI);
131 } else {
132 // May alias: lower as a memmove, which picks the copy direction at
133 // runtime. Emit the intrinsic here and let the loop below expand it.
134 //
135 // The pointers may alias even if they're in different address spaces
136 // (e.g. the generic addrspace may alias global). If they're in
137 // different addrspaces, cast to the generic space first, because
138 // expandMemMoveAsLoop needs to compare the pointer values to determine
139 // the copy direction.
// The builder inserts before the store, so the new intrinsic takes the
// store's position in the block.
140 IRBuilder<> Builder(SI);
141 unsigned SrcAS = LI->getPointerAddressSpace();
142 unsigned DstAS = SI->getPointerAddressSpace();
143 if (SrcAS != DstAS) {
// NOTE(review): the initializer of GenericPtrTy (listing line 145) is missing
// from this extract; per the comment above it should be a pointer type in the
// generic address space -- confirm.
144 PointerType *GenericPtrTy =
146 SrcAddr = Builder.CreateAddrSpaceCast(SrcAddr, GenericPtrTy);
147 DstAddr = Builder.CreateAddrSpaceCast(DstAddr, GenericPtrTy);
148 }
// Queue the new memmove so the intrinsic-expansion loop below lowers it. The
// intrinsic is marked volatile if either the load or the store was volatile.
149 MemCalls.push_back(cast<MemMoveInst>(Builder.CreateMemMove(
150 DstAddr, SI->getAlign(), SrcAddr, LI->getAlign(), CopyLen,
151 LI->isVolatile() || SI->isVolatile())));
152 }
153
// Erase the store before the load: the store is the load's only user, so the
// load has no remaining uses when it is removed.
154 SI->eraseFromParent();
155 LI->eraseFromParent();
156 }
157
158 // Transform mem* intrinsic calls.
159 for (MemIntrinsic *MemCall : MemCalls) {
// Only the memmove expansion reports success or failure; the memcpy and
// memset paths leave Expanded at true, so those calls are always erased.
160 bool Expanded = true;
161 if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
162 expandMemCpyAsLoop(Memcpy, TTI);
163 } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
164 Expanded = expandMemMoveAsLoop(Memmove, TTI);
165 } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
166 expandMemSetAsLoop(Memset, TTI);
167 }
// A memmove that could not be expanded is kept in place as an intrinsic call.
168 if (Expanded)
169 MemCall->eraseFromParent();
170 }
171
// Reaching this point means at least one candidate was collected, and the
// function is reported as modified.
172 return true;
173}
174
175} // namespace
176
// Legacy pass-manager registration for the pass under the command-line name
// "nvptx-lower-aggr-copies".
// NOTE(review): the macro names that open these two argument lists (listing
// lines 177 and 183's predecessor) and the lines between them (180-182) are
// missing from this extract; presumably INITIALIZE_PASS_BEGIN, one or more
// INITIALIZE_PASS_DEPENDENCY entries, and INITIALIZE_PASS_END -- confirm
// against the real source.
178 NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
179 "Lower aggregate copies, and llvm.mem* intrinsics into loops", false, false)
183 NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
184 "Lower aggregate copies, and llvm.mem* intrinsics into loops", false, false)
185
// NOTE(review): the function header (listing line 186) is missing from this
// extract; presumably the factory `FunctionPass *createLowerAggrCopies()` --
// confirm against the real source.
// Returns a newly allocated NVPTXLowerAggrCopies instance.
187 return new NVPTXLowerAggrCopies();
188}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This pass exposes codegen information to IR-level passes.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
This is the shared class of boolean and integer constants.
Definition Constants.h:87
A parsed version of the target data layout string, together with methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memcpy intrinsic.
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Representation for a specific memory location.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
LLVM Value Representation.
Definition Value.h:75
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionPass * createLowerAggrCopies()
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
TargetTransformInfo TTI
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.