LLVM  3.7.0
NVPTXLowerAggrCopies.cpp
Go to the documentation of this file.
1 //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Lower aggregate copies and the memset, memcpy and memmove intrinsics into
10 // loops when the size is large or is not a compile-time constant.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXLowerAggrCopies.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DataLayout.h"
19 #include "llvm/IR/Function.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/Intrinsics.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/Support/Debug.h"
28 
29 #define DEBUG_TYPE "nvptx"
30 
31 using namespace llvm;
32 
33 namespace {
34 // Function pass whose runOnFunction() rewrites aggregate load/store pairs,
// memcpy/memmove intrinsics and memset intrinsics as explicit loops.
35 struct NVPTXLowerAggrCopies : public FunctionPass {
// Pass identification member; passed to the FunctionPass constructor below.
36  static char ID;
37 
38  NVPTXLowerAggrCopies() : FunctionPass(ID) {}
39 
// NOTE(review): the body is empty in this listing, but the embedded line
// numbering jumps from 40 to 43, so statements may have been dropped during
// extraction -- confirm against the upstream file before relying on this.
40  void getAnalysisUsage(AnalysisUsage &AU) const override {
43  }
44 
// Collects the candidate instructions in F and replaces each with a loop.
// Returns false when nothing was collected, true otherwise.
45  bool runOnFunction(Function &F) override;
46 
// Size threshold used by runOnFunction(): aggregate loads whose store size is
// below this value, and memory intrinsics whose constant length is below this
// value, are left untouched.
47  static const unsigned MaxAggrCopySize = 128;
48 
// Human-readable name of this pass.
49  const char *getPassName() const override {
50  return "Lower aggregate copies/intrinsics into loops";
51  }
52 };
53 } // namespace
54 
56 
57 // Lower MemTransferInst or load-store pair to loop
59  Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
60  bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
61  Type *indType = len->getType();
62 
63  BasicBlock *origBB = splitAt->getParent();
64  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
65  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
66 
67  origBB->getTerminator()->setSuccessor(0, loopBB);
68  IRBuilder<> builder(origBB, origBB->getTerminator());
69 
70  // srcAddr and dstAddr are expected to be pointer types,
71  // so no check is made here.
72  unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
73  unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
74 
75  // Cast pointers to (char *)
76  srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
77  dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
78 
79  IRBuilder<> loop(loopBB);
80  // The loop index (ind) is a phi node.
81  PHINode *ind = loop.CreatePHI(indType, 0);
82  // Incoming value for ind is 0
83  ind->addIncoming(ConstantInt::get(indType, 0), origBB);
84 
85  // load from srcAddr+ind
86  // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
87  // word-sized loads and stores.
88  Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
89  srcVolatile);
90  // store at dstAddr+ind
91  loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
92  dstVolatile);
93 
94  // The value for ind coming from backedge is (ind + 1)
95  Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
96  ind->addIncoming(newind, loopBB);
97 
98  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
99 }
100 
101 // Lower MemSetInst to loop
102 static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
103  Value *len, Value *val, LLVMContext &Context,
104  Function &F) {
105  BasicBlock *origBB = splitAt->getParent();
106  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
107  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
108 
109  origBB->getTerminator()->setSuccessor(0, loopBB);
110  IRBuilder<> builder(origBB, origBB->getTerminator());
111 
112  unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
113 
114  // Cast pointer to the type of value getting stored
115  dstAddr =
116  builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
117 
118  IRBuilder<> loop(loopBB);
119  PHINode *ind = loop.CreatePHI(len->getType(), 0);
120  ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
121 
122  loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
123 
124  Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
125  ind->addIncoming(newind, loopBB);
126 
127  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
128 }
129 
130 bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
131  SmallVector<LoadInst *, 4> aggrLoads;
133  SmallVector<MemSetInst *, 4> aggrMemsets;
134 
135  const DataLayout &DL = F.getParent()->getDataLayout();
136  LLVMContext &Context = F.getParent()->getContext();
137 
138  //
139  // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
140  //
141  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
142  for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
143  ++II) {
144  if (LoadInst *load = dyn_cast<LoadInst>(II)) {
145  if (!load->hasOneUse())
146  continue;
147 
148  if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
149  continue;
150 
151  User *use = load->user_back();
152  if (StoreInst *store = dyn_cast<StoreInst>(use)) {
153  if (store->getOperand(0) != load)
154  continue;
155  aggrLoads.push_back(load);
156  }
157  } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
158  Value *len = intr->getLength();
159  // If the number of elements being copied is greater
160  // than MaxAggrCopySize, lower it to a loop
161  if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
162  if (len_int->getZExtValue() >= MaxAggrCopySize) {
163  aggrMemcpys.push_back(intr);
164  }
165  } else {
166  // turn variable length memcpy/memmov into loop
167  aggrMemcpys.push_back(intr);
168  }
169  } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
170  Value *len = memsetintr->getLength();
171  if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
172  if (len_int->getZExtValue() >= MaxAggrCopySize) {
173  aggrMemsets.push_back(memsetintr);
174  }
175  } else {
176  // turn variable length memset into loop
177  aggrMemsets.push_back(memsetintr);
178  }
179  }
180  }
181  }
182  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
183  (aggrMemsets.size() == 0))
184  return false;
185 
186  //
187  // Do the transformation of an aggr load/copy/set to a loop
188  //
189  for (LoadInst *load : aggrLoads) {
190  StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
191  Value *srcAddr = load->getOperand(0);
192  Value *dstAddr = store->getOperand(1);
193  unsigned numLoads = DL.getTypeStoreSize(load->getType());
194  Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
195 
196  convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
197  store->isVolatile(), Context, F);
198 
199  store->eraseFromParent();
200  load->eraseFromParent();
201  }
202 
203  for (MemTransferInst *cpy : aggrMemcpys) {
204  convertTransferToLoop(/* splitAt */ cpy,
205  /* srcAddr */ cpy->getSource(),
206  /* dstAddr */ cpy->getDest(),
207  /* len */ cpy->getLength(),
208  /* srcVolatile */ cpy->isVolatile(),
209  /* dstVolatile */ cpy->isVolatile(),
210  /* Context */ Context,
211  /* Function F */ F);
212  cpy->eraseFromParent();
213  }
214 
215  for (MemSetInst *memsetinst : aggrMemsets) {
216  Value *len = memsetinst->getLength();
217  Value *val = memsetinst->getValue();
218  convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
219  F);
220  memsetinst->eraseFromParent();
221  }
222 
223  return true;
224 }
225 
// Allocates a new NVPTXLowerAggrCopies pass object and returns it.
// NOTE(review): the line carrying this function's signature (embedded number
// 226) is absent from this listing; the index further down names it
// createLowerAggrCopies() returning FunctionPass * -- confirm against the
// upstream file.
227  return new NVPTXLowerAggrCopies();
228 }
Value * CreateGEP(Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1032
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
Definition: Instruction.cpp:70
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LoadInst * CreateLoad(Value *Ptr, const char *Name)
Definition: IRBuilder.h:973
void addIncoming(Value *V, BasicBlock *BB)
addIncoming - Add an incoming value to the end of the PHI list
bool isVolatile() const
isVolatile - Return true if this is a store to a volatile memory location.
Definition: Instructions.h:351
iterator end()
Definition: Function.h:459
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Definition: Type.cpp:738
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1366
MemSetInst - This class wraps the llvm.memset intrinsic.
F(f)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:622
MachineFunctionAnalysis - This class is a Pass that manages a MachineFunction object.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:1462
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:517
FunctionPass * createLowerAggrCopies()
void setSuccessor(unsigned idx, BasicBlock *B)
Update the specified successor to point at the provided block.
Definition: InstrTypes.h:67
StoreInst - an instruction for storing to memory.
Definition: Instructions.h:316
iterator begin()
Definition: Function.h:457
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:985
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:704
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
Value * getOperand(unsigned i) const
Definition: User.h:118
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:103
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:283
This is the shared class of boolean and integer constants.
Definition: Constants.h:47
static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, Value *len, Value *val, LLVMContext &Context, Function &F)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:582
static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:296
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:348
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:371
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
LLVM Value Representation.
Definition: Value.h:69
const BasicBlock * getParent() const
Definition: Instruction.h:72
User * user_back()
Definition: Value.h:298
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:265