LLVM  3.7.0
LoadCombine.cpp
Go to the documentation of this file.
1 //===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This transformation combines adjacent loads.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/Statistic.h"
20 #include "llvm/IR/DataLayout.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Pass.h"
26 #include "llvm/Support/Debug.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "load-combine"
33 
34 STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
35 STATISTIC(NumLoadsCombined, "Number of loads combined");
36 
37 namespace {
38 struct PointerOffsetPair {
39  Value *Pointer;
40  uint64_t Offset;
41 };
42 
43 struct LoadPOPPair {
44  LoadPOPPair() = default;
45  LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
46  : Load(L), POP(P), InsertOrder(O) {}
47  LoadInst *Load;
48  PointerOffsetPair POP;
49  /// \brief The new load needs to be created before the first load in IR order.
50  unsigned InsertOrder;
51 };
52 
53 class LoadCombine : public BasicBlockPass {
54  LLVMContext *C;
55  AliasAnalysis *AA;
56 
57 public:
58  LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
60  }
61 
63  bool doInitialization(Function &) override;
64  bool runOnBasicBlock(BasicBlock &BB) override;
65  void getAnalysisUsage(AnalysisUsage &AU) const override;
66 
67  const char *getPassName() const override { return "LoadCombine"; }
68  static char ID;
69 
71 
72 private:
73  BuilderTy *Builder;
74 
75  PointerOffsetPair getPointerOffsetPair(LoadInst &);
76  bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
77  bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
78  bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
79 };
80 }
81 
82 bool LoadCombine::doInitialization(Function &F) {
83  DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
84  C = &F.getContext();
85  return true;
86 }
87 
88 PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
89  PointerOffsetPair POP;
90  POP.Pointer = LI.getPointerOperand();
91  POP.Offset = 0;
92  while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
93  if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
94  auto &DL = LI.getModule()->getDataLayout();
95  unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
96  APInt Offset(BitWidth, 0);
97  if (GEP->accumulateConstantOffset(DL, Offset))
98  POP.Offset += Offset.getZExtValue();
99  else
100  // Can't handle GEPs with variable indices.
101  return POP;
102  POP.Pointer = GEP->getPointerOperand();
103  } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer))
104  POP.Pointer = BC->getOperand(0);
105  }
106  return POP;
107 }
108 
109 bool LoadCombine::combineLoads(
110  DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
111  bool Combined = false;
112  for (auto &Loads : LoadMap) {
113  if (Loads.second.size() < 2)
114  continue;
115  std::sort(Loads.second.begin(), Loads.second.end(),
116  [](const LoadPOPPair &A, const LoadPOPPair &B) {
117  return A.POP.Offset < B.POP.Offset;
118  });
119  if (aggregateLoads(Loads.second))
120  Combined = true;
121  }
122  return Combined;
123 }
124 
125 /// \brief Try to aggregate loads from a sorted list of loads to be combined.
126 ///
127 /// It is guaranteed that no writes occur between any of the loads. All loads
128 /// have the same base pointer. There are at least two loads.
129 bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
130  assert(Loads.size() >= 2 && "Insufficient loads!");
131  LoadInst *BaseLoad = nullptr;
132  SmallVector<LoadPOPPair, 8> AggregateLoads;
133  bool Combined = false;
134  uint64_t PrevOffset = -1ull;
135  uint64_t PrevSize = 0;
136  for (auto &L : Loads) {
137  if (PrevOffset == -1ull) {
138  BaseLoad = L.Load;
139  PrevOffset = L.POP.Offset;
140  PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
141  L.Load->getType());
142  AggregateLoads.push_back(L);
143  continue;
144  }
145  if (L.Load->getAlignment() > BaseLoad->getAlignment())
146  continue;
147  if (L.POP.Offset > PrevOffset + PrevSize) {
148  // No other load will be combinable
149  if (combineLoads(AggregateLoads))
150  Combined = true;
151  AggregateLoads.clear();
152  PrevOffset = -1;
153  continue;
154  }
155  if (L.POP.Offset != PrevOffset + PrevSize)
156  // This load is offset less than the size of the last load.
157  // FIXME: We may want to handle this case.
158  continue;
159  PrevOffset = L.POP.Offset;
160  PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
161  L.Load->getType());
162  AggregateLoads.push_back(L);
163  }
164  if (combineLoads(AggregateLoads))
165  Combined = true;
166  return Combined;
167 }
168 
169 /// \brief Given a list of combinable load. Combine the maximum number of them.
170 bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
171  // Remove loads from the end while the size is not a power of 2.
172  unsigned TotalSize = 0;
173  for (const auto &L : Loads)
174  TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
175  while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
176  TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
177  if (Loads.size() < 2)
178  return false;
179 
180  DEBUG({
181  dbgs() << "***** Combining Loads ******\n";
182  for (const auto &L : Loads) {
183  dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
184  }
185  });
186 
187  // Find first load. This is where we put the new load.
188  LoadPOPPair FirstLP;
189  FirstLP.InsertOrder = -1u;
190  for (const auto &L : Loads)
191  if (L.InsertOrder < FirstLP.InsertOrder)
192  FirstLP = L;
193 
194  unsigned AddressSpace =
195  FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
196 
197  Builder->SetInsertPoint(FirstLP.Load);
198  Value *Ptr = Builder->CreateConstGEP1_64(
199  Builder->CreatePointerCast(Loads[0].POP.Pointer,
200  Builder->getInt8PtrTy(AddressSpace)),
201  Loads[0].POP.Offset);
202  LoadInst *NewLoad = new LoadInst(
203  Builder->CreatePointerCast(
204  Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
205  Ptr->getType()->getPointerAddressSpace())),
206  Twine(Loads[0].Load->getName()) + ".combined", false,
207  Loads[0].Load->getAlignment(), FirstLP.Load);
208 
209  for (const auto &L : Loads) {
210  Builder->SetInsertPoint(L.Load);
211  Value *V = Builder->CreateExtractInteger(
212  L.Load->getModule()->getDataLayout(), NewLoad,
213  cast<IntegerType>(L.Load->getType()),
214  L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
215  L.Load->replaceAllUsesWith(V);
216  }
217 
218  NumLoadsCombined = NumLoadsCombined + Loads.size();
219  return true;
220 }
221 
222 bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
223  if (skipOptnoneFunction(BB))
224  return false;
225 
226  AA = &getAnalysis<AliasAnalysis>();
227 
230  Builder = &TheBuilder;
231 
233  AliasSetTracker AST(*AA);
234 
235  bool Combined = false;
236  unsigned Index = 0;
237  for (auto &I : BB) {
238  if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
239  if (combineLoads(LoadMap))
240  Combined = true;
241  LoadMap.clear();
242  AST.clear();
243  continue;
244  }
245  LoadInst *LI = dyn_cast<LoadInst>(&I);
246  if (!LI)
247  continue;
248  ++NumLoadsAnalyzed;
249  if (!LI->isSimple() || !LI->getType()->isIntegerTy())
250  continue;
251  auto POP = getPointerOffsetPair(*LI);
252  if (!POP.Pointer)
253  continue;
254  LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
255  AST.add(LI);
256  }
257  if (combineLoads(LoadMap))
258  Combined = true;
259  return Combined;
260 }
261 
262 void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
263  AU.setPreservesCFG();
264 
267 }
268 
269 char LoadCombine::ID = 0;
270 
272  return new LoadCombine();
273 }
274 
275 INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
276  false, false)
278 INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
279  false, false)
280 
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
load Combine Adjacent false
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
STATISTIC(NumFunctions,"Total number of functions")
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Definition: Type.cpp:738
F(f)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
Hexagon Common GEP
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: Type.cpp:216
bool isSimple() const
Definition: Instructions.h:279
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
AnalysisUsage & addRequired()
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
load Combine Adjacent Loads
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:32
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:111
#define P(N)
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
Represent the analysis usage information of a pass.
Value * getPointerOperand()
Definition: Instructions.h:284
#define INITIALIZE_AG_DEPENDENCY(depName)
Definition: PassSupport.h:72
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:519
INITIALIZE_PASS_BEGIN(LoadCombine,"load-combine","Combine Adjacent Loads", false, false) INITIALIZE_PASS_END(LoadCombine
BasicBlockPass class - This class is used to implement most local optimizations.
Definition: Pass.h:330
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:304
BasicBlockPass * createLoadCombinePass()
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:57
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
AddressSpace
Definition: NVPTXBaseInfo.h:22
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
Class for arbitrary precision integers.
Definition: APInt.h:73
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
Definition: Type.h:193
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
void initializeSROAPass(PassRegistry &)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
load combine
#define I(x, y, z)
Definition: MD5.cpp:54
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
LLVM Value Representation.
Definition: Value.h:69
#define DEBUG(X)
Definition: Debug.h:92
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:354
IRBuilder< true, TargetFolder > BuilderTy