LLVM  4.0.0
LoadCombine.cpp
Go to the documentation of this file.
1 //===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This transformation combines adjacent loads.
11 ///
12 //===----------------------------------------------------------------------===//
13 
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "load-combine"
34 
35 STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
36 STATISTIC(NumLoadsCombined, "Number of loads combined");
37 
38 #define LDCOMBINE_NAME "Combine Adjacent Loads"
39 
40 namespace {
41 struct PointerOffsetPair {
42  Value *Pointer;
43  APInt Offset;
44 };
45 
46 struct LoadPOPPair {
47  LoadInst *Load;
48  PointerOffsetPair POP;
49  /// \brief The new load needs to be created before the first load in IR order.
50  unsigned InsertOrder;
51 };
52 
53 class LoadCombine : public BasicBlockPass {
54  LLVMContext *C;
55  AliasAnalysis *AA;
56 
57 public:
58  LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
60  }
61 
63  bool doInitialization(Function &) override;
64  bool runOnBasicBlock(BasicBlock &BB) override;
65  void getAnalysisUsage(AnalysisUsage &AU) const override {
66  AU.setPreservesCFG();
69  }
70 
71  StringRef getPassName() const override { return LDCOMBINE_NAME; }
72  static char ID;
73 
75 
76 private:
77  BuilderTy *Builder;
78 
79  PointerOffsetPair getPointerOffsetPair(LoadInst &);
80  bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
81  bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
82  bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
83 };
84 }
85 
86 bool LoadCombine::doInitialization(Function &F) {
87  DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
88  C = &F.getContext();
89  return true;
90 }
91 
92 PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
93  auto &DL = LI.getModule()->getDataLayout();
94 
95  PointerOffsetPair POP;
96  POP.Pointer = LI.getPointerOperand();
97  unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
98  POP.Offset = APInt(BitWidth, 0);
99 
100  while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
101  if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
102  APInt LastOffset = POP.Offset;
103  if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
104  // Can't handle GEPs with variable indices.
105  POP.Offset = LastOffset;
106  return POP;
107  }
108  POP.Pointer = GEP->getPointerOperand();
109  } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
110  POP.Pointer = BC->getOperand(0);
111  }
112  }
113  return POP;
114 }
115 
116 bool LoadCombine::combineLoads(
117  DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
118  bool Combined = false;
119  for (auto &Loads : LoadMap) {
120  if (Loads.second.size() < 2)
121  continue;
122  std::sort(Loads.second.begin(), Loads.second.end(),
123  [](const LoadPOPPair &A, const LoadPOPPair &B) {
124  return A.POP.Offset.slt(B.POP.Offset);
125  });
126  if (aggregateLoads(Loads.second))
127  Combined = true;
128  }
129  return Combined;
130 }
131 
132 /// \brief Try to aggregate loads from a sorted list of loads to be combined.
133 ///
134 /// It is guaranteed that no writes occur between any of the loads. All loads
135 /// have the same base pointer. There are at least two loads.
136 bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
137  assert(Loads.size() >= 2 && "Insufficient loads!");
138  LoadInst *BaseLoad = nullptr;
139  SmallVector<LoadPOPPair, 8> AggregateLoads;
140  bool Combined = false;
141  bool ValidPrevOffset = false;
142  APInt PrevOffset;
143  uint64_t PrevSize = 0;
144  for (auto &L : Loads) {
145  if (ValidPrevOffset == false) {
146  BaseLoad = L.Load;
147  PrevOffset = L.POP.Offset;
148  PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
149  L.Load->getType());
150  AggregateLoads.push_back(L);
151  ValidPrevOffset = true;
152  continue;
153  }
154  if (L.Load->getAlignment() > BaseLoad->getAlignment())
155  continue;
156  APInt PrevEnd = PrevOffset + PrevSize;
157  if (L.POP.Offset.sgt(PrevEnd)) {
158  // No other load will be combinable
159  if (combineLoads(AggregateLoads))
160  Combined = true;
161  AggregateLoads.clear();
162  ValidPrevOffset = false;
163  continue;
164  }
165  if (L.POP.Offset != PrevEnd)
166  // This load is offset less than the size of the last load.
167  // FIXME: We may want to handle this case.
168  continue;
169  PrevOffset = L.POP.Offset;
170  PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
171  L.Load->getType());
172  AggregateLoads.push_back(L);
173  }
174  if (combineLoads(AggregateLoads))
175  Combined = true;
176  return Combined;
177 }
178 
179 /// \brief Given a list of combinable load. Combine the maximum number of them.
180 bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
181  // Remove loads from the end while the size is not a power of 2.
182  unsigned TotalSize = 0;
183  for (const auto &L : Loads)
184  TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
185  while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
186  TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
187  if (Loads.size() < 2)
188  return false;
189 
190  DEBUG({
191  dbgs() << "***** Combining Loads ******\n";
192  for (const auto &L : Loads) {
193  dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
194  }
195  });
196 
197  // Find first load. This is where we put the new load.
198  LoadPOPPair FirstLP;
199  FirstLP.InsertOrder = -1u;
200  for (const auto &L : Loads)
201  if (L.InsertOrder < FirstLP.InsertOrder)
202  FirstLP = L;
203 
204  unsigned AddressSpace =
205  FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
206 
207  Builder->SetInsertPoint(FirstLP.Load);
208  Value *Ptr = Builder->CreateConstGEP1_64(
209  Builder->CreatePointerCast(Loads[0].POP.Pointer,
210  Builder->getInt8PtrTy(AddressSpace)),
211  Loads[0].POP.Offset.getSExtValue());
212  LoadInst *NewLoad = new LoadInst(
213  Builder->CreatePointerCast(
214  Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
215  Ptr->getType()->getPointerAddressSpace())),
216  Twine(Loads[0].Load->getName()) + ".combined", false,
217  Loads[0].Load->getAlignment(), FirstLP.Load);
218 
219  for (const auto &L : Loads) {
220  Builder->SetInsertPoint(L.Load);
221  Value *V = Builder->CreateExtractInteger(
222  L.Load->getModule()->getDataLayout(), NewLoad,
223  cast<IntegerType>(L.Load->getType()),
224  (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract");
225  L.Load->replaceAllUsesWith(V);
226  }
227 
228  NumLoadsCombined = NumLoadsCombined + Loads.size();
229  return true;
230 }
231 
233  if (skipBasicBlock(BB))
234  return false;
235 
236  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
237 
238  IRBuilder<TargetFolder> TheBuilder(
240  Builder = &TheBuilder;
241 
243  AliasSetTracker AST(*AA);
244 
245  bool Combined = false;
246  unsigned Index = 0;
247  for (auto &I : BB) {
248  if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
249  if (combineLoads(LoadMap))
250  Combined = true;
251  LoadMap.clear();
252  AST.clear();
253  continue;
254  }
255  LoadInst *LI = dyn_cast<LoadInst>(&I);
256  if (!LI)
257  continue;
258  ++NumLoadsAnalyzed;
259  if (!LI->isSimple() || !LI->getType()->isIntegerTy())
260  continue;
261  auto POP = getPointerOffsetPair(*LI);
262  if (!POP.Pointer)
263  continue;
264  LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
265  AST.add(LI);
266  }
267  if (combineLoads(LoadMap))
268  Combined = true;
269  return Combined;
270 }
271 
272 char LoadCombine::ID = 0;
273 
275  return new LoadCombine();
276 }
277 
278 INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
280 INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
Legacy wrapper pass to provide the GlobalsAAResult object.
MachineLoop * L
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:226
STATISTIC(NumFunctions,"Total number of functions")
This is the interface for a simple mod/ref and alias analysis over globals.
static bool runOnBasicBlock(BasicBlock &BB)
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:655
An instruction for reading from memory.
Definition: Instructions.h:164
Hexagon Common GEP
bool isSimple() const
Definition: Instructions.h:263
#define LDCOMBINE_NAME
Definition: LoadCombine.cpp:38
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
AnalysisUsage & addRequired()
void initializeLoadCombinePass(PassRegistry &)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
#define F(x, y, z)
Definition: MD5.cpp:51
Function Alias Analysis false
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:32
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:110
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:399
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
Represent the analysis usage information of a pass.
uint32_t Offset
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
Value * getPointerOperand()
Definition: Instructions.h:270
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:654
BasicBlockPass class - This class is used to implement most local optimizations.
Definition: Pass.h:335
IRBuilder< TargetFolder > BuilderTy
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:234
BasicBlockPass * createLoadCombinePass()
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:58
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
AddressSpace
Definition: NVPTXBaseInfo.h:22
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:50
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:275
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
LLVM Value Representation.
Definition: Value.h:71
#define DEBUG(X)
Definition: Debug.h:100
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
int * Ptr
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:479
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")