33 #define DEBUG_TYPE "load-combine"
35 STATISTIC(NumLoadsAnalyzed,
"Number of loads analyzed for combining");
36 STATISTIC(NumLoadsCombined,
"Number of loads combined");
38 #define LDCOMBINE_NAME "Combine Adjacent Loads"
41 struct PointerOffsetPair {
48 PointerOffsetPair POP;
63 bool doInitialization(
Function &)
override;
79 PointerOffsetPair getPointerOffsetPair(
LoadInst &);
86 bool LoadCombine::doInitialization(
Function &
F) {
92 PointerOffsetPair LoadCombine::getPointerOffsetPair(
LoadInst &LI) {
95 PointerOffsetPair POP;
98 POP.Offset =
APInt(BitWidth, 0);
100 while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
101 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
102 APInt LastOffset = POP.Offset;
103 if (!
GEP->accumulateConstantOffset(DL, POP.Offset)) {
105 POP.Offset = LastOffset;
108 POP.Pointer =
GEP->getPointerOperand();
109 }
else if (
auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
110 POP.Pointer = BC->getOperand(0);
116 bool LoadCombine::combineLoads(
118 bool Combined =
false;
119 for (
auto &Loads : LoadMap) {
120 if (Loads.second.size() < 2)
122 std::sort(Loads.second.begin(), Loads.second.end(),
123 [](
const LoadPOPPair &
A,
const LoadPOPPair &
B) {
124 return A.POP.Offset.slt(
B.POP.Offset);
126 if (aggregateLoads(Loads.second))
137 assert(Loads.
size() >= 2 &&
"Insufficient loads!");
140 bool Combined =
false;
141 bool ValidPrevOffset =
false;
143 uint64_t PrevSize = 0;
144 for (
auto &
L : Loads) {
145 if (ValidPrevOffset ==
false) {
147 PrevOffset =
L.POP.Offset;
148 PrevSize =
L.Load->getModule()->getDataLayout().getTypeStoreSize(
151 ValidPrevOffset =
true;
154 if (
L.Load->getAlignment() > BaseLoad->getAlignment())
156 APInt PrevEnd = PrevOffset + PrevSize;
157 if (
L.POP.Offset.sgt(PrevEnd)) {
159 if (combineLoads(AggregateLoads))
161 AggregateLoads.
clear();
162 ValidPrevOffset =
false;
165 if (
L.POP.Offset != PrevEnd)
169 PrevOffset =
L.POP.Offset;
170 PrevSize =
L.Load->getModule()->getDataLayout().getTypeStoreSize(
174 if (combineLoads(AggregateLoads))
182 unsigned TotalSize = 0;
183 for (
const auto &
L : Loads)
184 TotalSize +=
L.Load->getType()->getPrimitiveSizeInBits();
186 TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
187 if (Loads.size() < 2)
191 dbgs() <<
"***** Combining Loads ******\n";
192 for (
const auto &
L : Loads) {
193 dbgs() <<
L.POP.Offset <<
": " << *
L.Load <<
"\n";
199 FirstLP.InsertOrder = -1u;
200 for (
const auto &
L : Loads)
201 if (
L.InsertOrder < FirstLP.InsertOrder)
205 FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
207 Builder->SetInsertPoint(FirstLP.Load);
208 Value *
Ptr = Builder->CreateConstGEP1_64(
209 Builder->CreatePointerCast(Loads[0].POP.Pointer,
210 Builder->getInt8PtrTy(AddressSpace)),
211 Loads[0].POP.Offset.getSExtValue());
213 Builder->CreatePointerCast(
216 Twine(Loads[0].
Load->getName()) +
".combined",
false,
217 Loads[0].
Load->getAlignment(), FirstLP.Load);
219 for (
const auto &
L : Loads) {
220 Builder->SetInsertPoint(
L.Load);
221 Value *V = Builder->CreateExtractInteger(
222 L.Load->getModule()->getDataLayout(), NewLoad,
223 cast<IntegerType>(
L.Load->getType()),
224 (
L.POP.Offset - Loads[0].POP.Offset).getZExtValue(),
"combine.extract");
225 L.Load->replaceAllUsesWith(V);
228 NumLoadsCombined = NumLoadsCombined + Loads.size();
233 if (skipBasicBlock(BB))
236 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
240 Builder = &TheBuilder;
245 bool Combined =
false;
248 if (
I.mayThrow() || (
I.mayWriteToMemory() && AST.containsUnknown(&
I))) {
249 if (combineLoads(LoadMap))
261 auto POP = getPointerOffsetPair(*LI);
264 LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
267 if (combineLoads(LoadMap))
275 return new LoadCombine();
Legacy wrapper pass to provide the GlobalsAAResult object.
void push_back(const T &Elt)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
STATISTIC(NumFunctions,"Total number of functions")
This is the interface for a simple mod/ref and alias analysis over globals.
static bool runOnBasicBlock(BasicBlock &BB)
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
An instruction for reading from memory.
StringRef getName() const
Return a constant reference to the value's name.
AnalysisUsage & addRequired()
void initializeLoadCombinePass(PassRegistry &)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a parent module.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Function Alias Analysis false
TargetFolder - Create constants with target dependent folding.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
LLVM Basic Block Representation.
This is an important class for using LLVM in a threaded context.
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
Value * getPointerOperand()
LLVMContext & getContext() const
All values hold a context through their type.
BasicBlockPass class - This class is used to implement most local optimizations.
IRBuilder< TargetFolder > BuilderTy
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
BasicBlockPass * createLoadCombinePass()
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Class for arbitrary precision integers.
bool isIntegerTy() const
True if this is an instance of IntegerType.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVMContext & getContext() const
Get the context in which this basic block lives.
LLVM Value Representation.
StringRef - Represent a constant reference to a string, i.e.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")