File: | lib/CodeGen/ExpandMemCmp.cpp |
Warning: | line 403, column 60 Called C++ object pointer is uninitialized |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass tries to expand memcmp() calls into optimally-sized loads and | |||
10 | // compares for the target. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "llvm/ADT/Statistic.h" | |||
15 | #include "llvm/Analysis/ConstantFolding.h" | |||
16 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
17 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
18 | #include "llvm/Analysis/ValueTracking.h" | |||
19 | #include "llvm/CodeGen/TargetLowering.h" | |||
20 | #include "llvm/CodeGen/TargetPassConfig.h" | |||
21 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | |||
22 | #include "llvm/IR/IRBuilder.h" | |||
23 | ||||
24 | using namespace llvm; | |||
25 | ||||
26 | #define DEBUG_TYPE"expandmemcmp" "expandmemcmp" | |||
27 | ||||
28 | STATISTIC(NumMemCmpCalls, "Number of memcmp calls")static llvm::Statistic NumMemCmpCalls = {"expandmemcmp", "NumMemCmpCalls" , "Number of memcmp calls", {0}, {false}}; | |||
29 | STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size")static llvm::Statistic NumMemCmpNotConstant = {"expandmemcmp" , "NumMemCmpNotConstant", "Number of memcmp calls without constant size" , {0}, {false}}; | |||
30 | STATISTIC(NumMemCmpGreaterThanMax,static llvm::Statistic NumMemCmpGreaterThanMax = {"expandmemcmp" , "NumMemCmpGreaterThanMax", "Number of memcmp calls with size greater than max size" , {0}, {false}} | |||
31 | "Number of memcmp calls with size greater than max size")static llvm::Statistic NumMemCmpGreaterThanMax = {"expandmemcmp" , "NumMemCmpGreaterThanMax", "Number of memcmp calls with size greater than max size" , {0}, {false}}; | |||
32 | STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls")static llvm::Statistic NumMemCmpInlined = {"expandmemcmp", "NumMemCmpInlined" , "Number of inlined memcmp calls", {0}, {false}}; | |||
33 | ||||
34 | static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock( | |||
35 | "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), | |||
36 | cl::desc("The number of loads per basic block for inline expansion of " | |||
37 | "memcmp that is only being compared against zero.")); | |||
38 | ||||
39 | static cl::opt<unsigned> MaxLoadsPerMemcmp( | |||
40 | "max-loads-per-memcmp", cl::Hidden, | |||
41 | cl::desc("Set maximum number of loads used in expanded memcmp")); | |||
42 | ||||
43 | static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize( | |||
44 | "max-loads-per-memcmp-opt-size", cl::Hidden, | |||
45 | cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz")); | |||
46 | ||||
47 | namespace { | |||
48 | ||||
49 | ||||
50 | // This class provides helper functions to expand a memcmp library call into an | |||
51 | // inline expansion. | |||
52 | class MemCmpExpansion { | |||
53 | struct ResultBlock { | |||
54 | BasicBlock *BB = nullptr; | |||
55 | PHINode *PhiSrc1 = nullptr; | |||
56 | PHINode *PhiSrc2 = nullptr; | |||
57 | ||||
58 | ResultBlock() = default; | |||
59 | }; | |||
60 | ||||
61 | CallInst *const CI; | |||
62 | ResultBlock ResBlock; | |||
63 | const uint64_t Size; | |||
64 | unsigned MaxLoadSize; | |||
65 | uint64_t NumLoadsNonOneByte; | |||
66 | const uint64_t NumLoadsPerBlockForZeroCmp; | |||
67 | std::vector<BasicBlock *> LoadCmpBlocks; | |||
68 | BasicBlock *EndBlock; | |||
69 | PHINode *PhiRes; | |||
70 | const bool IsUsedForZeroCmp; | |||
71 | const DataLayout &DL; | |||
72 | IRBuilder<> Builder; | |||
73 | // Represents the decomposition in blocks of the expansion. For example, | |||
74 | // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and | |||
75 | // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. | |||
76 | struct LoadEntry { | |||
77 | LoadEntry(unsigned LoadSize, uint64_t Offset) | |||
78 | : LoadSize(LoadSize), Offset(Offset) { | |||
79 | } | |||
80 | ||||
81 | // The size of the load for this block, in bytes. | |||
82 | unsigned LoadSize; | |||
83 | // The offset of this load from the base pointer, in bytes. | |||
84 | uint64_t Offset; | |||
85 | }; | |||
86 | using LoadEntryVector = SmallVector<LoadEntry, 8>; | |||
87 | LoadEntryVector LoadSequence; | |||
88 | ||||
89 | void createLoadCmpBlocks(); | |||
90 | void createResultBlock(); | |||
91 | void setupResultBlockPHINodes(); | |||
92 | void setupEndBlockPHINodes(); | |||
93 | Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); | |||
94 | void emitLoadCompareBlock(unsigned BlockIndex); | |||
95 | void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, | |||
96 | unsigned &LoadIndex); | |||
97 | void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes); | |||
98 | void emitMemCmpResultBlock(); | |||
99 | Value *getMemCmpExpansionZeroCase(); | |||
100 | Value *getMemCmpEqZeroOneBlock(); | |||
101 | Value *getMemCmpOneBlock(); | |||
102 | Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType, | |||
103 | uint64_t OffsetBytes); | |||
104 | ||||
105 | static LoadEntryVector | |||
106 | computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, | |||
107 | unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte); | |||
108 | static LoadEntryVector | |||
109 | computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize, | |||
110 | unsigned MaxNumLoads, | |||
111 | unsigned &NumLoadsNonOneByte); | |||
112 | ||||
113 | public: | |||
114 | MemCmpExpansion(CallInst *CI, uint64_t Size, | |||
115 | const TargetTransformInfo::MemCmpExpansionOptions &Options, | |||
116 | unsigned MaxNumLoads, const bool IsUsedForZeroCmp, | |||
117 | unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout); | |||
118 | ||||
119 | unsigned getNumBlocks(); | |||
120 | uint64_t getNumLoads() const { return LoadSequence.size(); } | |||
121 | ||||
122 | Value *getMemCmpExpansion(); | |||
123 | }; | |||
124 | ||||
125 | MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence( | |||
126 | uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, | |||
127 | const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) { | |||
128 | NumLoadsNonOneByte = 0; | |||
129 | LoadEntryVector LoadSequence; | |||
130 | uint64_t Offset = 0; | |||
131 | while (Size && !LoadSizes.empty()) { | |||
132 | const unsigned LoadSize = LoadSizes.front(); | |||
133 | const uint64_t NumLoadsForThisSize = Size / LoadSize; | |||
134 | if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { | |||
135 | // Do not expand if the total number of loads is larger than what the | |||
136 | // target allows. Note that it's important that we exit before completing | |||
137 | // the expansion to avoid using a ton of memory to store the expansion for | |||
138 | // large sizes. | |||
139 | return {}; | |||
140 | } | |||
141 | if (NumLoadsForThisSize > 0) { | |||
142 | for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { | |||
143 | LoadSequence.push_back({LoadSize, Offset}); | |||
144 | Offset += LoadSize; | |||
145 | } | |||
146 | if (LoadSize > 1) | |||
147 | ++NumLoadsNonOneByte; | |||
148 | Size = Size % LoadSize; | |||
149 | } | |||
150 | LoadSizes = LoadSizes.drop_front(); | |||
151 | } | |||
152 | return LoadSequence; | |||
153 | } | |||
154 | ||||
155 | MemCmpExpansion::LoadEntryVector | |||
156 | MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, | |||
157 | const unsigned MaxLoadSize, | |||
158 | const unsigned MaxNumLoads, | |||
159 | unsigned &NumLoadsNonOneByte) { | |||
160 | // These are already handled by the greedy approach. | |||
161 | if (Size < 2 || MaxLoadSize < 2) | |||
162 | return {}; | |||
163 | ||||
164 | // We try to do as many non-overlapping loads as possible starting from the | |||
165 | // beginning. | |||
166 | const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize; | |||
167 | assert(NumNonOverlappingLoads && "there must be at least one load")((NumNonOverlappingLoads && "there must be at least one load" ) ? static_cast<void> (0) : __assert_fail ("NumNonOverlappingLoads && \"there must be at least one load\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 167, __PRETTY_FUNCTION__)); | |||
168 | // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with | |||
169 | // an overlapping load. | |||
170 | Size = Size - NumNonOverlappingLoads * MaxLoadSize; | |||
171 | // Bail if we do not need an overloapping store, this is already handled by | |||
172 | // the greedy approach. | |||
173 | if (Size == 0) | |||
174 | return {}; | |||
175 | // Bail if the number of loads (non-overlapping + potential overlapping one) | |||
176 | // is larger than the max allowed. | |||
177 | if ((NumNonOverlappingLoads + 1) > MaxNumLoads) | |||
178 | return {}; | |||
179 | ||||
180 | // Add non-overlapping loads. | |||
181 | LoadEntryVector LoadSequence; | |||
182 | uint64_t Offset = 0; | |||
183 | for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) { | |||
184 | LoadSequence.push_back({MaxLoadSize, Offset}); | |||
185 | Offset += MaxLoadSize; | |||
186 | } | |||
187 | ||||
188 | // Add the last overlapping load. | |||
189 | assert(Size > 0 && Size < MaxLoadSize && "broken invariant")((Size > 0 && Size < MaxLoadSize && "broken invariant" ) ? static_cast<void> (0) : __assert_fail ("Size > 0 && Size < MaxLoadSize && \"broken invariant\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 189, __PRETTY_FUNCTION__)); | |||
190 | LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)}); | |||
191 | NumLoadsNonOneByte = 1; | |||
192 | return LoadSequence; | |||
193 | } | |||
194 | ||||
195 | // Initialize the basic block structure required for expansion of memcmp call | |||
196 | // with given maximum load size and memcmp size parameter. | |||
197 | // This structure includes: | |||
198 | // 1. A list of load compare blocks - LoadCmpBlocks. | |||
199 | // 2. An EndBlock, split from original instruction point, which is the block to | |||
200 | // return from. | |||
201 | // 3. ResultBlock, block to branch to for early exit when a | |||
202 | // LoadCmpBlock finds a difference. | |||
203 | MemCmpExpansion::MemCmpExpansion( | |||
204 | CallInst *const CI, uint64_t Size, | |||
205 | const TargetTransformInfo::MemCmpExpansionOptions &Options, | |||
206 | const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, | |||
207 | const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout) | |||
208 | : CI(CI), | |||
209 | Size(Size), | |||
210 | MaxLoadSize(0), | |||
211 | NumLoadsNonOneByte(0), | |||
212 | NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp), | |||
213 | IsUsedForZeroCmp(IsUsedForZeroCmp), | |||
214 | DL(TheDataLayout), | |||
215 | Builder(CI) { | |||
216 | assert(Size > 0 && "zero blocks")((Size > 0 && "zero blocks") ? static_cast<void > (0) : __assert_fail ("Size > 0 && \"zero blocks\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 216, __PRETTY_FUNCTION__)); | |||
217 | // Scale the max size down if the target can load more bytes than we need. | |||
218 | llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes); | |||
219 | while (!LoadSizes.empty() && LoadSizes.front() > Size) { | |||
220 | LoadSizes = LoadSizes.drop_front(); | |||
221 | } | |||
222 | assert(!LoadSizes.empty() && "cannot load Size bytes")((!LoadSizes.empty() && "cannot load Size bytes") ? static_cast <void> (0) : __assert_fail ("!LoadSizes.empty() && \"cannot load Size bytes\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 222, __PRETTY_FUNCTION__)); | |||
223 | MaxLoadSize = LoadSizes.front(); | |||
224 | // Compute the decomposition. | |||
225 | unsigned GreedyNumLoadsNonOneByte = 0; | |||
226 | LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads, | |||
227 | GreedyNumLoadsNonOneByte); | |||
228 | NumLoadsNonOneByte = GreedyNumLoadsNonOneByte; | |||
229 | assert(LoadSequence.size() <= MaxNumLoads && "broken invariant")((LoadSequence.size() <= MaxNumLoads && "broken invariant" ) ? static_cast<void> (0) : __assert_fail ("LoadSequence.size() <= MaxNumLoads && \"broken invariant\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 229, __PRETTY_FUNCTION__)); | |||
230 | // If we allow overlapping loads and the load sequence is not already optimal, | |||
231 | // use overlapping loads. | |||
232 | if (Options.AllowOverlappingLoads && | |||
233 | (LoadSequence.empty() || LoadSequence.size() > 2)) { | |||
234 | unsigned OverlappingNumLoadsNonOneByte = 0; | |||
235 | auto OverlappingLoads = computeOverlappingLoadSequence( | |||
236 | Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte); | |||
237 | if (!OverlappingLoads.empty() && | |||
238 | (LoadSequence.empty() || | |||
239 | OverlappingLoads.size() < LoadSequence.size())) { | |||
240 | LoadSequence = OverlappingLoads; | |||
241 | NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte; | |||
242 | } | |||
243 | } | |||
244 | assert(LoadSequence.size() <= MaxNumLoads && "broken invariant")((LoadSequence.size() <= MaxNumLoads && "broken invariant" ) ? static_cast<void> (0) : __assert_fail ("LoadSequence.size() <= MaxNumLoads && \"broken invariant\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 244, __PRETTY_FUNCTION__)); | |||
245 | } | |||
246 | ||||
247 | unsigned MemCmpExpansion::getNumBlocks() { | |||
248 | if (IsUsedForZeroCmp) | |||
249 | return getNumLoads() / NumLoadsPerBlockForZeroCmp + | |||
250 | (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0); | |||
251 | return getNumLoads(); | |||
252 | } | |||
253 | ||||
254 | void MemCmpExpansion::createLoadCmpBlocks() { | |||
255 | for (unsigned i = 0; i < getNumBlocks(); i++) { | |||
256 | BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", | |||
257 | EndBlock->getParent(), EndBlock); | |||
258 | LoadCmpBlocks.push_back(BB); | |||
259 | } | |||
260 | } | |||
261 | ||||
262 | void MemCmpExpansion::createResultBlock() { | |||
263 | ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", | |||
264 | EndBlock->getParent(), EndBlock); | |||
265 | } | |||
266 | ||||
267 | /// Return a pointer to an element of type `LoadSizeType` at offset | |||
268 | /// `OffsetBytes`. | |||
269 | Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, | |||
270 | Type *LoadSizeType, | |||
271 | uint64_t OffsetBytes) { | |||
272 | if (OffsetBytes > 0) { | |||
273 | auto *ByteType = Type::getInt8Ty(CI->getContext()); | |||
274 | Source = Builder.CreateGEP( | |||
275 | ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), | |||
276 | ConstantInt::get(ByteType, OffsetBytes)); | |||
277 | } | |||
278 | return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); | |||
279 | } | |||
280 | ||||
281 | // This function creates the IR instructions for loading and comparing 1 byte. | |||
282 | // It loads 1 byte from each source of the memcmp parameters with the given | |||
283 | // GEPIndex. It then subtracts the two loaded values and adds this result to the | |||
284 | // final phi node for selecting the memcmp result. | |||
285 | void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, | |||
286 | unsigned OffsetBytes) { | |||
287 | Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); | |||
288 | Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); | |||
289 | Value *Source1 = | |||
290 | getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes); | |||
291 | Value *Source2 = | |||
292 | getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes); | |||
293 | ||||
294 | Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
295 | Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
296 | ||||
297 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); | |||
298 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); | |||
299 | Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); | |||
300 | ||||
301 | PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); | |||
302 | ||||
303 | if (BlockIndex < (LoadCmpBlocks.size() - 1)) { | |||
304 | // Early exit branch if difference found to EndBlock. Otherwise, continue to | |||
305 | // next LoadCmpBlock, | |||
306 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, | |||
307 | ConstantInt::get(Diff->getType(), 0)); | |||
308 | BranchInst *CmpBr = | |||
309 | BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); | |||
310 | Builder.Insert(CmpBr); | |||
311 | } else { | |||
312 | // The last block has an unconditional branch to EndBlock. | |||
313 | BranchInst *CmpBr = BranchInst::Create(EndBlock); | |||
314 | Builder.Insert(CmpBr); | |||
315 | } | |||
316 | } | |||
317 | ||||
318 | /// Generate an equality comparison for one or more pairs of loaded values. | |||
319 | /// This is used in the case where the memcmp() call is compared equal or not | |||
320 | /// equal to zero. | |||
321 | Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, | |||
322 | unsigned &LoadIndex) { | |||
323 | assert(LoadIndex < getNumLoads() &&((LoadIndex < getNumLoads() && "getCompareLoadPairs() called with no remaining loads" ) ? static_cast<void> (0) : __assert_fail ("LoadIndex < getNumLoads() && \"getCompareLoadPairs() called with no remaining loads\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 324, __PRETTY_FUNCTION__)) | |||
324 | "getCompareLoadPairs() called with no remaining loads")((LoadIndex < getNumLoads() && "getCompareLoadPairs() called with no remaining loads" ) ? static_cast<void> (0) : __assert_fail ("LoadIndex < getNumLoads() && \"getCompareLoadPairs() called with no remaining loads\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 324, __PRETTY_FUNCTION__)); | |||
325 | std::vector<Value *> XorList, OrList; | |||
326 | Value *Diff; | |||
327 | ||||
328 | const unsigned NumLoads = | |||
329 | std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp); | |||
330 | ||||
331 | // For a single-block expansion, start inserting before the memcmp call. | |||
332 | if (LoadCmpBlocks.empty()) | |||
333 | Builder.SetInsertPoint(CI); | |||
334 | else | |||
335 | Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); | |||
336 | ||||
337 | Value *Cmp = nullptr; | |||
338 | // If we have multiple loads per block, we need to generate a composite | |||
339 | // comparison using xor+or. The type for the combinations is the largest load | |||
340 | // type. | |||
341 | IntegerType *const MaxLoadType = | |||
342 | NumLoads == 1 ? nullptr | |||
343 | : IntegerType::get(CI->getContext(), MaxLoadSize * 8); | |||
344 | for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { | |||
345 | const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; | |||
346 | ||||
347 | IntegerType *LoadSizeType = | |||
348 | IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); | |||
349 | ||||
350 | Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, | |||
351 | CurLoadEntry.Offset); | |||
352 | Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, | |||
353 | CurLoadEntry.Offset); | |||
354 | ||||
355 | // Get a constant or load a value for each source address. | |||
356 | Value *LoadSrc1 = nullptr; | |||
357 | if (auto *Source1C = dyn_cast<Constant>(Source1)) | |||
358 | LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); | |||
359 | if (!LoadSrc1) | |||
360 | LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
361 | ||||
362 | Value *LoadSrc2 = nullptr; | |||
363 | if (auto *Source2C = dyn_cast<Constant>(Source2)) | |||
364 | LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); | |||
365 | if (!LoadSrc2) | |||
366 | LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
367 | ||||
368 | if (NumLoads != 1) { | |||
369 | if (LoadSizeType != MaxLoadType) { | |||
370 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); | |||
371 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); | |||
372 | } | |||
373 | // If we have multiple loads per block, we need to generate a composite | |||
374 | // comparison using xor+or. | |||
375 | Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); | |||
376 | Diff = Builder.CreateZExt(Diff, MaxLoadType); | |||
377 | XorList.push_back(Diff); | |||
378 | } else { | |||
379 | // If there's only one load per block, we just compare the loaded values. | |||
380 | Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); | |||
381 | } | |||
382 | } | |||
383 | ||||
384 | auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { | |||
385 | std::vector<Value *> OutList; | |||
386 | for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { | |||
387 | Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); | |||
388 | OutList.push_back(Or); | |||
389 | } | |||
390 | if (InList.size() % 2 != 0) | |||
391 | OutList.push_back(InList.back()); | |||
392 | return OutList; | |||
393 | }; | |||
394 | ||||
395 | if (!Cmp) { | |||
396 | // Pairwise OR the XOR results. | |||
397 | OrList = pairWiseOr(XorList); | |||
398 | ||||
399 | // Pairwise OR the OR results until one result left. | |||
400 | while (OrList.size() != 1) { | |||
401 | OrList = pairWiseOr(OrList); | |||
402 | } | |||
403 | Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); | |||
| ||||
404 | } | |||
405 | ||||
406 | return Cmp; | |||
407 | } | |||
408 | ||||
409 | void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, | |||
410 | unsigned &LoadIndex) { | |||
411 | Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); | |||
412 | ||||
413 | BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) | |||
414 | ? EndBlock | |||
415 | : LoadCmpBlocks[BlockIndex + 1]; | |||
416 | // Early exit branch if difference found to ResultBlock. Otherwise, | |||
417 | // continue to next LoadCmpBlock or EndBlock. | |||
418 | BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); | |||
419 | Builder.Insert(CmpBr); | |||
420 | ||||
421 | // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 | |||
422 | // since early exit to ResultBlock was not taken (no difference was found in | |||
423 | // any of the bytes). | |||
424 | if (BlockIndex == LoadCmpBlocks.size() - 1) { | |||
425 | Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); | |||
426 | PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); | |||
427 | } | |||
428 | } | |||
429 | ||||
430 | // This function creates the IR intructions for loading and comparing using the | |||
431 | // given LoadSize. It loads the number of bytes specified by LoadSize from each | |||
432 | // source of the memcmp parameters. It then does a subtract to see if there was | |||
433 | // a difference in the loaded values. If a difference is found, it branches | |||
434 | // with an early exit to the ResultBlock for calculating which source was | |||
435 | // larger. Otherwise, it falls through to the either the next LoadCmpBlock or | |||
436 | // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with | |||
437 | // a special case through emitLoadCompareByteBlock. The special handling can | |||
438 | // simply subtract the loaded values and add it to the result phi node. | |||
439 | void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { | |||
440 | // There is one load per block in this case, BlockIndex == LoadIndex. | |||
441 | const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; | |||
442 | ||||
443 | if (CurLoadEntry.LoadSize == 1) { | |||
444 | MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset); | |||
445 | return; | |||
446 | } | |||
447 | ||||
448 | Type *LoadSizeType = | |||
449 | IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); | |||
450 | Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); | |||
451 | assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type")((CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type" ) ? static_cast<void> (0) : __assert_fail ("CurLoadEntry.LoadSize <= MaxLoadSize && \"Unexpected load type\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 451, __PRETTY_FUNCTION__)); | |||
452 | ||||
453 | Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); | |||
454 | ||||
455 | Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, | |||
456 | CurLoadEntry.Offset); | |||
457 | Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, | |||
458 | CurLoadEntry.Offset); | |||
459 | ||||
460 | // Load LoadSizeType from the base address. | |||
461 | Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
462 | Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
463 | ||||
464 | if (DL.isLittleEndian()) { | |||
465 | Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), | |||
466 | Intrinsic::bswap, LoadSizeType); | |||
467 | LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); | |||
468 | LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); | |||
469 | } | |||
470 | ||||
471 | if (LoadSizeType != MaxLoadType) { | |||
472 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); | |||
473 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); | |||
474 | } | |||
475 | ||||
476 | // Add the loaded values to the phi nodes for calculating memcmp result only | |||
477 | // if result is not used in a zero equality. | |||
478 | if (!IsUsedForZeroCmp) { | |||
479 | ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); | |||
480 | ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); | |||
481 | } | |||
482 | ||||
483 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); | |||
484 | BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) | |||
485 | ? EndBlock | |||
486 | : LoadCmpBlocks[BlockIndex + 1]; | |||
487 | // Early exit branch if difference found to ResultBlock. Otherwise, continue | |||
488 | // to next LoadCmpBlock or EndBlock. | |||
489 | BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); | |||
490 | Builder.Insert(CmpBr); | |||
491 | ||||
492 | // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 | |||
493 | // since early exit to ResultBlock was not taken (no difference was found in | |||
494 | // any of the bytes). | |||
495 | if (BlockIndex == LoadCmpBlocks.size() - 1) { | |||
496 | Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); | |||
497 | PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); | |||
498 | } | |||
499 | } | |||
500 | ||||
501 | // This function populates the ResultBlock with a sequence to calculate the | |||
502 | // memcmp result. It compares the two loaded source values and returns -1 if | |||
503 | // src1 < src2 and 1 if src1 > src2. | |||
504 | void MemCmpExpansion::emitMemCmpResultBlock() { | |||
505 | // Special case: if memcmp result is used in a zero equality, result does not | |||
506 | // need to be calculated and can simply return 1. | |||
507 | if (IsUsedForZeroCmp) { | |||
508 | BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); | |||
509 | Builder.SetInsertPoint(ResBlock.BB, InsertPt); | |||
510 | Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); | |||
511 | PhiRes->addIncoming(Res, ResBlock.BB); | |||
512 | BranchInst *NewBr = BranchInst::Create(EndBlock); | |||
513 | Builder.Insert(NewBr); | |||
514 | return; | |||
515 | } | |||
516 | BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); | |||
517 | Builder.SetInsertPoint(ResBlock.BB, InsertPt); | |||
518 | ||||
519 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, | |||
520 | ResBlock.PhiSrc2); | |||
521 | ||||
522 | Value *Res = | |||
523 | Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), | |||
524 | ConstantInt::get(Builder.getInt32Ty(), 1)); | |||
525 | ||||
526 | BranchInst *NewBr = BranchInst::Create(EndBlock); | |||
527 | Builder.Insert(NewBr); | |||
528 | PhiRes->addIncoming(Res, ResBlock.BB); | |||
529 | } | |||
530 | ||||
531 | void MemCmpExpansion::setupResultBlockPHINodes() { | |||
532 | Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); | |||
533 | Builder.SetInsertPoint(ResBlock.BB); | |||
534 | // Note: this assumes one load per block. | |||
535 | ResBlock.PhiSrc1 = | |||
536 | Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); | |||
537 | ResBlock.PhiSrc2 = | |||
538 | Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); | |||
539 | } | |||
540 | ||||
541 | void MemCmpExpansion::setupEndBlockPHINodes() { | |||
542 | Builder.SetInsertPoint(&EndBlock->front()); | |||
543 | PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); | |||
544 | } | |||
545 | ||||
546 | Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { | |||
547 | unsigned LoadIndex = 0; | |||
548 | // This loop populates each of the LoadCmpBlocks with the IR sequence to | |||
549 | // handle multiple loads per block. | |||
550 | for (unsigned I = 0; I < getNumBlocks(); ++I) { | |||
551 | emitLoadCompareBlockMultipleLoads(I, LoadIndex); | |||
552 | } | |||
553 | ||||
554 | emitMemCmpResultBlock(); | |||
555 | return PhiRes; | |||
556 | } | |||
557 | ||||
558 | /// A memcmp expansion that compares equality with 0 and only has one block of | |||
559 | /// load and compare can bypass the compare, branch, and phi IR that is required | |||
560 | /// in the general case. | |||
561 | Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { | |||
562 | unsigned LoadIndex = 0; | |||
563 | Value *Cmp = getCompareLoadPairs(0, LoadIndex); | |||
| ||||
564 | assert(LoadIndex == getNumLoads() && "some entries were not consumed")((LoadIndex == getNumLoads() && "some entries were not consumed" ) ? static_cast<void> (0) : __assert_fail ("LoadIndex == getNumLoads() && \"some entries were not consumed\"" , "/build/llvm-toolchain-snapshot-9~svn360825/lib/CodeGen/ExpandMemCmp.cpp" , 564, __PRETTY_FUNCTION__)); | |||
565 | return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); | |||
566 | } | |||
567 | ||||
568 | /// A memcmp expansion that only has one block of load and compare can bypass | |||
569 | /// the compare, branch, and phi IR that is required in the general case. | |||
570 | Value *MemCmpExpansion::getMemCmpOneBlock() { | |||
571 | Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); | |||
572 | Value *Source1 = CI->getArgOperand(0); | |||
573 | Value *Source2 = CI->getArgOperand(1); | |||
574 | ||||
575 | // Cast source to LoadSizeType*. | |||
576 | if (Source1->getType() != LoadSizeType) | |||
577 | Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); | |||
578 | if (Source2->getType() != LoadSizeType) | |||
579 | Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); | |||
580 | ||||
581 | // Load LoadSizeType from the base address. | |||
582 | Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
583 | Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
584 | ||||
585 | if (DL.isLittleEndian() && Size != 1) { | |||
586 | Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), | |||
587 | Intrinsic::bswap, LoadSizeType); | |||
588 | LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); | |||
589 | LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); | |||
590 | } | |||
591 | ||||
592 | if (Size < 4) { | |||
593 | // The i8 and i16 cases don't need compares. We zext the loaded values and | |||
594 | // subtract them to get the suitable negative, zero, or positive i32 result. | |||
595 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); | |||
596 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); | |||
597 | return Builder.CreateSub(LoadSrc1, LoadSrc2); | |||
598 | } | |||
599 | ||||
600 | // The result of memcmp is negative, zero, or positive, so produce that by | |||
601 | // subtracting 2 extended compare bits: sub (ugt, ult). | |||
602 | // If a target prefers to use selects to get -1/0/1, they should be able | |||
603 | // to transform this later. The inverse transform (going from selects to math) | |||
604 | // may not be possible in the DAG because the selects got converted into | |||
605 | // branches before we got there. | |||
606 | Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); | |||
607 | Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); | |||
608 | Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); | |||
609 | Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); | |||
610 | return Builder.CreateSub(ZextUGT, ZextULT); | |||
611 | } | |||
612 | ||||
613 | // This function expands the memcmp call into an inline expansion and returns | |||
614 | // the memcmp result. | |||
615 | Value *MemCmpExpansion::getMemCmpExpansion() { | |||
616 | // Create the basic block framework for a multi-block expansion. | |||
617 | if (getNumBlocks() != 1) { | |||
618 | BasicBlock *StartBlock = CI->getParent(); | |||
619 | EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); | |||
620 | setupEndBlockPHINodes(); | |||
621 | createResultBlock(); | |||
622 | ||||
623 | // If return value of memcmp is not used in a zero equality, we need to | |||
624 | // calculate which source was larger. The calculation requires the | |||
625 | // two loaded source values of each load compare block. | |||
626 | // These will be saved in the phi nodes created by setupResultBlockPHINodes. | |||
627 | if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); | |||
628 | ||||
629 | // Create the number of required load compare basic blocks. | |||
630 | createLoadCmpBlocks(); | |||
631 | ||||
632 | // Update the terminator added by splitBasicBlock to branch to the first | |||
633 | // LoadCmpBlock. | |||
634 | StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); | |||
635 | } | |||
636 | ||||
637 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); | |||
638 | ||||
639 | if (IsUsedForZeroCmp) | |||
640 | return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() | |||
641 | : getMemCmpExpansionZeroCase(); | |||
642 | ||||
643 | if (getNumBlocks() == 1) | |||
644 | return getMemCmpOneBlock(); | |||
645 | ||||
646 | for (unsigned I = 0; I < getNumBlocks(); ++I) { | |||
647 | emitLoadCompareBlock(I); | |||
648 | } | |||
649 | ||||
650 | emitMemCmpResultBlock(); | |||
651 | return PhiRes; | |||
652 | } | |||
653 | ||||
654 | // This function checks to see if an expansion of memcmp can be generated. | |||
655 | // It checks for constant compare size that is less than the max inline size. | |||
656 | // If an expansion cannot occur, returns false to leave as a library call. | |||
657 | // Otherwise, the library call is replaced with a new IR instruction sequence. | |||
658 | /// We want to transform: | |||
659 | /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) | |||
660 | /// To: | |||
661 | /// loadbb: | |||
662 | /// %0 = bitcast i32* %buffer2 to i8* | |||
663 | /// %1 = bitcast i32* %buffer1 to i8* | |||
664 | /// %2 = bitcast i8* %1 to i64* | |||
665 | /// %3 = bitcast i8* %0 to i64* | |||
666 | /// %4 = load i64, i64* %2 | |||
667 | /// %5 = load i64, i64* %3 | |||
668 | /// %6 = call i64 @llvm.bswap.i64(i64 %4) | |||
669 | /// %7 = call i64 @llvm.bswap.i64(i64 %5) | |||
670 | /// %8 = sub i64 %6, %7 | |||
671 | /// %9 = icmp ne i64 %8, 0 | |||
672 | /// br i1 %9, label %res_block, label %loadbb1 | |||
673 | /// res_block: ; preds = %loadbb2, | |||
674 | /// %loadbb1, %loadbb | |||
675 | /// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] | |||
676 | /// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] | |||
677 | /// %10 = icmp ult i64 %phi.src1, %phi.src2 | |||
678 | /// %11 = select i1 %10, i32 -1, i32 1 | |||
679 | /// br label %endblock | |||
680 | /// loadbb1: ; preds = %loadbb | |||
681 | /// %12 = bitcast i32* %buffer2 to i8* | |||
682 | /// %13 = bitcast i32* %buffer1 to i8* | |||
683 | /// %14 = bitcast i8* %13 to i32* | |||
684 | /// %15 = bitcast i8* %12 to i32* | |||
685 | /// %16 = getelementptr i32, i32* %14, i32 2 | |||
686 | /// %17 = getelementptr i32, i32* %15, i32 2 | |||
687 | /// %18 = load i32, i32* %16 | |||
688 | /// %19 = load i32, i32* %17 | |||
689 | /// %20 = call i32 @llvm.bswap.i32(i32 %18) | |||
690 | /// %21 = call i32 @llvm.bswap.i32(i32 %19) | |||
691 | /// %22 = zext i32 %20 to i64 | |||
692 | /// %23 = zext i32 %21 to i64 | |||
693 | /// %24 = sub i64 %22, %23 | |||
694 | /// %25 = icmp ne i64 %24, 0 | |||
695 | /// br i1 %25, label %res_block, label %loadbb2 | |||
696 | /// loadbb2: ; preds = %loadbb1 | |||
697 | /// %26 = bitcast i32* %buffer2 to i8* | |||
698 | /// %27 = bitcast i32* %buffer1 to i8* | |||
699 | /// %28 = bitcast i8* %27 to i16* | |||
700 | /// %29 = bitcast i8* %26 to i16* | |||
701 | /// %30 = getelementptr i16, i16* %28, i16 6 | |||
702 | /// %31 = getelementptr i16, i16* %29, i16 6 | |||
703 | /// %32 = load i16, i16* %30 | |||
704 | /// %33 = load i16, i16* %31 | |||
705 | /// %34 = call i16 @llvm.bswap.i16(i16 %32) | |||
706 | /// %35 = call i16 @llvm.bswap.i16(i16 %33) | |||
707 | /// %36 = zext i16 %34 to i64 | |||
708 | /// %37 = zext i16 %35 to i64 | |||
709 | /// %38 = sub i64 %36, %37 | |||
710 | /// %39 = icmp ne i64 %38, 0 | |||
711 | /// br i1 %39, label %res_block, label %loadbb3 | |||
712 | /// loadbb3: ; preds = %loadbb2 | |||
713 | /// %40 = bitcast i32* %buffer2 to i8* | |||
714 | /// %41 = bitcast i32* %buffer1 to i8* | |||
715 | /// %42 = getelementptr i8, i8* %41, i8 14 | |||
716 | /// %43 = getelementptr i8, i8* %40, i8 14 | |||
717 | /// %44 = load i8, i8* %42 | |||
718 | /// %45 = load i8, i8* %43 | |||
719 | /// %46 = zext i8 %44 to i32 | |||
720 | /// %47 = zext i8 %45 to i32 | |||
721 | /// %48 = sub i32 %46, %47 | |||
722 | /// br label %endblock | |||
723 | /// endblock: ; preds = %res_block, | |||
724 | /// %loadbb3 | |||
725 | /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] | |||
726 | /// ret i32 %phi.res | |||
727 | static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, | |||
728 | const TargetLowering *TLI, const DataLayout *DL) { | |||
729 | NumMemCmpCalls++; | |||
730 | ||||
731 | // Early exit from expansion if -Oz. | |||
732 | if (CI->getFunction()->hasMinSize()) | |||
733 | return false; | |||
734 | ||||
735 | // Early exit from expansion if size is not a constant. | |||
736 | ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); | |||
737 | if (!SizeCast) { | |||
738 | NumMemCmpNotConstant++; | |||
739 | return false; | |||
740 | } | |||
741 | const uint64_t SizeVal = SizeCast->getZExtValue(); | |||
742 | ||||
743 | if (SizeVal == 0) { | |||
744 | return false; | |||
745 | } | |||
746 | // TTI call to check if target would like to expand memcmp. Also, get the | |||
747 | // available load sizes. | |||
748 | const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); | |||
749 | const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); | |||
750 | if (!Options) return false; | |||
751 | ||||
752 | const unsigned MaxNumLoads = CI->getFunction()->hasOptSize() | |||
753 | ? (MaxLoadsPerMemcmpOptSize.getNumOccurrences() | |||
754 | ? MaxLoadsPerMemcmpOptSize | |||
755 | : TLI->getMaxExpandSizeMemcmp(true)) | |||
756 | : (MaxLoadsPerMemcmp.getNumOccurrences() | |||
757 | ? MaxLoadsPerMemcmp | |||
758 | : TLI->getMaxExpandSizeMemcmp(false)); | |||
759 | ||||
760 | unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences() | |||
761 | ? MemCmpEqZeroNumLoadsPerBlock | |||
762 | : TLI->getMemcmpEqZeroLoadsPerBlock(); | |||
763 | ||||
764 | MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, | |||
765 | IsUsedForZeroCmp, NumLoadsPerBlock, *DL); | |||
766 | ||||
767 | // Don't expand if this will require more loads than desired by the target. | |||
768 | if (Expansion.getNumLoads() == 0) { | |||
769 | NumMemCmpGreaterThanMax++; | |||
770 | return false; | |||
771 | } | |||
772 | ||||
773 | NumMemCmpInlined++; | |||
774 | ||||
775 | Value *Res = Expansion.getMemCmpExpansion(); | |||
776 | ||||
777 | // Replace call with result of expansion and erase call. | |||
778 | CI->replaceAllUsesWith(Res); | |||
779 | CI->eraseFromParent(); | |||
780 | ||||
781 | return true; | |||
782 | } | |||
783 | ||||
784 | ||||
785 | ||||
786 | class ExpandMemCmpPass : public FunctionPass { | |||
787 | public: | |||
788 | static char ID; | |||
789 | ||||
790 | ExpandMemCmpPass() : FunctionPass(ID) { | |||
791 | initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry()); | |||
792 | } | |||
793 | ||||
794 | bool runOnFunction(Function &F) override { | |||
795 | if (skipFunction(F)) return false; | |||
796 | ||||
797 | auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); | |||
798 | if (!TPC) { | |||
799 | return false; | |||
800 | } | |||
801 | const TargetLowering* TL = | |||
802 | TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); | |||
803 | ||||
804 | const TargetLibraryInfo *TLI = | |||
805 | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); | |||
806 | const TargetTransformInfo *TTI = | |||
807 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); | |||
808 | auto PA = runImpl(F, TLI, TTI, TL); | |||
809 | return !PA.areAllPreserved(); | |||
810 | } | |||
811 | ||||
812 | private: | |||
813 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
814 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
815 | AU.addRequired<TargetTransformInfoWrapperPass>(); | |||
816 | FunctionPass::getAnalysisUsage(AU); | |||
817 | } | |||
818 | ||||
819 | PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, | |||
820 | const TargetTransformInfo *TTI, | |||
821 | const TargetLowering* TL); | |||
822 | // Returns true if a change was made. | |||
823 | bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, | |||
824 | const TargetTransformInfo *TTI, const TargetLowering* TL, | |||
825 | const DataLayout& DL); | |||
826 | }; | |||
827 | ||||
828 | bool ExpandMemCmpPass::runOnBlock( | |||
829 | BasicBlock &BB, const TargetLibraryInfo *TLI, | |||
830 | const TargetTransformInfo *TTI, const TargetLowering* TL, | |||
831 | const DataLayout& DL) { | |||
832 | for (Instruction& I : BB) { | |||
833 | CallInst *CI = dyn_cast<CallInst>(&I); | |||
834 | if (!CI) { | |||
835 | continue; | |||
836 | } | |||
837 | LibFunc Func; | |||
838 | if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && | |||
839 | (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && | |||
840 | expandMemCmp(CI, TTI, TL, &DL)) { | |||
841 | return true; | |||
842 | } | |||
843 | } | |||
844 | return false; | |||
845 | } | |||
846 | ||||
847 | ||||
848 | PreservedAnalyses ExpandMemCmpPass::runImpl( | |||
849 | Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, | |||
850 | const TargetLowering* TL) { | |||
851 | const DataLayout& DL = F.getParent()->getDataLayout(); | |||
852 | bool MadeChanges = false; | |||
853 | for (auto BBIt = F.begin(); BBIt != F.end();) { | |||
854 | if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { | |||
855 | MadeChanges = true; | |||
856 | // If changes were made, restart the function from the beginning, since | |||
857 | // the structure of the function was changed. | |||
858 | BBIt = F.begin(); | |||
859 | } else { | |||
860 | ++BBIt; | |||
861 | } | |||
862 | } | |||
863 | return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all(); | |||
864 | } | |||
865 | ||||
866 | } // namespace | |||
867 | ||||
868 | char ExpandMemCmpPass::ID = 0; | |||
869 | INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",static void *initializeExpandMemCmpPassPassOnce(PassRegistry & Registry) { | |||
870 | "Expand memcmp() to load/stores", false, false)static void *initializeExpandMemCmpPassPassOnce(PassRegistry & Registry) { | |||
871 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
872 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | |||
873 | INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",PassInfo *PI = new PassInfo( "Expand memcmp() to load/stores" , "expandmemcmp", &ExpandMemCmpPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<ExpandMemCmpPass>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeExpandMemCmpPassPassFlag; void llvm::initializeExpandMemCmpPassPass (PassRegistry &Registry) { llvm::call_once(InitializeExpandMemCmpPassPassFlag , initializeExpandMemCmpPassPassOnce, std::ref(Registry)); } | |||
874 | "Expand memcmp() to load/stores", false, false)PassInfo *PI = new PassInfo( "Expand memcmp() to load/stores" , "expandmemcmp", &ExpandMemCmpPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<ExpandMemCmpPass>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeExpandMemCmpPassPassFlag; void llvm::initializeExpandMemCmpPassPass (PassRegistry &Registry) { llvm::call_once(InitializeExpandMemCmpPassPassFlag , initializeExpandMemCmpPassPassOnce, std::ref(Registry)); } | |||
875 | ||||
876 | FunctionPass *llvm::createExpandMemCmpPass() { | |||
877 | return new ExpandMemCmpPass(); | |||
878 | } |