Bug Summary

File:lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Warning:line 532, column 15
Although the value stored to 'PatternValue' is used in the enclosing expression, the value is never actually read from 'PatternValue'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LoopIdiomRecognize.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn374814/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-10~svn374814/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn374814/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn374814/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn374814=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-15-035155-28452-1 -x c++ /build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
1//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements an idiom recognizer that transforms simple loops into a
10// non-loop form. In cases that this kicks in, it can be a significant
11// performance win.
12//
13// If compiling for code size we avoid idiom recognition if the resulting
14// code could be larger than the code for the original loop. One way this could
15// happen is if the loop is not removable after idiom recognition due to the
16// presence of non-idiom instructions. The initial implementation of the
17// heuristics applies to idioms in multi-block loops.
18//
19//===----------------------------------------------------------------------===//
20//
21// TODO List:
22//
23// Future loop memory idioms to recognize:
24// memcmp, memmove, strlen, etc.
25// Future floating point idioms to recognize in -ffast-math mode:
26// fpowi
27// Future integer operation idioms to recognize:
28// ctpop
29//
30// Beware that isel's default lowering for ctpop is highly inefficient for
31// i64 and larger types when i64 is legal and the value has few bits set. It
32// would be good to enhance isel to emit a loop for ctpop in this case.
33//
34// This could recognize common matrix multiplies and dot product idioms and
35// replace them with calls to BLAS (if linked in??).
36//
37//===----------------------------------------------------------------------===//
38
39#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
40#include "llvm/ADT/APInt.h"
41#include "llvm/ADT/ArrayRef.h"
42#include "llvm/ADT/DenseMap.h"
43#include "llvm/ADT/MapVector.h"
44#include "llvm/ADT/STLExtras.h"
45#include "llvm/ADT/SetVector.h"
46#include "llvm/ADT/SmallPtrSet.h"
47#include "llvm/ADT/SmallVector.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/ADT/StringRef.h"
50#include "llvm/Analysis/AliasAnalysis.h"
51#include "llvm/Analysis/LoopAccessAnalysis.h"
52#include "llvm/Analysis/LoopInfo.h"
53#include "llvm/Analysis/LoopPass.h"
54#include "llvm/Analysis/MemoryLocation.h"
55#include "llvm/Analysis/OptimizationRemarkEmitter.h"
56#include "llvm/Analysis/ScalarEvolution.h"
57#include "llvm/Analysis/ScalarEvolutionExpander.h"
58#include "llvm/Analysis/ScalarEvolutionExpressions.h"
59#include "llvm/Analysis/TargetLibraryInfo.h"
60#include "llvm/Analysis/TargetTransformInfo.h"
61#include "llvm/Analysis/ValueTracking.h"
62#include "llvm/IR/Attributes.h"
63#include "llvm/IR/BasicBlock.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugLoc.h"
68#include "llvm/IR/DerivedTypes.h"
69#include "llvm/IR/Dominators.h"
70#include "llvm/IR/GlobalValue.h"
71#include "llvm/IR/GlobalVariable.h"
72#include "llvm/IR/IRBuilder.h"
73#include "llvm/IR/InstrTypes.h"
74#include "llvm/IR/Instruction.h"
75#include "llvm/IR/Instructions.h"
76#include "llvm/IR/IntrinsicInst.h"
77#include "llvm/IR/Intrinsics.h"
78#include "llvm/IR/LLVMContext.h"
79#include "llvm/IR/Module.h"
80#include "llvm/IR/PassManager.h"
81#include "llvm/IR/PatternMatch.h"
82#include "llvm/IR/Type.h"
83#include "llvm/IR/User.h"
84#include "llvm/IR/Value.h"
85#include "llvm/IR/ValueHandle.h"
86#include "llvm/IR/Verifier.h"
87#include "llvm/Pass.h"
88#include "llvm/Support/Casting.h"
89#include "llvm/Support/CommandLine.h"
90#include "llvm/Support/Debug.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Transforms/Scalar.h"
93#include "llvm/Transforms/Scalar/LoopPassManager.h"
94#include "llvm/Transforms/Utils/BasicBlockUtils.h"
95#include "llvm/Transforms/Utils/BuildLibCalls.h"
96#include "llvm/Transforms/Utils/Local.h"
97#include "llvm/Transforms/Utils/LoopUtils.h"
98#include <algorithm>
99#include <cassert>
100#include <cstdint>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE"loop-idiom" "loop-idiom"
107
108STATISTIC(NumMemSet, "Number of memset's formed from loop stores")static llvm::Statistic NumMemSet = {"loop-idiom", "NumMemSet"
, "Number of memset's formed from loop stores"}
;
109STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores")static llvm::Statistic NumMemCpy = {"loop-idiom", "NumMemCpy"
, "Number of memcpy's formed from loop load+stores"}
;
110STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare")static llvm::Statistic NumBCmp = {"loop-idiom", "NumBCmp", "Number of memcmp's formed from loop 2xload+eq-compare"
}
;
111
112static cl::opt<bool> UseLIRCodeSizeHeurs(
113 "use-lir-code-size-heurs",
114 cl::desc("Use loop idiom recognition code size heuristics when compiling"
115 "with -Os/-Oz"),
116 cl::init(true), cl::Hidden);
117
118namespace {
119
120// FIXME: reinventing the wheel much? Is there a cleaner solution?
121struct PMAbstraction {
122 virtual void markLoopAsDeleted(Loop *L) = 0;
123 virtual ~PMAbstraction() = default;
124};
125struct LegacyPMAbstraction : PMAbstraction {
126 LPPassManager &LPM;
127 LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {}
128 virtual ~LegacyPMAbstraction() = default;
129 void markLoopAsDeleted(Loop *L) override { LPM.markLoopAsDeleted(*L); }
130};
131struct NewPMAbstraction : PMAbstraction {
132 LPMUpdater &Updater;
133 NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {}
134 virtual ~NewPMAbstraction() = default;
135 void markLoopAsDeleted(Loop *L) override {
136 Updater.markLoopAsDeleted(*L, L->getName());
137 }
138};
139
140class LoopIdiomRecognize {
141 Loop *CurLoop = nullptr;
142 AliasAnalysis *AA;
143 DominatorTree *DT;
144 LoopInfo *LI;
145 ScalarEvolution *SE;
146 TargetLibraryInfo *TLI;
147 const TargetTransformInfo *TTI;
148 const DataLayout *DL;
149 PMAbstraction &LoopDeleter;
150 OptimizationRemarkEmitter &ORE;
151 bool ApplyCodeSizeHeuristics;
152
153public:
154 explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
155 LoopInfo *LI, ScalarEvolution *SE,
156 TargetLibraryInfo *TLI,
157 const TargetTransformInfo *TTI,
158 const DataLayout *DL, PMAbstraction &LoopDeleter,
159 OptimizationRemarkEmitter &ORE)
160 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL),
161 LoopDeleter(LoopDeleter), ORE(ORE) {}
162
163 bool runOnLoop(Loop *L);
164
165private:
166 using StoreList = SmallVector<StoreInst *, 8>;
167 using StoreListMap = MapVector<Value *, StoreList>;
168
169 StoreListMap StoreRefsForMemset;
170 StoreListMap StoreRefsForMemsetPattern;
171 StoreList StoreRefsForMemcpy;
172 bool HasMemset;
173 bool HasMemsetPattern;
174 bool HasMemcpy;
175 bool HasMemCmp;
176 bool HasBCmp;
177
178 /// Return code for isLegalStore()
179 enum LegalStoreKind {
180 None = 0,
181 Memset,
182 MemsetPattern,
183 Memcpy,
184 UnorderedAtomicMemcpy,
185 DontUse // Dummy retval never to be used. Allows catching errors in retval
186 // handling.
187 };
188
189 /// \name Countable Loop Idiom Handling
190 /// @{
191
192 bool runOnCountableLoop();
193 bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
194 SmallVectorImpl<BasicBlock *> &ExitBlocks);
195
196 void collectStores(BasicBlock *BB);
197 LegalStoreKind isLegalStore(StoreInst *SI);
198 enum class ForMemset { No, Yes };
199 bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
200 ForMemset For);
201 bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
202
203 bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
204 unsigned StoreAlignment, Value *StoredVal,
205 Instruction *TheStore,
206 SmallPtrSetImpl<Instruction *> &Stores,
207 const SCEVAddRecExpr *Ev, const SCEV *BECount,
208 bool NegStride, bool IsLoopMemset = false);
209 bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
210 bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
211 bool IsLoopMemset = false);
212
213 /// @}
214 /// \name Noncountable Loop Idiom Handling
215 /// @{
216
217 bool runOnNoncountableLoop();
218
219 struct CmpLoopStructure {
220 Value *BCmpValue, *LatchCmpValue;
221 BasicBlock *HeaderBrEqualBB, *HeaderBrUnequalBB;
222 BasicBlock *LatchBrFinishBB, *LatchBrContinueBB;
223 };
224 bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) const;
225 struct CmpOfLoads {
226 ICmpInst::Predicate BCmpPred;
227 Value *LoadSrcA, *LoadSrcB;
228 Value *LoadA, *LoadB;
229 };
230 bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const;
231 bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads,
232 CmpLoopStructure &CmpLoop) const;
233 bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads,
234 const SCEV *&SrcA, const SCEV *&SrcB,
235 const SCEV *&Iterations) const;
236 bool detectBCmpIdiom(ICmpInst *&BCmpInst, CmpInst *&LatchCmpInst,
237 LoadInst *&LoadA, LoadInst *&LoadB, const SCEV *&SrcA,
238 const SCEV *&SrcB, const SCEV *&NBytes) const;
239 BasicBlock *transformBCmpControlFlow(ICmpInst *ComparedEqual);
240 void transformLoopToBCmp(ICmpInst *BCmpInst, CmpInst *LatchCmpInst,
241 LoadInst *LoadA, LoadInst *LoadB, const SCEV *SrcA,
242 const SCEV *SrcB, const SCEV *NBytes);
243 bool recognizeBCmp();
244
245 bool recognizePopcount();
246 void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
247 PHINode *CntPhi, Value *Var);
248 bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
249 void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
250 Instruction *CntInst, PHINode *CntPhi,
251 Value *Var, Instruction *DefX,
252 const DebugLoc &DL, bool ZeroCheck,
253 bool IsCntPhiUsedOutsideLoop);
254
255 /// @}
256};
257
258class LoopIdiomRecognizeLegacyPass : public LoopPass {
259public:
260 static char ID;
261
262 explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
263 initializeLoopIdiomRecognizeLegacyPassPass(
264 *PassRegistry::getPassRegistry());
265 }
266
267 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
268 if (skipLoop(L))
269 return false;
270
271 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
272 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
273 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
274 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
275 TargetLibraryInfo *TLI =
276 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
277 *L->getHeader()->getParent());
278 const TargetTransformInfo *TTI =
279 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
280 *L->getHeader()->getParent());
281 const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
282 LegacyPMAbstraction LoopDeleter(LPM);
283
284 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
285 // pass. Function analyses need to be preserved across loop transformations
286 // but ORE cannot be preserved (see comment before the pass definition).
287 OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
288
289 LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE);
290 return LIR.runOnLoop(L);
291 }
292
293 /// This transformation requires natural loop information & requires that
294 /// loop preheaders be inserted into the CFG.
295 void getAnalysisUsage(AnalysisUsage &AU) const override {
296 AU.addRequired<TargetLibraryInfoWrapperPass>();
297 AU.addRequired<TargetTransformInfoWrapperPass>();
298 getLoopAnalysisUsage(AU);
299 }
300};
301
302} // end anonymous namespace
303
304char LoopIdiomRecognizeLegacyPass::ID = 0;
305
306PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
307 LoopStandardAnalysisResults &AR,
308 LPMUpdater &Updater) {
309 const auto *DL = &L.getHeader()->getModule()->getDataLayout();
310
311 const auto &FAM =
312 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
313 Function *F = L.getHeader()->getParent();
314
315 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
316 // FIXME: This should probably be optional rather than required.
317 if (!ORE)
318 report_fatal_error(
319 "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
320 "at a higher level");
321
322 NewPMAbstraction LoopDeleter(Updater);
323 LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
324 LoopDeleter, *ORE);
325 if (!LIR.runOnLoop(&L))
326 return PreservedAnalyses::all();
327
328 return getLoopPassPreservedAnalyses();
329}
330
331INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",static void *initializeLoopIdiomRecognizeLegacyPassPassOnce(PassRegistry
&Registry) {
332 "Recognize loop idioms", false, false)static void *initializeLoopIdiomRecognizeLegacyPassPassOnce(PassRegistry
&Registry) {
333INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
334INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
335INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
336INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",PassInfo *PI = new PassInfo( "Recognize loop idioms", "loop-idiom"
, &LoopIdiomRecognizeLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<LoopIdiomRecognizeLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopIdiomRecognizeLegacyPassPassFlag
, initializeLoopIdiomRecognizeLegacyPassPassOnce, std::ref(Registry
)); }
337 "Recognize loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize loop idioms", "loop-idiom"
, &LoopIdiomRecognizeLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<LoopIdiomRecognizeLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopIdiomRecognizeLegacyPassPassFlag
, initializeLoopIdiomRecognizeLegacyPassPassOnce, std::ref(Registry
)); }
338
339Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
340
341static void deleteDeadInstruction(Instruction *I) {
342 I->replaceAllUsesWith(UndefValue::get(I->getType()));
343 I->eraseFromParent();
344}
345
346//===----------------------------------------------------------------------===//
347//
348// Implementation of LoopIdiomRecognize
349//
350//===----------------------------------------------------------------------===//
351
352bool LoopIdiomRecognize::runOnLoop(Loop *L) {
353 CurLoop = L;
354 // If the loop could not be converted to canonical form, it must have an
355 // indirectbr in it, just give up.
356 if (!L->getLoopPreheader())
357 return false;
358
359 // Disable loop idiom recognition if the function's name is a common idiom.
360 StringRef Name = L->getHeader()->getParent()->getName();
361 if (Name == "memset" || Name == "memcpy" || Name == "memcmp" ||
362 Name == "bcmp")
363 return false;
364
365 // Determine if code size heuristics need to be applied.
366 ApplyCodeSizeHeuristics =
367 L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
368
369 HasMemset = TLI->has(LibFunc_memset);
370 HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
371 HasMemcpy = TLI->has(LibFunc_memcpy);
372 HasMemCmp = TLI->has(LibFunc_memcmp);
373 HasBCmp = TLI->has(LibFunc_bcmp);
374
375 if (HasMemset || HasMemsetPattern || HasMemcpy || HasMemCmp || HasBCmp)
376 if (SE->hasLoopInvariantBackedgeTakenCount(L))
377 return runOnCountableLoop();
378
379 return runOnNoncountableLoop();
380}
381
382bool LoopIdiomRecognize::runOnCountableLoop() {
383 const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
384 assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 386, __PRETTY_FUNCTION__))
385 "runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 386, __PRETTY_FUNCTION__))
386 "backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 386, __PRETTY_FUNCTION__))
;
387
388 // If this loop executes exactly one time, then it should be peeled, not
389 // optimized by this pass.
390 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
391 if (BECst->getAPInt() == 0)
392 return false;
393
394 SmallVector<BasicBlock *, 8> ExitBlocks;
395 CurLoop->getUniqueExitBlocks(ExitBlocks);
396
397 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Countable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
398 << CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Countable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
399 << "] Countable Loop %" << CurLoop->getHeader()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Countable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
400 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Countable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
;
401
402 bool MadeChange = false;
403
404 // The following transforms hoist stores/memsets into the loop pre-header.
405 // Give up if the loop has instructions may throw.
406 SimpleLoopSafetyInfo SafetyInfo;
407 SafetyInfo.computeLoopSafetyInfo(CurLoop);
408 if (SafetyInfo.anyBlockMayThrow())
409 return MadeChange;
410
411 // Scan all the blocks in the loop that are not in subloops.
412 for (auto *BB : CurLoop->getBlocks()) {
413 // Ignore blocks in subloops.
414 if (LI->getLoopFor(BB) != CurLoop)
415 continue;
416
417 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
418 }
419 return MadeChange;
420}
421
422static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
423 const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
424 return ConstStride->getAPInt();
425}
426
427/// getMemSetPatternValue - If a strided store of the specified value is safe to
428/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
429/// be passed in. Otherwise, return null.
430///
431/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
432/// just replicate their input array and then pass on to memset_pattern16.
433static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
434 // FIXME: This could check for UndefValue because it can be merged into any
435 // other valid pattern.
436
437 // If the value isn't a constant, we can't promote it to being in a constant
438 // array. We could theoretically do a store to an alloca or something, but
439 // that doesn't seem worthwhile.
440 Constant *C = dyn_cast<Constant>(V);
441 if (!C)
442 return nullptr;
443
444 // Only handle simple values that are a power of two bytes in size.
445 uint64_t Size = DL->getTypeSizeInBits(V->getType());
446 if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
447 return nullptr;
448
449 // Don't care enough about darwin/ppc to implement this.
450 if (DL->isBigEndian())
451 return nullptr;
452
453 // Convert to size in bytes.
454 Size /= 8;
455
456 // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
457 // if the top and bottom are the same (e.g. for vectors and large integers).
458 if (Size > 16)
459 return nullptr;
460
461 // If the constant is exactly 16 bytes, just use it.
462 if (Size == 16)
463 return C;
464
465 // Otherwise, we'll use an array of the constants.
466 unsigned ArraySize = 16 / Size;
467 ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
468 return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
469}
470
471LoopIdiomRecognize::LegalStoreKind
472LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
473 // Don't touch volatile stores.
474 if (SI->isVolatile())
475 return LegalStoreKind::None;
476 // We only want simple or unordered-atomic stores.
477 if (!SI->isUnordered())
478 return LegalStoreKind::None;
479
480 // Don't convert stores of non-integral pointer types to memsets (which stores
481 // integers).
482 if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
483 return LegalStoreKind::None;
484
485 // Avoid merging nontemporal stores.
486 if (SI->getMetadata(LLVMContext::MD_nontemporal))
487 return LegalStoreKind::None;
488
489 Value *StoredVal = SI->getValueOperand();
490 Value *StorePtr = SI->getPointerOperand();
491
492 // Reject stores that are so large that they overflow an unsigned.
493 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
494 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
495 return LegalStoreKind::None;
496
497 // See if the pointer expression is an AddRec like {base,+,1} on the current
498 // loop, which indicates a strided store. If we have something else, it's a
499 // random store we can't handle.
500 const SCEVAddRecExpr *StoreEv =
501 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
502 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
503 return LegalStoreKind::None;
504
505 // Check to see if we have a constant stride.
506 if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
507 return LegalStoreKind::None;
508
509 // See if the store can be turned into a memset.
510
511 // If the stored value is a byte-wise value (like i32 -1), then it may be
512 // turned into a memset of i8 -1, assuming that all the consecutive bytes
513 // are stored. A store of i32 0x01020304 can never be turned into a memset,
514 // but it can be turned into memset_pattern if the target supports it.
515 Value *SplatValue = isBytewiseValue(StoredVal, *DL);
516 Constant *PatternValue = nullptr;
517
518 // Note: memset and memset_pattern on unordered-atomic is yet not supported
519 bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
520
521 // If we're allowed to form a memset, and the stored value would be
522 // acceptable for memset, use it.
523 if (!UnorderedAtomic && HasMemset && SplatValue &&
524 // Verify that the stored value is loop invariant. If not, we can't
525 // promote the memset.
526 CurLoop->isLoopInvariant(SplatValue)) {
527 // It looks like we can use SplatValue.
528 return LegalStoreKind::Memset;
529 } else if (!UnorderedAtomic && HasMemsetPattern &&
530 // Don't create memset_pattern16s with address spaces.
531 StorePtr->getType()->getPointerAddressSpace() == 0 &&
532 (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
Although the value stored to 'PatternValue' is used in the enclosing expression, the value is never actually read from 'PatternValue'
533 // It looks like we can use PatternValue!
534 return LegalStoreKind::MemsetPattern;
535 }
536
537 // Otherwise, see if the store can be turned into a memcpy.
538 if (HasMemcpy) {
539 // Check to see if the stride matches the size of the store. If so, then we
540 // know that every byte is touched in the loop.
541 APInt Stride = getStoreStride(StoreEv);
542 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
543 if (StoreSize != Stride && StoreSize != -Stride)
544 return LegalStoreKind::None;
545
546 // The store must be feeding a non-volatile load.
547 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
548
549 // Only allow non-volatile loads
550 if (!LI || LI->isVolatile())
551 return LegalStoreKind::None;
552 // Only allow simple or unordered-atomic loads
553 if (!LI->isUnordered())
554 return LegalStoreKind::None;
555
556 // See if the pointer expression is an AddRec like {base,+,1} on the current
557 // loop, which indicates a strided load. If we have something else, it's a
558 // random load we can't handle.
559 const SCEVAddRecExpr *LoadEv =
560 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
561 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
562 return LegalStoreKind::None;
563
564 // The store and load must share the same stride.
565 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
566 return LegalStoreKind::None;
567
568 // Success. This store can be converted into a memcpy.
569 UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
570 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
571 : LegalStoreKind::Memcpy;
572 }
573 // This store can't be transformed into a memset/memcpy.
574 return LegalStoreKind::None;
575}
576
577void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
578 StoreRefsForMemset.clear();
579 StoreRefsForMemsetPattern.clear();
580 StoreRefsForMemcpy.clear();
581 for (Instruction &I : *BB) {
582 StoreInst *SI = dyn_cast<StoreInst>(&I);
583 if (!SI)
584 continue;
585
586 // Make sure this is a strided store with a constant stride.
587 switch (isLegalStore(SI)) {
588 case LegalStoreKind::None:
589 // Nothing to do
590 break;
591 case LegalStoreKind::Memset: {
592 // Find the base pointer.
593 Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
594 StoreRefsForMemset[Ptr].push_back(SI);
595 } break;
596 case LegalStoreKind::MemsetPattern: {
597 // Find the base pointer.
598 Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
599 StoreRefsForMemsetPattern[Ptr].push_back(SI);
600 } break;
601 case LegalStoreKind::Memcpy:
602 case LegalStoreKind::UnorderedAtomicMemcpy:
603 StoreRefsForMemcpy.push_back(SI);
604 break;
605 default:
606 assert(false && "unhandled return value")((false && "unhandled return value") ? static_cast<
void> (0) : __assert_fail ("false && \"unhandled return value\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 606, __PRETTY_FUNCTION__))
;
607 break;
608 }
609 }
610}
611
612/// runOnLoopBlock - Process the specified block, which lives in a counted loop
613/// with the specified backedge count. This block is known to be in the current
614/// loop and not in any subloops.
615bool LoopIdiomRecognize::runOnLoopBlock(
616 BasicBlock *BB, const SCEV *BECount,
617 SmallVectorImpl<BasicBlock *> &ExitBlocks) {
618 // We can only promote stores in this block if they are unconditionally
619 // executed in the loop. For a block to be unconditionally executed, it has
620 // to dominate all the exit blocks of the loop. Verify this now.
621 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
622 if (!DT->dominates(BB, ExitBlocks[i]))
623 return false;
624
625 bool MadeChange = false;
626 // Look for store instructions, which may be optimized to memset/memcpy.
627 collectStores(BB);
628
629 // Look for a single store or sets of stores with a common base, which can be
630 // optimized into a memset (memset_pattern). The latter most commonly happens
631 // with structs and handunrolled loops.
632 for (auto &SL : StoreRefsForMemset)
633 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
634
635 for (auto &SL : StoreRefsForMemsetPattern)
636 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
637
638 // Optimize the store into a memcpy, if it feeds an similarly strided load.
639 for (auto &SI : StoreRefsForMemcpy)
640 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
641
642 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
643 Instruction *Inst = &*I++;
644 // Look for memset instructions, which may be optimized to a larger memset.
645 if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
646 WeakTrackingVH InstPtr(&*I);
647 if (!processLoopMemSet(MSI, BECount))
648 continue;
649 MadeChange = true;
650
651 // If processing the memset invalidated our iterator, start over from the
652 // top of the block.
653 if (!InstPtr)
654 I = BB->begin();
655 continue;
656 }
657 }
658
659 return MadeChange;
660}
661
662/// See if this store(s) can be promoted to a memset.
663bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
664 const SCEV *BECount, ForMemset For) {
665 // Try to find consecutive stores that can be transformed into memsets.
666 SetVector<StoreInst *> Heads, Tails;
667 SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
668
669 // Do a quadratic search on all of the given stores and find
670 // all of the pairs of stores that follow each other.
671 SmallVector<unsigned, 16> IndexQueue;
672 for (unsigned i = 0, e = SL.size(); i < e; ++i) {
673 assert(SL[i]->isSimple() && "Expected only non-volatile stores.")((SL[i]->isSimple() && "Expected only non-volatile stores."
) ? static_cast<void> (0) : __assert_fail ("SL[i]->isSimple() && \"Expected only non-volatile stores.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 673, __PRETTY_FUNCTION__))
;
674
675 Value *FirstStoredVal = SL[i]->getValueOperand();
676 Value *FirstStorePtr = SL[i]->getPointerOperand();
677 const SCEVAddRecExpr *FirstStoreEv =
678 cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
679 APInt FirstStride = getStoreStride(FirstStoreEv);
680 unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
681
682 // See if we can optimize just this store in isolation.
683 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
684 Heads.insert(SL[i]);
685 continue;
686 }
687
688 Value *FirstSplatValue = nullptr;
689 Constant *FirstPatternValue = nullptr;
690
691 if (For == ForMemset::Yes)
692 FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
693 else
694 FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
695
696 assert((FirstSplatValue || FirstPatternValue) &&(((FirstSplatValue || FirstPatternValue) && "Expected either splat value or pattern value."
) ? static_cast<void> (0) : __assert_fail ("(FirstSplatValue || FirstPatternValue) && \"Expected either splat value or pattern value.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 697, __PRETTY_FUNCTION__))
697 "Expected either splat value or pattern value.")(((FirstSplatValue || FirstPatternValue) && "Expected either splat value or pattern value."
) ? static_cast<void> (0) : __assert_fail ("(FirstSplatValue || FirstPatternValue) && \"Expected either splat value or pattern value.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 697, __PRETTY_FUNCTION__))
;
698
699 IndexQueue.clear();
700 // If a store has multiple consecutive store candidates, search Stores
701 // array according to the sequence: from i+1 to e, then from i-1 to 0.
702 // This is because usually pairing with immediate succeeding or preceding
703 // candidate create the best chance to find memset opportunity.
704 unsigned j = 0;
705 for (j = i + 1; j < e; ++j)
706 IndexQueue.push_back(j);
707 for (j = i; j > 0; --j)
708 IndexQueue.push_back(j - 1);
709
710 for (auto &k : IndexQueue) {
711 assert(SL[k]->isSimple() && "Expected only non-volatile stores.")((SL[k]->isSimple() && "Expected only non-volatile stores."
) ? static_cast<void> (0) : __assert_fail ("SL[k]->isSimple() && \"Expected only non-volatile stores.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 711, __PRETTY_FUNCTION__))
;
712 Value *SecondStorePtr = SL[k]->getPointerOperand();
713 const SCEVAddRecExpr *SecondStoreEv =
714 cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
715 APInt SecondStride = getStoreStride(SecondStoreEv);
716
717 if (FirstStride != SecondStride)
718 continue;
719
720 Value *SecondStoredVal = SL[k]->getValueOperand();
721 Value *SecondSplatValue = nullptr;
722 Constant *SecondPatternValue = nullptr;
723
724 if (For == ForMemset::Yes)
725 SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
726 else
727 SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
728
729 assert((SecondSplatValue || SecondPatternValue) &&(((SecondSplatValue || SecondPatternValue) && "Expected either splat value or pattern value."
) ? static_cast<void> (0) : __assert_fail ("(SecondSplatValue || SecondPatternValue) && \"Expected either splat value or pattern value.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 730, __PRETTY_FUNCTION__))
730 "Expected either splat value or pattern value.")(((SecondSplatValue || SecondPatternValue) && "Expected either splat value or pattern value."
) ? static_cast<void> (0) : __assert_fail ("(SecondSplatValue || SecondPatternValue) && \"Expected either splat value or pattern value.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 730, __PRETTY_FUNCTION__))
;
731
732 if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
733 if (For == ForMemset::Yes) {
734 if (isa<UndefValue>(FirstSplatValue))
735 FirstSplatValue = SecondSplatValue;
736 if (FirstSplatValue != SecondSplatValue)
737 continue;
738 } else {
739 if (isa<UndefValue>(FirstPatternValue))
740 FirstPatternValue = SecondPatternValue;
741 if (FirstPatternValue != SecondPatternValue)
742 continue;
743 }
744 Tails.insert(SL[k]);
745 Heads.insert(SL[i]);
746 ConsecutiveChain[SL[i]] = SL[k];
747 break;
748 }
749 }
750 }
751
752 // We may run into multiple chains that merge into a single chain. We mark the
753 // stores that we transformed so that we don't visit the same store twice.
754 SmallPtrSet<Value *, 16> TransformedStores;
755 bool Changed = false;
756
757 // For stores that start but don't end a link in the chain:
758 for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
759 it != e; ++it) {
760 if (Tails.count(*it))
761 continue;
762
763 // We found a store instr that starts a chain. Now follow the chain and try
764 // to transform it.
765 SmallPtrSet<Instruction *, 8> AdjacentStores;
766 StoreInst *I = *it;
767
768 StoreInst *HeadStore = I;
769 unsigned StoreSize = 0;
770
771 // Collect the chain into a list.
772 while (Tails.count(I) || Heads.count(I)) {
773 if (TransformedStores.count(I))
774 break;
775 AdjacentStores.insert(I);
776
777 StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
778 // Move to the next value in the chain.
779 I = ConsecutiveChain[I];
780 }
781
782 Value *StoredVal = HeadStore->getValueOperand();
783 Value *StorePtr = HeadStore->getPointerOperand();
784 const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
785 APInt Stride = getStoreStride(StoreEv);
786
787 // Check to see if the stride matches the size of the stores. If so, then
788 // we know that every byte is touched in the loop.
789 if (StoreSize != Stride && StoreSize != -Stride)
790 continue;
791
792 bool NegStride = StoreSize == -Stride;
793
794 if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
795 StoredVal, HeadStore, AdjacentStores, StoreEv,
796 BECount, NegStride)) {
797 TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
798 Changed = true;
799 }
800 }
801
802 return Changed;
803}
804
805/// processLoopMemSet - See if this memset can be promoted to a large memset.
806bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
807 const SCEV *BECount) {
808 // We can only handle non-volatile memsets with a constant size.
809 if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
810 return false;
811
812 // If we're not allowed to hack on memset, we fail.
813 if (!HasMemset)
814 return false;
815
816 Value *Pointer = MSI->getDest();
817
818 // See if the pointer expression is an AddRec like {base,+,1} on the current
819 // loop, which indicates a strided store. If we have something else, it's a
820 // random store we can't handle.
821 const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
822 if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
823 return false;
824
825 // Reject memsets that are so large that they overflow an unsigned.
826 uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
827 if ((SizeInBytes >> 32) != 0)
828 return false;
829
830 // Check to see if the stride matches the size of the memset. If so, then we
831 // know that every byte is touched in the loop.
832 const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
833 if (!ConstStride)
834 return false;
835
836 APInt Stride = ConstStride->getAPInt();
837 if (SizeInBytes != Stride && SizeInBytes != -Stride)
838 return false;
839
840 // Verify that the memset value is loop invariant. If not, we can't promote
841 // the memset.
842 Value *SplatValue = MSI->getValue();
843 if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
844 return false;
845
846 SmallPtrSet<Instruction *, 1> MSIs;
847 MSIs.insert(MSI);
848 bool NegStride = SizeInBytes == -Stride;
849 return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
850 MSI->getDestAlignment(), SplatValue, MSI, MSIs,
851 Ev, BECount, NegStride, /*IsLoopMemset=*/true);
852}
853
854/// mayLoopAccessLocation - Return true if the specified loop might access the
855/// specified pointer location, which is a loop-strided access. The 'Access'
856/// argument specifies what the verboten forms of access are (read or write).
857static bool
858mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
859 const SCEV *BECount, unsigned StoreSize,
860 AliasAnalysis &AA,
861 SmallPtrSetImpl<Instruction *> &IgnoredStores) {
862 // Get the location that may be stored across the loop. Since the access is
863 // strided positively through memory, we say that the modified location starts
864 // at the pointer and has infinite size.
865 LocationSize AccessSize = LocationSize::unknown();
866
867 // If the loop iterates a fixed number of times, we can refine the access size
868 // to be exactly the size of the memset, which is (BECount+1)*StoreSize
869 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
870 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
871 StoreSize);
872
873 // TODO: For this to be really effective, we have to dive into the pointer
874 // operand in the store. Store to &A[i] of 100 will always return may alias
875 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
876 // which will then no-alias a store to &A[100].
877 MemoryLocation StoreLoc(Ptr, AccessSize);
878
879 for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
880 ++BI)
881 for (Instruction &I : **BI)
882 if (IgnoredStores.count(&I) == 0 &&
883 isModOrRefSet(
884 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
885 return true;
886
887 return false;
888}
889
890// If we have a negative stride, Start refers to the end of the memory location
891// we're trying to memset. Therefore, we need to recompute the base pointer,
892// which is just Start - BECount*Size.
893static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
894 Type *IntPtr, unsigned StoreSize,
895 ScalarEvolution *SE) {
896 const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
897 if (StoreSize != 1)
898 Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
899 SCEV::FlagNUW);
900 return SE->getMinusSCEV(Start, Index);
901}
902
903/// Compute the number of bytes as a SCEV from the backedge taken count.
904///
905/// This also maps the SCEV into the provided type and tries to handle the
906/// computation in a way that will fold cleanly.
907static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
908 unsigned StoreSize, Loop *CurLoop,
909 const DataLayout *DL, ScalarEvolution *SE) {
910 const SCEV *NumBytesS;
911 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
912 // pointer size if it isn't already.
913 //
914 // If we're going to need to zero extend the BE count, check if we can add
915 // one to it prior to zero extending without overflow. Provided this is safe,
916 // it allows better simplification of the +1.
917 if (DL->getTypeSizeInBits(BECount->getType()) <
918 DL->getTypeSizeInBits(IntPtr) &&
919 SE->isLoopEntryGuardedByCond(
920 CurLoop, ICmpInst::ICMP_NE, BECount,
921 SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
922 NumBytesS = SE->getZeroExtendExpr(
923 SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
924 IntPtr);
925 } else {
926 NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
927 SE->getOne(IntPtr), SCEV::FlagNUW);
928 }
929
930 // And scale it based on the store size.
931 if (StoreSize != 1) {
932 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
933 SCEV::FlagNUW);
934 }
935 return NumBytesS;
936}
937
938/// processLoopStridedStore - We see a strided store of some value. If we can
939/// transform this into a memset or memset_pattern in the loop preheader, do so.
940bool LoopIdiomRecognize::processLoopStridedStore(
941 Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
942 Value *StoredVal, Instruction *TheStore,
943 SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
944 const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
945 Value *SplatValue = isBytewiseValue(StoredVal, *DL);
946 Constant *PatternValue = nullptr;
947
948 if (!SplatValue)
949 PatternValue = getMemSetPatternValue(StoredVal, DL);
950
951 assert((SplatValue || PatternValue) &&(((SplatValue || PatternValue) && "Expected either splat value or pattern value."
) ? static_cast<void> (0) : __assert_fail ("(SplatValue || PatternValue) && \"Expected either splat value or pattern value.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 952, __PRETTY_FUNCTION__))
952 "Expected either splat value or pattern value.")(((SplatValue || PatternValue) && "Expected either splat value or pattern value."
) ? static_cast<void> (0) : __assert_fail ("(SplatValue || PatternValue) && \"Expected either splat value or pattern value.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 952, __PRETTY_FUNCTION__))
;
953
954 // The trip count of the loop and the base pointer of the addrec SCEV is
955 // guaranteed to be loop invariant, which means that it should dominate the
956 // header. This allows us to insert code for it in the preheader.
957 unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
958 BasicBlock *Preheader = CurLoop->getLoopPreheader();
959 IRBuilder<> Builder(Preheader->getTerminator());
960 SCEVExpander Expander(*SE, *DL, "loop-idiom");
961
962 Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
963 Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
964
965 const SCEV *Start = Ev->getStart();
966 // Handle negative strided loops.
967 if (NegStride)
968 Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
969
970 // TODO: ideally we should still be able to generate memset if SCEV expander
971 // is taught to generate the dependencies at the latest point.
972 if (!isSafeToExpand(Start, *SE))
973 return false;
974
975 // Okay, we have a strided store "p[i]" of a splattable value. We can turn
976 // this into a memset in the loop preheader now if we want. However, this
977 // would be unsafe to do if there is anything else in the loop that may read
978 // or write to the aliased location. Check for any overlap by generating the
979 // base pointer and checking the region.
980 Value *BasePtr =
981 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
982 if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
983 StoreSize, *AA, Stores)) {
984 Expander.clear();
985 // If we generated new code for the base pointer, clean up.
986 RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
987 return false;
988 }
989
990 if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
991 return false;
992
993 // Okay, everything looks good, insert the memset.
994
995 const SCEV *NumBytesS =
996 getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
997
998 // TODO: ideally we should still be able to generate memset if SCEV expander
999 // is taught to generate the dependencies at the latest point.
1000 if (!isSafeToExpand(NumBytesS, *SE))
1001 return false;
1002
1003 Value *NumBytes =
1004 Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
1005
1006 CallInst *NewCall;
1007 if (SplatValue) {
1008 NewCall =
1009 Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
1010 } else {
1011 // Everything is emitted in default address space
1012 Type *Int8PtrTy = DestInt8PtrTy;
1013
1014 Module *M = TheStore->getModule();
1015 StringRef FuncName = "memset_pattern16";
1016 FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
1017 Int8PtrTy, Int8PtrTy, IntPtr);
1018 inferLibFuncAttributes(M, FuncName, *TLI);
1019
1020 // Otherwise we should form a memset_pattern16. PatternValue is known to be
1021 // an constant array of 16-bytes. Plop the value into a mergable global.
1022 GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
1023 GlobalValue::PrivateLinkage,
1024 PatternValue, ".memset_pattern");
1025 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
1026 GV->setAlignment(16);
1027 Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
1028 NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
1029 }
1030
1031 LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memset: " <<
*NewCall << "\n" << " from store to: " <<
*Ev << " at: " << *TheStore << "\n"; } } while
(false)
1032 << " from store to: " << *Ev << " at: " << *TheStoredo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memset: " <<
*NewCall << "\n" << " from store to: " <<
*Ev << " at: " << *TheStore << "\n"; } } while
(false)
1033 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memset: " <<
*NewCall << "\n" << " from store to: " <<
*Ev << " at: " << *TheStore << "\n"; } } while
(false)
;
1034 NewCall->setDebugLoc(TheStore->getDebugLoc());
1035
1036 ORE.emit([&]() {
1037 return OptimizationRemark(DEBUG_TYPE"loop-idiom", "ProcessLoopStridedStore",
1038 NewCall->getDebugLoc(), Preheader)
1039 << "Transformed loop-strided store into a call to "
1040 << ore::NV("NewFunction", NewCall->getCalledFunction())
1041 << "() function";
1042 });
1043
1044 // Okay, the memset has been formed. Zap the original store and anything that
1045 // feeds into it.
1046 for (auto *I : Stores)
1047 deleteDeadInstruction(I);
1048 ++NumMemSet;
1049 return true;
1050}
1051
1052/// If the stored value is a strided load in the same loop with the same stride
1053/// this may be transformable into a memcpy. This kicks in for stuff like
1054/// for (i) A[i] = B[i];
1055bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1056 const SCEV *BECount) {
1057 assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.")((SI->isUnordered() && "Expected only non-volatile non-ordered stores."
) ? static_cast<void> (0) : __assert_fail ("SI->isUnordered() && \"Expected only non-volatile non-ordered stores.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1057, __PRETTY_FUNCTION__))
;
1058
1059 Value *StorePtr = SI->getPointerOperand();
1060 const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1061 APInt Stride = getStoreStride(StoreEv);
1062 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1063 bool NegStride = StoreSize == -Stride;
1064
1065 // The store must be feeding a non-volatile load.
1066 LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1067 assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.")((LI->isUnordered() && "Expected only non-volatile non-ordered loads."
) ? static_cast<void> (0) : __assert_fail ("LI->isUnordered() && \"Expected only non-volatile non-ordered loads.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1067, __PRETTY_FUNCTION__))
;
1068
1069 // See if the pointer expression is an AddRec like {base,+,1} on the current
1070 // loop, which indicates a strided load. If we have something else, it's a
1071 // random load we can't handle.
1072 const SCEVAddRecExpr *LoadEv =
1073 cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1074
1075 // The trip count of the loop and the base pointer of the addrec SCEV is
1076 // guaranteed to be loop invariant, which means that it should dominate the
1077 // header. This allows us to insert code for it in the preheader.
1078 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1079 IRBuilder<> Builder(Preheader->getTerminator());
1080 SCEVExpander Expander(*SE, *DL, "loop-idiom");
1081
1082 const SCEV *StrStart = StoreEv->getStart();
1083 unsigned StrAS = SI->getPointerAddressSpace();
1084 Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
1085
1086 // Handle negative strided loops.
1087 if (NegStride)
1088 StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
1089
1090 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1091 // this into a memcpy in the loop preheader now if we want. However, this
1092 // would be unsafe to do if there is anything else in the loop that may read
1093 // or write the memory region we're storing to. This includes the load that
1094 // feeds the stores. Check for an alias by generating the base address and
1095 // checking everything.
1096 Value *StoreBasePtr = Expander.expandCodeFor(
1097 StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1098
1099 SmallPtrSet<Instruction *, 1> Stores;
1100 Stores.insert(SI);
1101 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1102 StoreSize, *AA, Stores)) {
1103 Expander.clear();
1104 // If we generated new code for the base pointer, clean up.
1105 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1106 return false;
1107 }
1108
1109 const SCEV *LdStart = LoadEv->getStart();
1110 unsigned LdAS = LI->getPointerAddressSpace();
1111
1112 // Handle negative strided loops.
1113 if (NegStride)
1114 LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
1115
1116 // For a memcpy, we have to make sure that the input array is not being
1117 // mutated by the loop.
1118 Value *LoadBasePtr = Expander.expandCodeFor(
1119 LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1120
1121 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1122 StoreSize, *AA, Stores)) {
1123 Expander.clear();
1124 // If we generated new code for the base pointer, clean up.
1125 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
1126 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1127 return false;
1128 }
1129
1130 if (avoidLIRForMultiBlockLoop())
1131 return false;
1132
1133 // Okay, everything is safe, we can transform this!
1134
1135 const SCEV *NumBytesS =
1136 getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
1137
1138 Value *NumBytes =
1139 Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1140
1141 CallInst *NewCall = nullptr;
1142 // Check whether to generate an unordered atomic memcpy:
1143 // If the load or store are atomic, then they must necessarily be unordered
1144 // by previous checks.
1145 if (!SI->isAtomic() && !LI->isAtomic())
1146 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
1147 LoadBasePtr, LI->getAlignment(), NumBytes);
1148 else {
1149 // We cannot allow unaligned ops for unordered load/store, so reject
1150 // anything where the alignment isn't at least the element size.
1151 unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
1152 if (Align < StoreSize)
1153 return false;
1154
1155 // If the element.atomic memcpy is not lowered into explicit
1156 // loads/stores later, then it will be lowered into an element-size
1157 // specific lib call. If the lib call doesn't exist for our store size, then
1158 // we shouldn't generate the memcpy.
1159 if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1160 return false;
1161
1162 // Create the call.
1163 // Note that unordered atomic loads/stores are *required* by the spec to
1164 // have an alignment but non-atomic loads/stores may not.
1165 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1166 StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
1167 NumBytes, StoreSize);
1168 }
1169 NewCall->setDebugLoc(SI->getDebugLoc());
1170
1171 LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memcpy: " <<
*NewCall << "\n" << " from load ptr=" <<
*LoadEv << " at: " << *LI << "\n" <<
" from store ptr=" << *StoreEv << " at: " <<
*SI << "\n"; } } while (false)
1172 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memcpy: " <<
*NewCall << "\n" << " from load ptr=" <<
*LoadEv << " at: " << *LI << "\n" <<
" from store ptr=" << *StoreEv << " at: " <<
*SI << "\n"; } } while (false)
1173 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memcpy: " <<
*NewCall << "\n" << " from load ptr=" <<
*LoadEv << " at: " << *LI << "\n" <<
" from store ptr=" << *StoreEv << " at: " <<
*SI << "\n"; } } while (false)
1174 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " Formed memcpy: " <<
*NewCall << "\n" << " from load ptr=" <<
*LoadEv << " at: " << *LI << "\n" <<
" from store ptr=" << *StoreEv << " at: " <<
*SI << "\n"; } } while (false)
;
1175
1176 ORE.emit([&]() {
1177 return OptimizationRemark(DEBUG_TYPE"loop-idiom", "ProcessLoopStoreOfLoopLoad",
1178 NewCall->getDebugLoc(), Preheader)
1179 << "Formed a call to "
1180 << ore::NV("NewFunction", NewCall->getCalledFunction())
1181 << "() function";
1182 });
1183
1184 // Okay, the memcpy has been formed. Zap the original store and anything that
1185 // feeds into it.
1186 deleteDeadInstruction(SI);
1187 ++NumMemCpy;
1188 return true;
1189}
1190
1191// When compiling for codesize we avoid idiom recognition for a multi-block loop
1192// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1193//
1194bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1195 bool IsLoopMemset) {
1196 if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1197 if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
1198 LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " " << CurLoop->getHeader
()->getParent()->getName() << " : LIR " << (
IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n"
; } } while (false)
1199 << " : LIR " << (IsMemset ? "Memset" : "Memcpy")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " " << CurLoop->getHeader
()->getParent()->getName() << " : LIR " << (
IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n"
; } } while (false)
1200 << " avoided: multi-block top-level loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << " " << CurLoop->getHeader
()->getParent()->getName() << " : LIR " << (
IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n"
; } } while (false)
;
1201 return true;
1202 }
1203 }
1204
1205 return false;
1206}
1207
1208bool LoopIdiomRecognize::runOnNoncountableLoop() {
1209 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Noncountable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
1210 << CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Noncountable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
1211 << "] Noncountable Loop %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Noncountable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
1212 << CurLoop->getHeader()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName
() << "] Noncountable Loop %" << CurLoop->getHeader
()->getName() << "\n"; } } while (false)
;
1213
1214 return recognizeBCmp() || recognizePopcount() || recognizeAndInsertFFS();
1215}
1216
1217/// Check if the given conditional branch is based on the comparison between
1218/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1219/// true), the control yields to the loop entry. If the branch matches the
1220/// behavior, the variable involved in the comparison is returned. This function
1221/// will be called to see if the precondition and postcondition of the loop are
1222/// in desirable form.
1223static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1224 bool JmpOnZero = false) {
1225 if (!BI || !BI->isConditional())
1226 return nullptr;
1227
1228 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1229 if (!Cond)
1230 return nullptr;
1231
1232 ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1233 if (!CmpZero || !CmpZero->isZero())
1234 return nullptr;
1235
1236 BasicBlock *TrueSucc = BI->getSuccessor(0);
1237 BasicBlock *FalseSucc = BI->getSuccessor(1);
1238 if (JmpOnZero)
1239 std::swap(TrueSucc, FalseSucc);
1240
1241 ICmpInst::Predicate Pred = Cond->getPredicate();
1242 if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1243 (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1244 return Cond->getOperand(0);
1245
1246 return nullptr;
1247}
1248
1249// Check if the recurrence variable `VarX` is in the right form to create
1250// the idiom. Returns the value coerced to a PHINode if so.
1251static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
1252 BasicBlock *LoopEntry) {
1253 auto *PhiX = dyn_cast<PHINode>(VarX);
1254 if (PhiX && PhiX->getParent() == LoopEntry &&
1255 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1256 return PhiX;
1257 return nullptr;
1258}
1259
1260/// Return true iff the idiom is detected in the loop.
1261///
1262/// Additionally:
1263/// 1) \p CntInst is set to the instruction counting the population bit.
1264/// 2) \p CntPhi is set to the corresponding phi node.
1265/// 3) \p Var is set to the value whose population bits are being counted.
1266///
1267/// The core idiom we are trying to detect is:
1268/// \code
1269/// if (x0 != 0)
1270/// goto loop-exit // the precondition of the loop
1271/// cnt0 = init-val;
1272/// do {
1273/// x1 = phi (x0, x2);
1274/// cnt1 = phi(cnt0, cnt2);
1275///
1276/// cnt2 = cnt1 + 1;
1277/// ...
1278/// x2 = x1 & (x1 - 1);
1279/// ...
1280/// } while(x != 0);
1281///
1282/// loop-exit:
1283/// \endcode
1284static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1285 Instruction *&CntInst, PHINode *&CntPhi,
1286 Value *&Var) {
1287 // step 1: Check to see if the look-back branch match this pattern:
1288 // "if (a!=0) goto loop-entry".
1289 BasicBlock *LoopEntry;
1290 Instruction *DefX2, *CountInst;
1291 Value *VarX1, *VarX0;
1292 PHINode *PhiX, *CountPhi;
1293
1294 DefX2 = CountInst = nullptr;
1295 VarX1 = VarX0 = nullptr;
1296 PhiX = CountPhi = nullptr;
1297 LoopEntry = *(CurLoop->block_begin());
1298
1299 // step 1: Check if the loop-back branch is in desirable form.
1300 {
1301 if (Value *T = matchCondition(
1302 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1303 DefX2 = dyn_cast<Instruction>(T);
1304 else
1305 return false;
1306 }
1307
1308 // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1309 {
1310 if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1311 return false;
1312
1313 BinaryOperator *SubOneOp;
1314
1315 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1316 VarX1 = DefX2->getOperand(1);
1317 else {
1318 VarX1 = DefX2->getOperand(0);
1319 SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1320 }
1321 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1322 return false;
1323
1324 ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1325 if (!Dec ||
1326 !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1327 (SubOneOp->getOpcode() == Instruction::Add &&
1328 Dec->isMinusOne()))) {
1329 return false;
1330 }
1331 }
1332
1333 // step 3: Check the recurrence of variable X
1334 PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1335 if (!PhiX)
1336 return false;
1337
1338 // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1339 {
1340 CountInst = nullptr;
1341 for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1342 IterE = LoopEntry->end();
1343 Iter != IterE; Iter++) {
1344 Instruction *Inst = &*Iter;
1345 if (Inst->getOpcode() != Instruction::Add)
1346 continue;
1347
1348 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1349 if (!Inc || !Inc->isOne())
1350 continue;
1351
1352 PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1353 if (!Phi)
1354 continue;
1355
1356 // Check if the result of the instruction is live of the loop.
1357 bool LiveOutLoop = false;
1358 for (User *U : Inst->users()) {
1359 if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1360 LiveOutLoop = true;
1361 break;
1362 }
1363 }
1364
1365 if (LiveOutLoop) {
1366 CountInst = Inst;
1367 CountPhi = Phi;
1368 break;
1369 }
1370 }
1371
1372 if (!CountInst)
1373 return false;
1374 }
1375
1376 // step 5: check if the precondition is in this form:
1377 // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1378 {
1379 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1380 Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1381 if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1382 return false;
1383
1384 CntInst = CountInst;
1385 CntPhi = CountPhi;
1386 Var = T;
1387 }
1388
1389 return true;
1390}
1391
1392/// Return true if the idiom is detected in the loop.
1393///
1394/// Additionally:
1395/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1396/// or nullptr if there is no such.
1397/// 2) \p CntPhi is set to the corresponding phi node
1398/// or nullptr if there is no such.
1399/// 3) \p Var is set to the value whose CTLZ could be used.
1400/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1401///
1402/// The core idiom we are trying to detect is:
1403/// \code
1404/// if (x0 == 0)
1405/// goto loop-exit // the precondition of the loop
1406/// cnt0 = init-val;
1407/// do {
1408/// x = phi (x0, x.next); //PhiX
1409/// cnt = phi(cnt0, cnt.next);
1410///
1411/// cnt.next = cnt + 1;
1412/// ...
1413/// x.next = x >> 1; // DefX
1414/// ...
1415/// } while(x.next != 0);
1416///
1417/// loop-exit:
1418/// \endcode
1419static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1420 Intrinsic::ID &IntrinID, Value *&InitX,
1421 Instruction *&CntInst, PHINode *&CntPhi,
1422 Instruction *&DefX) {
1423 BasicBlock *LoopEntry;
1424 Value *VarX = nullptr;
1425
1426 DefX = nullptr;
1427 CntInst = nullptr;
1428 CntPhi = nullptr;
1429 LoopEntry = *(CurLoop->block_begin());
1430
1431 // step 1: Check if the loop-back branch is in desirable form.
1432 if (Value *T = matchCondition(
1433 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1434 DefX = dyn_cast<Instruction>(T);
1435 else
1436 return false;
1437
1438 // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1439 if (!DefX || !DefX->isShift())
1440 return false;
1441 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1442 Intrinsic::ctlz;
1443 ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1444 if (!Shft || !Shft->isOne())
1445 return false;
1446 VarX = DefX->getOperand(0);
1447
1448 // step 3: Check the recurrence of variable X
1449 PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1450 if (!PhiX)
1451 return false;
1452
1453 InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1454
1455 // Make sure the initial value can't be negative otherwise the ashr in the
1456 // loop might never reach zero which would make the loop infinite.
1457 if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1458 return false;
1459
1460 // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1461 // TODO: We can skip the step. If loop trip count is known (CTLZ),
1462 // then all uses of "cnt.next" could be optimized to the trip count
1463 // plus "cnt0". Currently it is not optimized.
1464 // This step could be used to detect POPCNT instruction:
1465 // cnt.next = cnt + (x.next & 1)
1466 for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1467 IterE = LoopEntry->end();
1468 Iter != IterE; Iter++) {
1469 Instruction *Inst = &*Iter;
1470 if (Inst->getOpcode() != Instruction::Add)
1471 continue;
1472
1473 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1474 if (!Inc || !Inc->isOne())
1475 continue;
1476
1477 PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1478 if (!Phi)
1479 continue;
1480
1481 CntInst = Inst;
1482 CntPhi = Phi;
1483 break;
1484 }
1485 if (!CntInst)
1486 return false;
1487
1488 return true;
1489}
1490
1491/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1492/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1493/// trip count returns true; otherwise, returns false.
1494bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1495 // Give up if the loop has multiple blocks or multiple backedges.
1496 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1497 return false;
1498
1499 Intrinsic::ID IntrinID;
1500 Value *InitX;
1501 Instruction *DefX = nullptr;
1502 PHINode *CntPhi = nullptr;
1503 Instruction *CntInst = nullptr;
1504 // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1505 // this is always 6.
1506 size_t IdiomCanonicalSize = 6;
1507
1508 if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1509 CntInst, CntPhi, DefX))
1510 return false;
1511
1512 bool IsCntPhiUsedOutsideLoop = false;
1513 for (User *U : CntPhi->users())
1514 if (!CurLoop->contains(cast<Instruction>(U))) {
1515 IsCntPhiUsedOutsideLoop = true;
1516 break;
1517 }
1518 bool IsCntInstUsedOutsideLoop = false;
1519 for (User *U : CntInst->users())
1520 if (!CurLoop->contains(cast<Instruction>(U))) {
1521 IsCntInstUsedOutsideLoop = true;
1522 break;
1523 }
1524 // If both CntInst and CntPhi are used outside the loop the profitability
1525 // is questionable.
1526 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1527 return false;
1528
1529 // For some CPUs result of CTLZ(X) intrinsic is undefined
1530 // when X is 0. If we can not guarantee X != 0, we need to check this
1531 // when expand.
1532 bool ZeroCheck = false;
1533 // It is safe to assume Preheader exist as it was checked in
1534 // parent function RunOnLoop.
1535 BasicBlock *PH = CurLoop->getLoopPreheader();
1536
1537 // If we are using the count instruction outside the loop, make sure we
1538 // have a zero check as a precondition. Without the check the loop would run
1539 // one iteration for before any check of the input value. This means 0 and 1
1540 // would have identical behavior in the original loop and thus
1541 if (!IsCntPhiUsedOutsideLoop) {
1542 auto *PreCondBB = PH->getSinglePredecessor();
1543 if (!PreCondBB)
1544 return false;
1545 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1546 if (!PreCondBI)
1547 return false;
1548 if (matchCondition(PreCondBI, PH) != InitX)
1549 return false;
1550 ZeroCheck = true;
1551 }
1552
1553 // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1554 // profitable if we delete the loop.
1555
1556 // the loop has only 6 instructions:
1557 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1558 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1559 // %shr = ashr %n.addr.0, 1
1560 // %tobool = icmp eq %shr, 0
1561 // %inc = add nsw %i.0, 1
1562 // br i1 %tobool
1563
1564 const Value *Args[] =
1565 {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
1566 : ConstantInt::getFalse(InitX->getContext())};
1567
1568 // @llvm.dbg doesn't count as they have no semantic effect.
1569 auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1570 uint32_t HeaderSize =
1571 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1572
1573 if (HeaderSize != IdiomCanonicalSize &&
1574 TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
1575 TargetTransformInfo::TCC_Basic)
1576 return false;
1577
1578 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1579 DefX->getDebugLoc(), ZeroCheck,
1580 IsCntPhiUsedOutsideLoop);
1581 return true;
1582}
1583
1584/// Recognizes a population count idiom in a non-countable loop.
1585///
1586/// If detected, transforms the relevant code to issue the popcount intrinsic
1587/// function call, and returns true; otherwise, returns false.
1588bool LoopIdiomRecognize::recognizePopcount() {
1589 if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
1590 return false;
1591
1592 // Counting population are usually conducted by few arithmetic instructions.
1593 // Such instructions can be easily "absorbed" by vacant slots in a
1594 // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1595 // in a compact loop.
1596
1597 // Give up if the loop has multiple blocks or multiple backedges.
1598 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1599 return false;
1600
1601 BasicBlock *LoopBody = *(CurLoop->block_begin());
1602 if (LoopBody->size() >= 20) {
1603 // The loop is too big, bail out.
1604 return false;
1605 }
1606
1607 // It should have a preheader containing nothing but an unconditional branch.
1608 BasicBlock *PH = CurLoop->getLoopPreheader();
1609 if (!PH || &PH->front() != PH->getTerminator())
1610 return false;
1611 auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1612 if (!EntryBI || EntryBI->isConditional())
1613 return false;
1614
1615 // It should have a precondition block where the generated popcount intrinsic
1616 // function can be inserted.
1617 auto *PreCondBB = PH->getSinglePredecessor();
1618 if (!PreCondBB)
1619 return false;
1620 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1621 if (!PreCondBI || PreCondBI->isUnconditional())
1622 return false;
1623
1624 Instruction *CntInst;
1625 PHINode *CntPhi;
1626 Value *Val;
1627 if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1628 return false;
1629
1630 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1631 return true;
1632}
1633
1634static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
1635 const DebugLoc &DL) {
1636 Value *Ops[] = {Val};
1637 Type *Tys[] = {Val->getType()};
1638
1639 Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1640 Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1641 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1642 CI->setDebugLoc(DL);
1643
1644 return CI;
1645}
1646
1647static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
1648 const DebugLoc &DL, bool ZeroCheck,
1649 Intrinsic::ID IID) {
1650 Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1651 Type *Tys[] = {Val->getType()};
1652
1653 Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1654 Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
1655 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1656 CI->setDebugLoc(DL);
1657
1658 return CI;
1659}
1660
1661/// Transform the following loop (Using CTLZ, CTTZ is similar):
1662/// loop:
1663/// CntPhi = PHI [Cnt0, CntInst]
1664/// PhiX = PHI [InitX, DefX]
1665/// CntInst = CntPhi + 1
1666/// DefX = PhiX >> 1
1667/// LOOP_BODY
1668/// Br: loop if (DefX != 0)
1669/// Use(CntPhi) or Use(CntInst)
1670///
1671/// Into:
1672/// If CntPhi used outside the loop:
1673/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1674/// Count = CountPrev + 1
1675/// else
1676/// Count = BitWidth(InitX) - CTLZ(InitX)
1677/// loop:
1678/// CntPhi = PHI [Cnt0, CntInst]
1679/// PhiX = PHI [InitX, DefX]
1680/// PhiCount = PHI [Count, Dec]
1681/// CntInst = CntPhi + 1
1682/// DefX = PhiX >> 1
1683/// Dec = PhiCount - 1
1684/// LOOP_BODY
1685/// Br: loop if (Dec != 0)
1686/// Use(CountPrev + Cnt0) // Use(CntPhi)
1687/// or
1688/// Use(Count + Cnt0) // Use(CntInst)
1689///
1690/// If LOOP_BODY is empty the loop will be deleted.
1691/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1692void LoopIdiomRecognize::transformLoopToCountable(
1693 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
1694 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
1695 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1696 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
1697
1698 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
1699 IRBuilder<> Builder(PreheaderBr);
1700 Builder.SetCurrentDebugLocation(DL);
1701 Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
1702
1703 // Count = BitWidth - CTLZ(InitX);
1704 // If there are uses of CntPhi create:
1705 // CountPrev = BitWidth - CTLZ(InitX >> 1);
1706 if (IsCntPhiUsedOutsideLoop) {
1707 if (DefX->getOpcode() == Instruction::AShr)
1708 InitXNext =
1709 Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
1710 else if (DefX->getOpcode() == Instruction::LShr)
1711 InitXNext =
1712 Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
1713 else if (DefX->getOpcode() == Instruction::Shl) // cttz
1714 InitXNext =
1715 Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
1716 else
1717 llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1717)
;
1718 } else
1719 InitXNext = InitX;
1720 FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
1721 Count = Builder.CreateSub(
1722 ConstantInt::get(FFS->getType(),
1723 FFS->getType()->getIntegerBitWidth()),
1724 FFS);
1725 if (IsCntPhiUsedOutsideLoop) {
1726 CountPrev = Count;
1727 Count = Builder.CreateAdd(
1728 CountPrev,
1729 ConstantInt::get(CountPrev->getType(), 1));
1730 }
1731
1732 NewCount = Builder.CreateZExtOrTrunc(
1733 IsCntPhiUsedOutsideLoop ? CountPrev : Count,
1734 cast<IntegerType>(CntInst->getType()));
1735
1736 // If the counter's initial value is not zero, insert Add Inst.
1737 Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1738 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1739 if (!InitConst || !InitConst->isZero())
1740 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1741
1742 // Step 2: Insert new IV and loop condition:
1743 // loop:
1744 // ...
1745 // PhiCount = PHI [Count, Dec]
1746 // ...
1747 // Dec = PhiCount - 1
1748 // ...
1749 // Br: loop if (Dec != 0)
1750 BasicBlock *Body = *(CurLoop->block_begin());
1751 auto *LbBr = cast<BranchInst>(Body->getTerminator());
1752 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1753 Type *Ty = Count->getType();
1754
1755 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1756
1757 Builder.SetInsertPoint(LbCond);
1758 Instruction *TcDec = cast<Instruction>(
1759 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1760 "tcdec", false, true));
1761
1762 TcPhi->addIncoming(Count, Preheader);
1763 TcPhi->addIncoming(TcDec, Body);
1764
1765 CmpInst::Predicate Pred =
1766 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1767 LbCond->setPredicate(Pred);
1768 LbCond->setOperand(0, TcDec);
1769 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1770
1771 // Step 3: All the references to the original counter outside
1772 // the loop are replaced with the NewCount
1773 if (IsCntPhiUsedOutsideLoop)
1774 CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1775 else
1776 CntInst->replaceUsesOutsideBlock(NewCount, Body);
1777
1778 // step 4: Forget the "non-computable" trip-count SCEV associated with the
1779 // loop. The loop would otherwise not be deleted even if it becomes empty.
1780 SE->forgetLoop(CurLoop);
1781}
1782
1783void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1784 Instruction *CntInst,
1785 PHINode *CntPhi, Value *Var) {
1786 BasicBlock *PreHead = CurLoop->getLoopPreheader();
1787 auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1788 const DebugLoc &DL = CntInst->getDebugLoc();
1789
1790 // Assuming before transformation, the loop is following:
1791 // if (x) // the precondition
1792 // do { cnt++; x &= x - 1; } while(x);
1793
1794 // Step 1: Insert the ctpop instruction at the end of the precondition block
1795 IRBuilder<> Builder(PreCondBr);
1796 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1797 {
1798 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1799 NewCount = PopCntZext =
1800 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1801
1802 if (NewCount != PopCnt)
1803 (cast<Instruction>(NewCount))->setDebugLoc(DL);
1804
1805 // TripCnt is exactly the number of iterations the loop has
1806 TripCnt = NewCount;
1807
1808 // If the population counter's initial value is not zero, insert Add Inst.
1809 Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1810 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1811 if (!InitConst || !InitConst->isZero()) {
1812 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1813 (cast<Instruction>(NewCount))->setDebugLoc(DL);
1814 }
1815 }
1816
1817 // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1818 // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1819 // function would be partial dead code, and downstream passes will drag
1820 // it back from the precondition block to the preheader.
1821 {
1822 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1823
1824 Value *Opnd0 = PopCntZext;
1825 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1826 if (PreCond->getOperand(0) != Var)
1827 std::swap(Opnd0, Opnd1);
1828
1829 ICmpInst *NewPreCond = cast<ICmpInst>(
1830 Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1831 PreCondBr->setCondition(NewPreCond);
1832
1833 RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
1834 }
1835
1836 // Step 3: Note that the population count is exactly the trip count of the
1837 // loop in question, which enable us to convert the loop from noncountable
1838 // loop into a countable one. The benefit is twofold:
1839 //
1840 // - If the loop only counts population, the entire loop becomes dead after
1841 // the transformation. It is a lot easier to prove a countable loop dead
1842 // than to prove a noncountable one. (In some C dialects, an infinite loop
1843 // isn't dead even if it computes nothing useful. In general, DCE needs
1844 // to prove a noncountable loop finite before safely delete it.)
1845 //
1846 // - If the loop also performs something else, it remains alive.
1847 // Since it is transformed to countable form, it can be aggressively
1848 // optimized by some optimizations which are in general not applicable
1849 // to a noncountable loop.
1850 //
1851 // After this step, this loop (conceptually) would look like following:
1852 // newcnt = __builtin_ctpop(x);
1853 // t = newcnt;
1854 // if (x)
1855 // do { cnt++; x &= x-1; t--) } while (t > 0);
1856 BasicBlock *Body = *(CurLoop->block_begin());
1857 {
1858 auto *LbBr = cast<BranchInst>(Body->getTerminator());
1859 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1860 Type *Ty = TripCnt->getType();
1861
1862 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1863
1864 Builder.SetInsertPoint(LbCond);
1865 Instruction *TcDec = cast<Instruction>(
1866 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1867 "tcdec", false, true));
1868
1869 TcPhi->addIncoming(TripCnt, PreHead);
1870 TcPhi->addIncoming(TcDec, Body);
1871
1872 CmpInst::Predicate Pred =
1873 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1874 LbCond->setPredicate(Pred);
1875 LbCond->setOperand(0, TcDec);
1876 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1877 }
1878
1879 // Step 4: All the references to the original population counter outside
1880 // the loop are replaced with the NewCount -- the value returned from
1881 // __builtin_ctpop().
1882 CntInst->replaceUsesOutsideBlock(NewCount, Body);
1883
1884 // step 5: Forget the "non-computable" trip-count SCEV associated with the
1885 // loop. The loop would otherwise not be deleted even if it becomes empty.
1886 SE->forgetLoop(CurLoop);
1887}
1888
1889bool LoopIdiomRecognize::matchBCmpLoopStructure(
1890 CmpLoopStructure &CmpLoop) const {
1891 ICmpInst::Predicate BCmpPred;
1892
1893 // We are looking for the following basic layout:
1894 // PreheaderBB: <preheader> ; preds = ???
1895 // <...>
1896 // br label %LoopHeaderBB
1897 // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
1898 // <...>
1899 // %BCmpValue = icmp <...>
1900 // br i1 %BCmpValue, label %LoopLatchBB, label %Successor0
1901 // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
1902 // <...>
1903 // %LatchCmpValue = <are we done, or do next iteration?>
1904 // br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB
1905 // Successor0: <exit> ; preds = %LoopHeaderBB
1906 // <...>
1907 // Successor1: <exit> ; preds = %LoopLatchBB
1908 // <...>
1909 //
1910 // Successor0 and Successor1 may or may not be the same basic block.
1911
1912 // Match basic frame-work of this supposedly-comparison loop.
1913 using namespace PatternMatch;
1914 if (!match(CurLoop->getHeader()->getTerminator(),
1915 m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()),
1916 m_Value(CmpLoop.BCmpValue)),
1917 CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) ||
1918 !match(CurLoop->getLoopLatch()->getTerminator(),
1919 m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)),
1920 CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) {
1921 LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Basic control-flow layout unrecognized.\n"
; } } while (false)
;
1922 return false;
1923 }
1924 LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized basic control-flow layout.\n"
; } } while (false)
;
1925 return true;
1926}
1927
1928bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue,
1929 CmpOfLoads &CmpOfLoads) const {
1930 using namespace PatternMatch;
1931 LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValuedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Analyzing header icmp " <<
*BCmpValue << " as bcmp pattern.\n"; } } while (false
)
1932 << " as bcmp pattern.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Analyzing header icmp " <<
*BCmpValue << " as bcmp pattern.\n"; } } while (false
)
;
1933
1934 // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example:
1935 // %v0 = load <...>, <...>* %LoadSrcA
1936 // %v1 = load <...>, <...>* %LoadSrcB
1937 // %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1
1938 // There won't be any no-op bitcasts between load and icmp,
1939 // they would have been transformed into a load of bitcast.
1940 // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too.
1941 if (!match(BCmpValue,
1942 m_ICmp(CmpOfLoads.BCmpPred,
1943 m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)),
1944 m_Value(CmpOfLoads.LoadA)),
1945 m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)),
1946 m_Value(CmpOfLoads.LoadB)))) ||
1947 !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) {
1948 LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop header icmp did not match bcmp pattern.\n"
; } } while (false)
;
1949 return false;
1950 }
1951 LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
<< *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads
.LoadB << "\n"; } } while (false)
1952 << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
<< *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads
.LoadB << "\n"; } } while (false)
1953 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
<< *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads
.LoadB << "\n"; } } while (false)
;
1954 // FIXME: handle memcmp pattern?
1955 return true;
1956}
1957
1958bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow(
1959 const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const {
1960 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
1961 BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
1962
1963 // Be wary, comparisons can be inverted, canonicalize order.
1964 // If this 'element' comparison passed, we expect to proceed to the next elt.
1965 if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ)
1966 std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB);
1967 // The predicate on loop latch does not matter, just canonicalize some order.
1968 if (CmpLoop.LatchBrContinueBB != LoopHeaderBB)
1969 std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB);
1970
1971 // Check that control-flow between blocks is as expected.
1972 if (CmpLoop.HeaderBrEqualBB != LoopLatchBB ||
1973 CmpLoop.LatchBrContinueBB != LoopHeaderBB) {
1974 LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop control-flow not recognized.\n"
; } } while (false)
;
1975 return false;
1976 }
1977
1978 SmallVector<BasicBlock *, 2> ExitBlocks;
1979 CurLoop->getUniqueExitBlocks(ExitBlocks);
1980 assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks.")((ExitBlocks.size() <= 2U && "Can't have more than two exit blocks."
) ? static_cast<void> (0) : __assert_fail ("ExitBlocks.size() <= 2U && \"Can't have more than two exit blocks.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1980, __PRETTY_FUNCTION__))
;
1981
1982 assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&((!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&
!is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&
"Unexpected exit edges.") ? static_cast<void> (0) : __assert_fail
("!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) && !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) && \"Unexpected exit edges.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1986, __PRETTY_FUNCTION__))
1983 is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&((!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&
!is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&
"Unexpected exit edges.") ? static_cast<void> (0) : __assert_fail
("!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) && !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) && \"Unexpected exit edges.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1986, __PRETTY_FUNCTION__))
1984 !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&((!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&
!is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&
"Unexpected exit edges.") ? static_cast<void> (0) : __assert_fail
("!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) && !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) && \"Unexpected exit edges.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1986, __PRETTY_FUNCTION__))
1985 is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&((!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&
!is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&
"Unexpected exit edges.") ? static_cast<void> (0) : __assert_fail
("!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) && !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) && \"Unexpected exit edges.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1986, __PRETTY_FUNCTION__))
1986 "Unexpected exit edges.")((!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&
!is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&
"Unexpected exit edges.") ? static_cast<void> (0) : __assert_fail
("!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) && !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) && \"Unexpected exit edges.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1986, __PRETTY_FUNCTION__))
;
1987
1988 LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized loop control-flow.\n"
; } } while (false)
;
1989
1990 LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Performing side-effect analysis on the loop.\n"
; } } while (false)
;
1991 assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here.")((CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here."
) ? static_cast<void> (0) : __assert_fail ("CurLoop->isLCSSAForm(*DT) && \"Should only get LCSSA-form loops here.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 1991, __PRETTY_FUNCTION__))
;
1992 // No loop instructions must be used outside of the loop. Since we are in
1993 // LCSSA form, we only need to check successor block's PHI nodes's incoming
1994 // values for incoming blocks that are the loop basic blocks.
1995 for (const BasicBlock *ExitBB : ExitBlocks) {
1996 for (const PHINode &PHI : ExitBB->phis()) {
1997 for (const BasicBlock *LoopBB :
1998 make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) {
1999 return CurLoop->contains(PredecessorBB);
2000 })) {
2001 const auto *I =
2002 dyn_cast<Instruction>(PHI.getIncomingValueForBlock(LoopBB));
2003 if (I && CurLoop->contains(I)) {
2004 LLVM_DEBUG(dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction "
<< *I << " which is used outside of the loop in basic block "
<< ExitBB->getName() << " in phi node " <<
PHI << "\n"; } } while (false)
2005 << "Loop contains instruction " << *Ido { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction "
<< *I << " which is used outside of the loop in basic block "
<< ExitBB->getName() << " in phi node " <<
PHI << "\n"; } } while (false)
2006 << " which is used outside of the loop in basic block "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction "
<< *I << " which is used outside of the loop in basic block "
<< ExitBB->getName() << " in phi node " <<
PHI << "\n"; } } while (false)
2007 << ExitBB->getName() << " in phi node " << PHI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction "
<< *I << " which is used outside of the loop in basic block "
<< ExitBB->getName() << " in phi node " <<
PHI << "\n"; } } while (false)
;
2008 return false;
2009 }
2010 }
2011 }
2012 }
2013 // Similarly, the loop should not have any other observable side-effects
2014 // other than the final comparison result.
2015 for (BasicBlock *LoopBB : CurLoop->blocks()) {
2016 for (Instruction &I : *LoopBB) {
2017 if (isa<DbgInfoIntrinsic>(I)) // Ignore dbginfo.
2018 continue; // FIXME: anything else? lifetime info?
2019 if ((I.mayHaveSideEffects() || I.isAtomic() || I.isFenceLike()) &&
2020 &I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) {
2021 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction with potential side-effects: "
<< I << "\n"; } } while (false)
2022 dbgs() << "Loop contains instruction with potential side-effects: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction with potential side-effects: "
<< I << "\n"; } } while (false)
2023 << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Loop contains instruction with potential side-effects: "
<< I << "\n"; } } while (false)
;
2024 return false;
2025 }
2026 }
2027 }
2028 LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "No loop instructions deemed to have side-effects.\n"
; } } while (false)
;
2029 return true;
2030}
2031
2032bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
2033 CmpOfLoads &CmpOfLoads,
2034 const SCEV *&SrcA,
2035 const SCEV *&SrcB,
2036 const SCEV *&Iterations) const {
2037 // Try to compute SCEV of the loads, for this loop's scope.
2038 const auto *ScevForSrcA = dyn_cast<SCEVAddRecExpr>(
2039 SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop));
2040 const auto *ScevForSrcB = dyn_cast<SCEVAddRecExpr>(
2041 SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop));
2042 if (!ScevForSrcA || !ScevForSrcB) {
2043 LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Failed to get SCEV expressions for load sources.\n"
; } } while (false)
;
2044 return false;
2045 }
2046
2047 LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"
<< *ScevForSrcA << "\n\t" << *ScevForSrcB <<
"\n"; } } while (false)
2048 << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"
<< *ScevForSrcA << "\n\t" << *ScevForSrcB <<
"\n"; } } while (false)
;
2049
2050 // Loads must have folloving SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB>
2051 const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE);
2052 const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE);
2053 if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() ||
2054 ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop ||
2055 RecStepForA != RecStepForB || !isa<SCEVConstant>(RecStepForA) ||
2056 cast<SCEVConstant>(RecStepForA)->getAPInt() != BCmpTyBytes) {
2057 LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads. Only support "
"affine SCEV expressions originating in the loop we " "are analysing with identical constant positive step, "
"equal to the count of bytes compared. Got:\n\t" << *RecStepForA
<< "\n\t" << *RecStepForB << "\n"; } } while
(false)
2058 "affine SCEV expressions originating in the loop we "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads. Only support "
"affine SCEV expressions originating in the loop we " "are analysing with identical constant positive step, "
"equal to the count of bytes compared. Got:\n\t" << *RecStepForA
<< "\n\t" << *RecStepForB << "\n"; } } while
(false)
2059 "are analysing with identical constant positive step, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads. Only support "
"affine SCEV expressions originating in the loop we " "are analysing with identical constant positive step, "
"equal to the count of bytes compared. Got:\n\t" << *RecStepForA
<< "\n\t" << *RecStepForB << "\n"; } } while
(false)
2060 "equal to the count of bytes compared. Got:\n\t"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads. Only support "
"affine SCEV expressions originating in the loop we " "are analysing with identical constant positive step, "
"equal to the count of bytes compared. Got:\n\t" << *RecStepForA
<< "\n\t" << *RecStepForB << "\n"; } } while
(false)
2061 << *RecStepForA << "\n\t" << *RecStepForB << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads. Only support "
"affine SCEV expressions originating in the loop we " "are analysing with identical constant positive step, "
"equal to the count of bytes compared. Got:\n\t" << *RecStepForA
<< "\n\t" << *RecStepForB << "\n"; } } while
(false)
;
2062 return false;
2063 // FIXME: can support BCmpTyBytes > Step.
2064 // But will need to account for the extra bytes compared at the end.
2065 }
2066
2067 SrcA = ScevForSrcA->getStart();
2068 SrcB = ScevForSrcB->getStart();
2069 LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcAdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got SCEV expressions for load sources:\n\t"
<< *SrcA << "\n\t" << *SrcB << "\n";
} } while (false)
2070 << "\n\t" << *SrcB << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got SCEV expressions for load sources:\n\t"
<< *SrcA << "\n\t" << *SrcB << "\n";
} } while (false)
;
2071
2072 // The load sources must be loop-invants that dominate the loop header.
2073 if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() ||
2074 !SE->isAvailableAtLoopEntry(SrcA, CurLoop) ||
2075 !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) {
2076 LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavaliable "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads, unavaliable "
"prior to loop header.\n"; } } while (false)
2077 "prior to loop header.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expressions for loads, unavaliable "
"prior to loop header.\n"; } } while (false)
;
2078 return false;
2079 }
2080
2081 LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "SCEV expressions for loads are acceptable.\n"
; } } while (false)
;
2082
2083 // bcmp / memcmp take length argument as size_t, so let's conservatively
2084 // assume that the iteration count should be not wider than that.
2085 Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext());
2086
2087 // For how many iterations is loop guaranteed not to exit via LoopLatch?
2088 // This is one less than the maximal number of comparisons,and is: n + -1
2089 const SCEV *LoopExitCount =
2090 SE->getExitCount(CurLoop, CurLoop->getLoopLatch());
2091 LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got SCEV expression for loop latch exit count: "
<< *LoopExitCount << "\n"; } } while (false)
2092 << *LoopExitCount << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got SCEV expression for loop latch exit count: "
<< *LoopExitCount << "\n"; } } while (false)
;
2093 // Exit count, similarly, must be loop-invant that dominates the loop header.
2094 if (LoopExitCount == SE->getCouldNotCompute() ||
2095 !LoopExitCount->getType()->isIntOrPtrTy() ||
2096 LoopExitCount->getType()->getScalarSizeInBits() >
2097 CmpFuncSizeTy->getScalarSizeInBits() ||
2098 !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) {
2099 LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Unsupported SCEV expression for loop latch exit.\n"
; } } while (false)
;
2100 return false;
2101 }
2102
2103 // LoopExitCount is always one less than the actual count of iterations.
2104 // Do this before cast, else we will be stuck with 1 + zext(-1 + n)
2105 Iterations = SE->getAddExpr(
2106 LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW);
2107 assert(Iterations != SE->getCouldNotCompute() &&((Iterations != SE->getCouldNotCompute() && "Shouldn't fail to increment by one."
) ? static_cast<void> (0) : __assert_fail ("Iterations != SE->getCouldNotCompute() && \"Shouldn't fail to increment by one.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2108, __PRETTY_FUNCTION__))
2108 "Shouldn't fail to increment by one.")((Iterations != SE->getCouldNotCompute() && "Shouldn't fail to increment by one."
) ? static_cast<void> (0) : __assert_fail ("Iterations != SE->getCouldNotCompute() && \"Shouldn't fail to increment by one.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2108, __PRETTY_FUNCTION__))
;
2109
2110 LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Computed iteration count: "
<< *Iterations << "\n"; } } while (false)
;
2111 return true;
2112}
2113
2114/// Return true iff the bcmp idiom is detected in the loop.
2115///
2116/// Additionally:
2117/// 1) \p BCmpInst is set to the root byte-comparison instruction.
2118/// 2) \p LatchCmpInst is set to the comparison that controls the latch.
2119/// 3) \p LoadA is set to the first LoadInst.
2120/// 4) \p LoadB is set to the second LoadInst.
2121/// 5) \p SrcA is set to the first source location that is being compared.
2122/// 6) \p SrcB is set to the second source location that is being compared.
2123/// 7) \p NBytes is set to the number of bytes to compare.
2124bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst,
2125 CmpInst *&LatchCmpInst,
2126 LoadInst *&LoadA, LoadInst *&LoadB,
2127 const SCEV *&SrcA, const SCEV *&SrcB,
2128 const SCEV *&NBytes) const {
2129 LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognizing bcmp idiom\n";
} } while (false)
;
2130
2131 // Give up if the loop is not in normal form, or has more than 2 blocks.
2132 if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) {
2133 LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Basic loop structure unrecognized.\n"
; } } while (false)
;
2134 return false;
2135 }
2136 LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized basic loop structure.\n"
; } } while (false)
;
2137
2138 CmpLoopStructure CmpLoop;
2139 if (!matchBCmpLoopStructure(CmpLoop))
2140 return false;
2141
2142 CmpOfLoads CmpOfLoads;
2143 if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads))
2144 return false;
2145
2146 if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop))
2147 return false;
2148
2149 BCmpInst = cast<ICmpInst>(CmpLoop.BCmpValue); // FIXME: is there no
2150 LatchCmpInst = cast<CmpInst>(CmpLoop.LatchCmpValue); // way to combine
2151 LoadA = cast<LoadInst>(CmpOfLoads.LoadA); // these cast with
2152 LoadB = cast<LoadInst>(CmpOfLoads.LoadB); // m_Value() matcher?
2153
2154 Type *BCmpValTy = BCmpInst->getOperand(0)->getType();
2155 LLVMContext &Context = BCmpValTy->getContext();
2156 uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy);
2157 static constexpr uint64_t ByteTyBits = 8;
2158
2159 LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got comparison between values of type "
<< *BCmpValTy << " of size " << BCmpTyBits
<< " bits (while byte = " << ByteTyBits <<
" bits).\n"; } } while (false)
2160 << " of size " << BCmpTyBitsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got comparison between values of type "
<< *BCmpValTy << " of size " << BCmpTyBits
<< " bits (while byte = " << ByteTyBits <<
" bits).\n"; } } while (false)
2161 << " bits (while byte = " << ByteTyBits << " bits).\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Got comparison between values of type "
<< *BCmpValTy << " of size " << BCmpTyBits
<< " bits (while byte = " << ByteTyBits <<
" bits).\n"; } } while (false)
;
2162 // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check
2163 // that we are dealing with a multiple of a byte here.
2164 if (BCmpTyBits % ByteTyBits != 0) {
2165 LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Value size is not a multiple of byte.\n"
; } } while (false)
;
2166 return false;
2167 // FIXME: could still be done under a run-time check that the total bit
2168 // count is a multiple of a byte i guess? Or handle remainder separately?
2169 }
2170
2171 // Each comparison is done on this many bytes.
2172 uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits;
2173 LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytesdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Size is exactly " <<
BCmpTyBytes << " bytes, eligible for bcmp conversion.\n"
; } } while (false)
2174 << " bytes, eligible for bcmp conversion.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Size is exactly " <<
BCmpTyBytes << " bytes, eligible for bcmp conversion.\n"
; } } while (false)
;
2175
2176 const SCEV *Iterations;
2177 if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations))
2178 return false;
2179
2180 // bcmp / memcmp take length argument as size_t, do promotion now.
2181 Type *CmpFuncSizeTy = DL->getIntPtrType(Context);
2182 Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy);
2183 assert(Iterations != SE->getCouldNotCompute() && "Promotion failed.")((Iterations != SE->getCouldNotCompute() && "Promotion failed."
) ? static_cast<void> (0) : __assert_fail ("Iterations != SE->getCouldNotCompute() && \"Promotion failed.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2183, __PRETTY_FUNCTION__))
;
2184 // Note that it didn't do ptrtoint cast, we will need to do it manually.
2185
2186 // We will be comparing *bytes*, not BCmpTy, we need to recalculate size.
2187 // It's a multiplication, and it *could* overflow. But for it to overflow
2188 // we'd want to compare more bytes than could be represented by size_t, But
2189 // allocation functions also take size_t. So how'd you produce such buffer?
2190 // FIXME: we likely need to actually check that we know this won't overflow,
2191 // via llvm::computeOverflowForUnsignedMul().
2192 NBytes = SE->getMulExpr(
2193 Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW);
2194 assert(NBytes != SE->getCouldNotCompute() &&((NBytes != SE->getCouldNotCompute() && "Shouldn't fail to increment by one."
) ? static_cast<void> (0) : __assert_fail ("NBytes != SE->getCouldNotCompute() && \"Shouldn't fail to increment by one.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2195, __PRETTY_FUNCTION__))
2195 "Shouldn't fail to increment by one.")((NBytes != SE->getCouldNotCompute() && "Shouldn't fail to increment by one."
) ? static_cast<void> (0) : __assert_fail ("NBytes != SE->getCouldNotCompute() && \"Shouldn't fail to increment by one.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2195, __PRETTY_FUNCTION__))
;
2196
2197 LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Computed total byte count: "
<< *NBytes << "\n"; } } while (false)
;
2198
2199 if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() ||
2200 LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() ||
2201 !LoadB->isSimple()) {
2202 StringLiteral L("Unsupported loads in idiom - only support identical, "
2203 "simple loads from address space 0.\n");
2204 LLVM_DEBUG(dbgs() << L)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << L; } } while (false)
;
2205 ORE.emit([&]() {
2206 return OptimizationRemarkMissed(DEBUG_TYPE"loop-idiom", "BCmpIdiomUnsupportedLoads",
2207 BCmpInst->getDebugLoc(),
2208 CurLoop->getHeader())
2209 << L;
2210 });
2211 return false; // FIXME: support non-simple loads.
2212 }
2213
2214 LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Recognized bcmp idiom\n"; }
} while (false)
;
2215 ORE.emit([&]() {
2216 return OptimizationRemarkAnalysis(DEBUG_TYPE"loop-idiom", "RecognizedBCmpIdiom",
2217 CurLoop->getStartLoc(),
2218 CurLoop->getHeader())
2219 << "Loop recognized as a bcmp idiom";
2220 });
2221
2222 return true;
2223}
2224
2225BasicBlock *
2226LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) {
2227 LLVM_DEBUG(dbgs() << "Transforming control-flow.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Transforming control-flow.\n"
; } } while (false)
;
2228 SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
2229
2230 BasicBlock *PreheaderBB = CurLoop->getLoopPreheader();
2231 BasicBlock *HeaderBB = CurLoop->getHeader();
2232 BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
2233 SmallString<32> LoopName = CurLoop->getName();
2234 Function *Func = PreheaderBB->getParent();
2235 LLVMContext &Context = Func->getContext();
2236
2237 // Before doing anything, drop SCEV info.
2238 SE->forgetLoop(CurLoop);
2239
2240 // Here we start with: (0/6)
2241 // PreheaderBB: <preheader> ; preds = ???
2242 // <...>
2243 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2244 // %ComparedEqual = icmp eq <...> %memcmp, 0
2245 // br label %LoopHeaderBB
2246 // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
2247 // <...>
2248 // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
2249 // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
2250 // <...>
2251 // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
2252 // Successor0BB: <exit> ; preds = %LoopHeaderBB
2253 // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
2254 // <...>
2255 // Successor1BB: <exit> ; preds = %LoopLatchBB
2256 // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
2257 // <...>
2258 //
2259 // Successor0 and Successor1 may or may not be the same basic block.
2260
2261 // Decouple the edge between loop preheader basic block and loop header basic
2262 // block. Thus the loop has become unreachable.
2263 assert(cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() &&((cast<BranchInst>(PreheaderBB->getTerminator())->
isUnconditional() && PreheaderBB->getTerminator()->
getSuccessor(0) == HeaderBB && "Preheader bb must end with an unconditional branch to header bb."
) ? static_cast<void> (0) : __assert_fail ("cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() && PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB && \"Preheader bb must end with an unconditional branch to header bb.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2265, __PRETTY_FUNCTION__))
2264 PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB &&((cast<BranchInst>(PreheaderBB->getTerminator())->
isUnconditional() && PreheaderBB->getTerminator()->
getSuccessor(0) == HeaderBB && "Preheader bb must end with an unconditional branch to header bb."
) ? static_cast<void> (0) : __assert_fail ("cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() && PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB && \"Preheader bb must end with an unconditional branch to header bb.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2265, __PRETTY_FUNCTION__))
2265 "Preheader bb must end with an unconditional branch to header bb.")((cast<BranchInst>(PreheaderBB->getTerminator())->
isUnconditional() && PreheaderBB->getTerminator()->
getSuccessor(0) == HeaderBB && "Preheader bb must end with an unconditional branch to header bb."
) ? static_cast<void> (0) : __assert_fail ("cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() && PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB && \"Preheader bb must end with an unconditional branch to header bb.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2265, __PRETTY_FUNCTION__))
;
2266 PreheaderBB->getTerminator()->eraseFromParent();
2267 DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB});
2268
2269 // Create a new preheader basic block before loop header basic block.
2270 auto *PhonyPreheaderBB = BasicBlock::Create(
2271 Context, LoopName + ".phonypreheaderbb", Func, HeaderBB);
2272 // And insert an unconditional branch from phony preheader basic block to
2273 // loop header basic block.
2274 IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB);
2275 DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
2276
2277 // Create a *single* new empty block that we will substitute as a
2278 // successor basic block for the loop's exits. This one is temporary.
2279 // Much like phony preheader basic block, it is not connected.
2280 auto *PhonySuccessorBB =
2281 BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func,
2282 LoopLatchBB->getNextNode());
2283 // That block must have *some* non-PHI instruction, or else deleteDeadLoop()
2284 // will mess up cleanup of dbginfo, and verifier will complain.
2285 IRBuilder<>(PhonySuccessorBB).CreateUnreachable();
2286
2287 // Create two new empty blocks that we will use to preserve the original
2288 // loop exit control-flow, and preserve the incoming values in the PHI nodes
2289 // in loop's successor exit blocks. These will live one.
2290 auto *ComparedUnequalBB =
2291 BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func,
2292 PhonySuccessorBB->getNextNode());
2293 auto *ComparedEqualBB =
2294 BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func,
2295 PhonySuccessorBB->getNextNode());
2296
2297 // By now we have: (1/6)
2298 // PreheaderBB: ; preds = ???
2299 // <...>
2300 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2301 // %ComparedEqual = icmp eq <...> %memcmp, 0
2302 // [no terminator instruction!]
2303 // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
2304 // br label %LoopHeaderBB
2305 // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
2306 // <...>
2307 // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
2308 // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
2309 // <...>
2310 // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
2311 // PhonySuccessorBB: ; No preds, UNREACHABLE!
2312 // unreachable
2313 // EqualBB: ; No preds, UNREACHABLE!
2314 // [no terminator instruction!]
2315 // UnequalBB: ; No preds, UNREACHABLE!
2316 // [no terminator instruction!]
2317 // Successor0BB: <exit> ; preds = %LoopHeaderBB
2318 // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
2319 // <...>
2320 // Successor1BB: <exit> ; preds = %LoopLatchBB
2321 // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
2322 // <...>
2323
2324 // What is the mapping/replacement basic block for exiting out of the loop
2325 // from either of old's loop basic blocks?
2326 auto GetReplacementBB = [this, ComparedEqualBB,
2327 ComparedUnequalBB](const BasicBlock *OldBB) {
2328 assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks.")((CurLoop->contains(OldBB) && "Only for loop's basic blocks."
) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(OldBB) && \"Only for loop's basic blocks.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2328, __PRETTY_FUNCTION__))
;
2329 if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal".
2330 return ComparedEqualBB;
2331 if (OldBB == CurLoop->getHeader()) // "element compared unequal".
2332 return ComparedUnequalBB;
2333 llvm_unreachable("Only had two basic blocks in loop.")::llvm::llvm_unreachable_internal("Only had two basic blocks in loop."
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2333)
;
2334 };
2335
2336 // What are the exits out of this loop?
2337 SmallVector<Loop::Edge, 2> LoopExitEdges;
2338 CurLoop->getExitEdges(LoopExitEdges);
2339 assert(LoopExitEdges.size() == 2 && "Should have only to two exit edges.")((LoopExitEdges.size() == 2 && "Should have only to two exit edges."
) ? static_cast<void> (0) : __assert_fail ("LoopExitEdges.size() == 2 && \"Should have only to two exit edges.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2339, __PRETTY_FUNCTION__))
;
2340
2341 // Populate new basic blocks, update the exiting control-flow, PHI nodes.
2342 for (const Loop::Edge &Edge : LoopExitEdges) {
2343 auto *OldLoopBB = const_cast<BasicBlock *>(Edge.first);
2344 auto *SuccessorBB = const_cast<BasicBlock *>(Edge.second);
2345 assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) &&((CurLoop->contains(OldLoopBB) && !CurLoop->contains
(SuccessorBB) && "Unexpected edge.") ? static_cast<
void> (0) : __assert_fail ("CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) && \"Unexpected edge.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2346, __PRETTY_FUNCTION__))
2346 "Unexpected edge.")((CurLoop->contains(OldLoopBB) && !CurLoop->contains
(SuccessorBB) && "Unexpected edge.") ? static_cast<
void> (0) : __assert_fail ("CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) && \"Unexpected edge.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2346, __PRETTY_FUNCTION__))
;
2347
2348 // If we would exit the loop from this loop's basic block,
2349 // what semantically would that mean? Did comparison succeed or fail?
2350 BasicBlock *NewBB = GetReplacementBB(OldLoopBB);
2351 assert(NewBB->empty() && "Should not get same new basic block here twice.")((NewBB->empty() && "Should not get same new basic block here twice."
) ? static_cast<void> (0) : __assert_fail ("NewBB->empty() && \"Should not get same new basic block here twice.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2351, __PRETTY_FUNCTION__))
;
2352 IRBuilder<> Builder(NewBB);
2353 Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc());
2354 Builder.CreateBr(SuccessorBB);
2355 DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB});
2356 // Also, be *REALLY* careful with PHI nodes in successor basic block,
2357 // update them to recieve the same input value, but not from current loop's
2358 // basic block, but from new basic block instead.
2359 SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB);
2360 // Also, change loop control-flow. This loop's basic block shall no longer
2361 // exit from the loop to it's original successor basic block, but to our new
2362 // phony successor basic block. Note that new successor will be unique exit.
2363 OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB,
2364 PhonySuccessorBB);
2365 DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB});
2366 DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB});
2367 }
2368
2369 // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date.
2370 assert(DTUpdates.size() == 8 && "Update count prediction failed.")((DTUpdates.size() == 8 && "Update count prediction failed."
) ? static_cast<void> (0) : __assert_fail ("DTUpdates.size() == 8 && \"Update count prediction failed.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2370, __PRETTY_FUNCTION__))
;
2371 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2372 DTU.applyUpdates(DTUpdates);
2373 DTUpdates.clear();
2374
2375 // By now we have: (2/6)
2376 // PreheaderBB: ; preds = ???
2377 // <...>
2378 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2379 // %ComparedEqual = icmp eq <...> %memcmp, 0
2380 // [no terminator instruction!]
2381 // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
2382 // br label %LoopHeaderBB
2383 // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
2384 // <...>
2385 // br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB
2386 // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
2387 // <...>
2388 // br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB
2389 // PhonySuccessorBB: <uniq. exit> ; preds = %LoopHeaderBB, %LoopLatchBB
2390 // unreachable
2391 // EqualBB: ; No preds, UNREACHABLE!
2392 // br label %Successor1BB
2393 // UnequalBB: ; No preds, UNREACHABLE!
2394 // br label %Successor0BB
2395 // Successor0BB: ; preds = %UnequalBB
2396 // %S0PHI = phi <...> [ <...>, %UnequalBB ]
2397 // <...>
2398 // Successor1BB: ; preds = %EqualBB
2399 // %S0PHI = phi <...> [ <...>, %EqualBB ]
2400 // <...>
2401
2402 // *Finally*, zap the original loop. Record it's parent loop though.
2403 Loop *ParentLoop = CurLoop->getParentLoop();
2404 LLVM_DEBUG(dbgs() << "Deleting old loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Deleting old loop.\n"; } }
while (false)
;
2405 LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted *BEFORE* deleting!
2406 deleteDeadLoop(CurLoop, DT, SE, LI); // And actually delete the loop.
2407 CurLoop = nullptr;
2408
2409 // By now we have: (3/6)
2410 // PreheaderBB: ; preds = ???
2411 // <...>
2412 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2413 // %ComparedEqual = icmp eq <...> %memcmp, 0
2414 // [no terminator instruction!]
2415 // PhonyPreheaderBB: ; No preds, UNREACHABLE!
2416 // br label %PhonySuccessorBB
2417 // PhonySuccessorBB: ; preds = %PhonyPreheaderBB
2418 // unreachable
2419 // EqualBB: ; No preds, UNREACHABLE!
2420 // br label %Successor1BB
2421 // UnequalBB: ; No preds, UNREACHABLE!
2422 // br label %Successor0BB
2423 // Successor0BB: ; preds = %UnequalBB
2424 // %S0PHI = phi <...> [ <...>, %UnequalBB ]
2425 // <...>
2426 // Successor1BB: ; preds = %EqualBB
2427 // %S0PHI = phi <...> [ <...>, %EqualBB ]
2428 // <...>
2429
2430 // Now, actually restore the CFG.
2431
2432 // Insert an unconditional branch from an actual preheader basic block to
2433 // phony preheader basic block.
2434 IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB);
2435 DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
2436 // Insert proper conditional branch from phony successor basic block to the
2437 // "dispatch" basic blocks, which were used to preserve incoming values in
2438 // original loop's successor basic blocks.
2439 assert(isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) &&((isa<UnreachableInst>(PhonySuccessorBB->getTerminator
()) && "Yep, that's the one we created to keep deleteDeadLoop() happy."
) ? static_cast<void> (0) : __assert_fail ("isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) && \"Yep, that's the one we created to keep deleteDeadLoop() happy.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2440, __PRETTY_FUNCTION__))
2440 "Yep, that's the one we created to keep deleteDeadLoop() happy.")((isa<UnreachableInst>(PhonySuccessorBB->getTerminator
()) && "Yep, that's the one we created to keep deleteDeadLoop() happy."
) ? static_cast<void> (0) : __assert_fail ("isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) && \"Yep, that's the one we created to keep deleteDeadLoop() happy.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2440, __PRETTY_FUNCTION__))
;
2441 PhonySuccessorBB->getTerminator()->eraseFromParent();
2442 {
2443 IRBuilder<> Builder(PhonySuccessorBB);
2444 Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc());
2445 Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB);
2446 }
2447 DTUpdates.push_back(
2448 {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB});
2449 DTUpdates.push_back(
2450 {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB});
2451
2452 BasicBlock *DispatchBB = PhonySuccessorBB;
2453 DispatchBB->setName(LoopName + ".bcmpdispatchbb");
2454
2455 assert(DTUpdates.size() == 3 && "Update count prediction failed.")((DTUpdates.size() == 3 && "Update count prediction failed."
) ? static_cast<void> (0) : __assert_fail ("DTUpdates.size() == 3 && \"Update count prediction failed.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2455, __PRETTY_FUNCTION__))
;
2456 DTU.applyUpdates(DTUpdates);
2457 DTUpdates.clear();
2458
2459 // By now we have: (4/6)
2460 // PreheaderBB: ; preds = ???
2461 // <...>
2462 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2463 // %ComparedEqual = icmp eq <...> %memcmp, 0
2464 // br label %PhonyPreheaderBB
2465 // PhonyPreheaderBB: ; preds = %PreheaderBB
2466 // br label %DispatchBB
2467 // DispatchBB: ; preds = %PhonyPreheaderBB
2468 // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
2469 // EqualBB: ; preds = %DispatchBB
2470 // br label %Successor1BB
2471 // UnequalBB: ; preds = %DispatchBB
2472 // br label %Successor0BB
2473 // Successor0BB: ; preds = %UnequalBB
2474 // %S0PHI = phi <...> [ <...>, %UnequalBB ]
2475 // <...>
2476 // Successor1BB: ; preds = %EqualBB
2477 // %S0PHI = phi <...> [ <...>, %EqualBB ]
2478 // <...>
2479
2480 // The basic CFG has been restored! Now let's merge redundant basic blocks.
2481
2482 // Merge phony successor basic block into it's only predecessor,
2483 // phony preheader basic block. It is fully pointlessly redundant.
2484 MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
2485
2486 // By now we have: (5/6)
2487 // PreheaderBB: ; preds = ???
2488 // <...>
2489 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2490 // %ComparedEqual = icmp eq <...> %memcmp, 0
2491 // br label %DispatchBB
2492 // DispatchBB: ; preds = %PreheaderBB
2493 // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
2494 // EqualBB: ; preds = %DispatchBB
2495 // br label %Successor1BB
2496 // UnequalBB: ; preds = %DispatchBB
2497 // br label %Successor0BB
2498 // Successor0BB: ; preds = %UnequalBB
2499 // %S0PHI = phi <...> [ <...>, %UnequalBB ]
2500 // <...>
2501 // Successor1BB: ; preds = %EqualBB
2502 // %S0PHI = phi <...> [ <...>, %EqualBB ]
2503 // <...>
2504
2505 // Was this loop nested?
2506 if (!ParentLoop) {
2507 // If the loop was *NOT* nested, then let's also merge phony successor
2508 // basic block into it's only predecessor, preheader basic block.
2509 // Also, here we need to update LoopInfo.
2510 LI->removeBlock(PreheaderBB);
2511 MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
2512
2513 // By now we have: (6/6)
2514 // DispatchBB: ; preds = ???
2515 // <...>
2516 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2517 // %ComparedEqual = icmp eq <...> %memcmp, 0
2518 // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
2519 // EqualBB: ; preds = %DispatchBB
2520 // br label %Successor1BB
2521 // UnequalBB: ; preds = %DispatchBB
2522 // br label %Successor0BB
2523 // Successor0BB: ; preds = %UnequalBB
2524 // %S0PHI = phi <...> [ <...>, %UnequalBB ]
2525 // <...>
2526 // Successor1BB: ; preds = %EqualBB
2527 // %S0PHI = phi <...> [ <...>, %EqualBB ]
2528 // <...>
2529
2530 return DispatchBB;
2531 }
2532
2533 // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop.
2534 // To achieve that, we shall keep the preheader basic block (mainly so that
2535 // the loop header block will be guaranteed to have a predecessor outside of
2536 // the loop), and create a phony loop with all these new three basic blocks.
2537 Loop *PhonyLoop = LI->AllocateLoop();
2538 ParentLoop->addChildLoop(PhonyLoop);
2539 PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI);
2540 PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI);
2541 PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI);
2542
2543 // But we only have a preheader basic block, a header basic block block and
2544 // two exiting basic blocks. For a proper loop we also need a backedge from
2545 // non-header basic block to header bb.
2546 // Let's just add a never-taken branch from both of the exiting basic blocks.
2547 for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) {
2548 BranchInst *OldTerminator = cast<BranchInst>(BB->getTerminator());
2549 assert(OldTerminator->isUnconditional() && "That's the one we created.")((OldTerminator->isUnconditional() && "That's the one we created."
) ? static_cast<void> (0) : __assert_fail ("OldTerminator->isUnconditional() && \"That's the one we created.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2549, __PRETTY_FUNCTION__))
;
2550 BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0);
2551
2552 IRBuilder<> Builder(OldTerminator);
2553 Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc());
2554 Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB,
2555 DispatchBB);
2556 OldTerminator->eraseFromParent();
2557 // Yes, the backedge will never be taken. The control-flow is redundant.
2558 // If it can be simplified further, other passes will take care.
2559 DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB});
2560 DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB});
2561 DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB});
2562 }
2563 assert(DTUpdates.size() == 6 && "Update count prediction failed.")((DTUpdates.size() == 6 && "Update count prediction failed."
) ? static_cast<void> (0) : __assert_fail ("DTUpdates.size() == 6 && \"Update count prediction failed.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2563, __PRETTY_FUNCTION__))
;
2564 DTU.applyUpdates(DTUpdates);
2565 DTUpdates.clear();
2566
2567 // By now we have: (6/6)
2568 // PreheaderBB: <preheader> ; preds = ???
2569 // <...>
2570 // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
2571 // %ComparedEqual = icmp eq <...> %memcmp, 0
2572 // br label %BCmpDispatchBB
2573 // BCmpDispatchBB: <header> ; preds = %PreheaderBB
2574 // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
2575 // EqualBB: <latch,exiting> ; preds = %BCmpDispatchBB
2576 // br i1 %true, label %Successor1BB, label %BCmpDispatchBB
2577 // UnequalBB: <latch,exiting> ; preds = %BCmpDispatchBB
2578 // br i1 %true, label %Successor0BB, label %BCmpDispatchBB
2579 // Successor0BB: ; preds = %UnequalBB
2580 // %S0PHI = phi <...> [ <...>, %UnequalBB ]
2581 // <...>
2582 // Successor1BB: ; preds = %EqualBB
2583 // %S0PHI = phi <...> [ <...>, %EqualBB ]
2584 // <...>
2585
2586 // Finally fully DONE!
2587 return DispatchBB;
2588}
2589
2590void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst,
2591 CmpInst *LatchCmpInst,
2592 LoadInst *LoadA, LoadInst *LoadB,
2593 const SCEV *SrcA, const SCEV *SrcB,
2594 const SCEV *NBytes) {
2595 // We will be inserting before the terminator instruction of preheader block.
2596 IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator());
2597
2598 LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Transforming bcmp loop idiom into a call.\n"
; } } while (false)
;
2599 LLVM_DEBUG(dbgs() << "Emitting new instructions.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Emitting new instructions.\n"
; } } while (false)
;
2600
2601 // Expand the SCEV expressions for both sources to compare, and produce value
2602 // for the byte len (beware of Iterations potentially being a pointer, and
2603 // account for element size being BCmpTyBytes bytes, which may be not 1 byte)
2604 Value *PtrA, *PtrB, *Len;
2605 {
2606 SCEVExpander SExp(*SE, *DL, "LoopToBCmp");
2607 SExp.setInsertPoint(&*Builder.GetInsertPoint());
2608
2609 auto HandlePtr = [&SExp](LoadInst *Load, const SCEV *Src) {
2610 SExp.SetCurrentDebugLocation(DebugLoc());
2611 // If the pointer operand of original load had dbgloc - use it.
2612 if (const auto *I = dyn_cast<Instruction>(Load->getPointerOperand()))
2613 SExp.SetCurrentDebugLocation(I->getDebugLoc());
2614 return SExp.expandCodeFor(Src);
2615 };
2616 PtrA = HandlePtr(LoadA, SrcA);
2617 PtrB = HandlePtr(LoadB, SrcB);
2618
2619 // For len calculation let's use dbgloc for the loop's latch condition.
2620 Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
2621 SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
2622 Len = SExp.expandCodeFor(NBytes);
2623
2624 Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext());
2625 assert(SE->getTypeSizeInBits(Len->getType()) ==((SE->getTypeSizeInBits(Len->getType()) == DL->getTypeSizeInBits
(CmpFuncSizeTy) && "Len should already have the correct size."
) ? static_cast<void> (0) : __assert_fail ("SE->getTypeSizeInBits(Len->getType()) == DL->getTypeSizeInBits(CmpFuncSizeTy) && \"Len should already have the correct size.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2627, __PRETTY_FUNCTION__))
2626 DL->getTypeSizeInBits(CmpFuncSizeTy) &&((SE->getTypeSizeInBits(Len->getType()) == DL->getTypeSizeInBits
(CmpFuncSizeTy) && "Len should already have the correct size."
) ? static_cast<void> (0) : __assert_fail ("SE->getTypeSizeInBits(Len->getType()) == DL->getTypeSizeInBits(CmpFuncSizeTy) && \"Len should already have the correct size.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2627, __PRETTY_FUNCTION__))
2627 "Len should already have the correct size.")((SE->getTypeSizeInBits(Len->getType()) == DL->getTypeSizeInBits
(CmpFuncSizeTy) && "Len should already have the correct size."
) ? static_cast<void> (0) : __assert_fail ("SE->getTypeSizeInBits(Len->getType()) == DL->getTypeSizeInBits(CmpFuncSizeTy) && \"Len should already have the correct size.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2627, __PRETTY_FUNCTION__))
;
2628
2629 // Make sure that iteration count is a number, insert ptrtoint cast if not.
2630 if (Len->getType()->isPointerTy())
2631 Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy);
2632 assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now.")((Len->getType() == CmpFuncSizeTy && "Should have correct type now."
) ? static_cast<void> (0) : __assert_fail ("Len->getType() == CmpFuncSizeTy && \"Should have correct type now.\""
, "/build/llvm-toolchain-snapshot-10~svn374814/lib/Transforms/Scalar/LoopIdiomRecognize.cpp"
, 2632, __PRETTY_FUNCTION__))
;
2633
2634 Len->setName(Len->getName() + ".bytecount");
2635
2636 // There is no legality check needed. We want to compare that the memory
2637 // regions [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical, equal.
2638 // For them to be fully equal, they must match bit-by-bit. And likewise,
2639 // for them to *NOT* be fully equal, they have to differ just by one bit.
2640 // The step of comparison (bits compared at once) simply does not matter.
2641 }
2642
2643 // For the rest of new instructions, dbgloc should point at the value cmp.
2644 Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc());
2645
2646 // Emit the comparison itself.
2647 auto *CmpCall =
2648 cast<CallInst>(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI)
2649 : emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI));
2650 // FIXME: add {B,Mem}CmpInst with MemoryCompareInst
2651 // (based on MemIntrinsicBase) as base?
2652 // FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...)
2653
2654 // {b,mem}cmp returned 0 if they were equal, or non-zero if not equal.
2655 auto *ComparedEqual = cast<ICmpInst>(Builder.CreateICmpEQ(
2656 CmpCall, ConstantInt::get(CmpCall->getType(), 0),
2657 PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp"));
2658
2659 BasicBlock *BB = transformBCmpControlFlow(ComparedEqual);
2660 Builder.ClearInsertionPoint();
2661
2662 // We're done.
2663 LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "Transformed loop bcmp idiom into a call.\n"
; } } while (false)
;
2664 ORE.emit([&]() {
2665 return OptimizationRemark(DEBUG_TYPE"loop-idiom", "TransformedBCmpIdiomToCall",
2666 CmpCall->getDebugLoc(), BB)
2667 << "Transformed bcmp idiom into a call to "
2668 << ore::NV("NewFunction", CmpCall->getCalledFunction())
2669 << "() function";
2670 });
2671 ++NumBCmp;
2672}
2673
2674/// Recognizes a bcmp idiom in a non-countable loop.
2675///
2676/// If detected, transforms the relevant code to issue the bcmp (or memcmp)
2677/// intrinsic function call, and returns true; otherwise, returns false.
2678bool LoopIdiomRecognize::recognizeBCmp() {
2679 if (!HasMemCmp && !HasBCmp)
2680 return false;
2681
2682 ICmpInst *BCmpInst;
2683 CmpInst *LatchCmpInst;
2684 LoadInst *LoadA, *LoadB;
2685 const SCEV *SrcA, *SrcB, *NBytes;
2686 if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB,
2687 NBytes)) {
2688 LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-idiom")) { dbgs() << "bcmp idiom recognition failed.\n"
; } } while (false)
;
2689 return false;
2690 }
2691
2692 transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes);
2693 return true;
2694}