/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Bug Summary

File:	llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Warning:	line 510, column 15 Although the value stored to 'PatternValue' is used in the enclosing expression, the value is never actually read from 'PatternValue'

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LoopIdiomRecognize.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-09-11-161148-48261-1 -x c++ /build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

1	//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass implements an idiom recognizer that transforms simple loops into a
10	// non-loop form. In cases that this kicks in, it can be a significant
11	// performance win.
12	//
13	// If compiling for code size we avoid idiom recognition if the resulting
14	// code could be larger than the code for the original loop. One way this could
15	// happen is if the loop is not removable after idiom recognition due to the
16	// presence of non-idiom instructions. The initial implementation of the
17	// heuristics applies to idioms in multi-block loops.
18	//
19	//===----------------------------------------------------------------------===//
20	//
21	// TODO List:
22	//
23	// Future loop memory idioms to recognize:
24	// memcmp, memmove, strlen, etc.
25	// Future floating point idioms to recognize in -ffast-math mode:
26	// fpowi
27	// Future integer operation idioms to recognize:
28	// ctpop
29	//
30	// Beware that isel's default lowering for ctpop is highly inefficient for
31	// i64 and larger types when i64 is legal and the value has few bits set. It
32	// would be good to enhance isel to emit a loop for ctpop in this case.
33	//
34	// This could recognize common matrix multiplies and dot product idioms and
35	// replace them with calls to BLAS (if linked in??).
36	//
37	//===----------------------------------------------------------------------===//
38
39	#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
40	#include "llvm/ADT/APInt.h"
41	#include "llvm/ADT/ArrayRef.h"
42	#include "llvm/ADT/DenseMap.h"
43	#include "llvm/ADT/MapVector.h"
44	#include "llvm/ADT/SetVector.h"
45	#include "llvm/ADT/SmallPtrSet.h"
46	#include "llvm/ADT/SmallVector.h"
47	#include "llvm/ADT/Statistic.h"
48	#include "llvm/ADT/StringRef.h"
49	#include "llvm/Analysis/AliasAnalysis.h"
50	#include "llvm/Analysis/LoopAccessAnalysis.h"
51	#include "llvm/Analysis/LoopInfo.h"
52	#include "llvm/Analysis/LoopPass.h"
53	#include "llvm/Analysis/MemoryLocation.h"
54	#include "llvm/Analysis/MemorySSA.h"
55	#include "llvm/Analysis/MemorySSAUpdater.h"
56	#include "llvm/Analysis/MustExecute.h"
57	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
58	#include "llvm/Analysis/ScalarEvolution.h"
59	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
60	#include "llvm/Analysis/TargetLibraryInfo.h"
61	#include "llvm/Analysis/TargetTransformInfo.h"
62	#include "llvm/Analysis/ValueTracking.h"
63	#include "llvm/IR/Attributes.h"
64	#include "llvm/IR/BasicBlock.h"
65	#include "llvm/IR/Constant.h"
66	#include "llvm/IR/Constants.h"
67	#include "llvm/IR/DataLayout.h"
68	#include "llvm/IR/DebugLoc.h"
69	#include "llvm/IR/DerivedTypes.h"
70	#include "llvm/IR/Dominators.h"
71	#include "llvm/IR/GlobalValue.h"
72	#include "llvm/IR/GlobalVariable.h"
73	#include "llvm/IR/IRBuilder.h"
74	#include "llvm/IR/InstrTypes.h"
75	#include "llvm/IR/Instruction.h"
76	#include "llvm/IR/Instructions.h"
77	#include "llvm/IR/IntrinsicInst.h"
78	#include "llvm/IR/Intrinsics.h"
79	#include "llvm/IR/LLVMContext.h"
80	#include "llvm/IR/Module.h"
81	#include "llvm/IR/PassManager.h"
82	#include "llvm/IR/Type.h"
83	#include "llvm/IR/User.h"
84	#include "llvm/IR/Value.h"
85	#include "llvm/IR/ValueHandle.h"
86	#include "llvm/InitializePasses.h"
87	#include "llvm/Pass.h"
88	#include "llvm/Support/Casting.h"
89	#include "llvm/Support/CommandLine.h"
90	#include "llvm/Support/Debug.h"
91	#include "llvm/Support/raw_ostream.h"
92	#include "llvm/Transforms/Scalar.h"
93	#include "llvm/Transforms/Utils/BuildLibCalls.h"
94	#include "llvm/Transforms/Utils/Local.h"
95	#include "llvm/Transforms/Utils/LoopUtils.h"
96	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
97	#include <algorithm>
98	#include <cassert>
99	#include <cstdint>
100	#include <utility>
101	#include <vector>
102
103	using namespace llvm;
104
105	#define DEBUG_TYPE"loop-idiom" "loop-idiom"
106
107	STATISTIC(NumMemSet, "Number of memset's formed from loop stores")static llvm::Statistic NumMemSet = {"loop-idiom", "NumMemSet" , "Number of memset's formed from loop stores"};
108	STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores")static llvm::Statistic NumMemCpy = {"loop-idiom", "NumMemCpy" , "Number of memcpy's formed from loop load+stores"};
109
110	bool DisableLIRP::All;
111	static cl::opt<bool, true>
112	DisableLIRPAll("disable-" DEBUG_TYPE"loop-idiom" "-all",
113	cl::desc("Options to disable Loop Idiom Recognize Pass."),
114	cl::location(DisableLIRP::All), cl::init(false),
115	cl::ReallyHidden);
116
117	bool DisableLIRP::Memset;
118	static cl::opt<bool, true>
119	DisableLIRPMemset("disable-" DEBUG_TYPE"loop-idiom" "-memset",
120	cl::desc("Proceed with loop idiom recognize pass, but do "
121	"not convert loop(s) to memset."),
122	cl::location(DisableLIRP::Memset), cl::init(false),
123	cl::ReallyHidden);
124
125	bool DisableLIRP::Memcpy;
126	static cl::opt<bool, true>
127	DisableLIRPMemcpy("disable-" DEBUG_TYPE"loop-idiom" "-memcpy",
128	cl::desc("Proceed with loop idiom recognize pass, but do "
129	"not convert loop(s) to memcpy."),
130	cl::location(DisableLIRP::Memcpy), cl::init(false),
131	cl::ReallyHidden);
132
133	static cl::opt<bool> UseLIRCodeSizeHeurs(
134	"use-lir-code-size-heurs",
135	cl::desc("Use loop idiom recognition code size heuristics when compiling"
136	"with -Os/-Oz"),
137	cl::init(true), cl::Hidden);
138
139	namespace {
140
141	class LoopIdiomRecognize {
142	Loop *CurLoop = nullptr;
143	AliasAnalysis *AA;
144	DominatorTree *DT;
145	LoopInfo *LI;
146	ScalarEvolution *SE;
147	TargetLibraryInfo *TLI;
148	const TargetTransformInfo *TTI;
149	const DataLayout *DL;
150	OptimizationRemarkEmitter &ORE;
151	bool ApplyCodeSizeHeuristics;
152	std::unique_ptr<MemorySSAUpdater> MSSAU;
153
154	public:
155	explicit LoopIdiomRecognize(AliasAnalysis AA, DominatorTree DT,
156	LoopInfo LI, ScalarEvolution SE,
157	TargetLibraryInfo *TLI,
158	const TargetTransformInfo TTI, MemorySSA MSSA,
159	const DataLayout *DL,
160	OptimizationRemarkEmitter &ORE)
161	: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
162	if (MSSA)
163	MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
164	}
165
166	bool runOnLoop(Loop *L);
167
168	private:
169	using StoreList = SmallVector<StoreInst *, 8>;
170	using StoreListMap = MapVector<Value *, StoreList>;
171
172	StoreListMap StoreRefsForMemset;
173	StoreListMap StoreRefsForMemsetPattern;
174	StoreList StoreRefsForMemcpy;
175	bool HasMemset;
176	bool HasMemsetPattern;
177	bool HasMemcpy;
178
179	/// Return code for isLegalStore()
180	enum LegalStoreKind {
181	None = 0,
182	Memset,
183	MemsetPattern,
184	Memcpy,
185	UnorderedAtomicMemcpy,
186	DontUse // Dummy retval never to be used. Allows catching errors in retval
187	// handling.
188	};
189
190	/// \name Countable Loop Idiom Handling
191	/// @{
192
193	bool runOnCountableLoop();
194	bool runOnLoopBlock(BasicBlock BB, const SCEV BECount,
195	SmallVectorImpl<BasicBlock *> &ExitBlocks);
196
197	void collectStores(BasicBlock *BB);
198	LegalStoreKind isLegalStore(StoreInst *SI);
199	enum class ForMemset { No, Yes };
200	bool processLoopStores(SmallVectorImpl<StoreInst > &SL, const SCEV BECount,
201	ForMemset For);
202	bool processLoopMemSet(MemSetInst MSI, const SCEV BECount);
203
204	bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
205	MaybeAlign StoreAlignment, Value *StoredVal,
206	Instruction *TheStore,
207	SmallPtrSetImpl<Instruction *> &Stores,
208	const SCEVAddRecExpr Ev, const SCEV BECount,
209	bool NegStride, bool IsLoopMemset = false);
210	bool processLoopStoreOfLoopLoad(StoreInst SI, const SCEV BECount);
211	bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
212	bool IsLoopMemset = false);
213
214	/// @}
215	/// \name Noncountable Loop Idiom Handling
216	/// @{
217
218	bool runOnNoncountableLoop();
219
220	bool recognizePopcount();
221	void transformLoopToPopcount(BasicBlock PreCondBB, Instruction CntInst,
222	PHINode CntPhi, Value Var);
223	bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
224	void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
225	Instruction CntInst, PHINode CntPhi,
226	Value Var, Instruction DefX,
227	const DebugLoc &DL, bool ZeroCheck,
228	bool IsCntPhiUsedOutsideLoop);
229
230	/// @}
231	};
232
233	class LoopIdiomRecognizeLegacyPass : public LoopPass {
234	public:
235	static char ID;
236
237	explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
238	initializeLoopIdiomRecognizeLegacyPassPass(
239	*PassRegistry::getPassRegistry());
240	}
241
242	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
243	if (DisableLIRP::All)
244	return false;
245
246	if (skipLoop(L))
247	return false;
248
249	AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
250	DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
251	LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
252	ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
253	TargetLibraryInfo *TLI =
254	&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
255	*L->getHeader()->getParent());
256	const TargetTransformInfo *TTI =
257	&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
258	*L->getHeader()->getParent());
259	const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
260	auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
261	MemorySSA *MSSA = nullptr;
262	if (MSSAAnalysis)
263	MSSA = &MSSAAnalysis->getMSSA();
264
265	// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
266	// pass. Function analyses need to be preserved across loop transformations
267	// but ORE cannot be preserved (see comment before the pass definition).
268	OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
269
270	LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
271	return LIR.runOnLoop(L);
272	}
273
274	/// This transformation requires natural loop information & requires that
275	/// loop preheaders be inserted into the CFG.
276	void getAnalysisUsage(AnalysisUsage &AU) const override {
277	AU.addRequired<TargetLibraryInfoWrapperPass>();
278	AU.addRequired<TargetTransformInfoWrapperPass>();
279	AU.addPreserved<MemorySSAWrapperPass>();
280	getLoopAnalysisUsage(AU);
281	}
282	};
283
284	} // end anonymous namespace
285
286	char LoopIdiomRecognizeLegacyPass::ID = 0;
287
288	PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
289	LoopStandardAnalysisResults &AR,
290	LPMUpdater &) {
291	if (DisableLIRP::All)
292	return PreservedAnalyses::all();
293
294	const auto *DL = &L.getHeader()->getModule()->getDataLayout();
295
296	// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
297	// pass. Function analyses need to be preserved across loop transformations
298	// but ORE cannot be preserved (see comment before the pass definition).
299	OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
300
301	LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
302	AR.MSSA, DL, ORE);
303	if (!LIR.runOnLoop(&L))
304	return PreservedAnalyses::all();
305
306	auto PA = getLoopPassPreservedAnalyses();
307	if (AR.MSSA)
308	PA.preserve<MemorySSAAnalysis>();
309	return PA;
310	}
311
312	INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",static void *initializeLoopIdiomRecognizeLegacyPassPassOnce(PassRegistry &Registry) {
313	"Recognize loop idioms", false, false)static void *initializeLoopIdiomRecognizeLegacyPassPassOnce(PassRegistry &Registry) {
314	INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
315	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
316	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
317	INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",PassInfo PI = new PassInfo( "Recognize loop idioms", "loop-idiom" , &LoopIdiomRecognizeLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopIdiomRecognizeLegacyPass>), false, false); Registry.registerPass(PI, true); return PI; } static llvm::once_flag InitializeLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopIdiomRecognizeLegacyPassPassFlag , initializeLoopIdiomRecognizeLegacyPassPassOnce, std::ref(Registry )); }
318	"Recognize loop idioms", false, false)PassInfo PI = new PassInfo( "Recognize loop idioms", "loop-idiom" , &LoopIdiomRecognizeLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopIdiomRecognizeLegacyPass>), false, false); Registry.registerPass(PI, true); return PI; } static llvm::once_flag InitializeLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopIdiomRecognizeLegacyPassPassFlag , initializeLoopIdiomRecognizeLegacyPassPassOnce, std::ref(Registry )); }
319
320	Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
321
322	static void deleteDeadInstruction(Instruction *I) {
323	I->replaceAllUsesWith(UndefValue::get(I->getType()));
324	I->eraseFromParent();
325	}
326
327	//===----------------------------------------------------------------------===//
328	//
329	// Implementation of LoopIdiomRecognize
330	//
331	//===----------------------------------------------------------------------===//
332
333	bool LoopIdiomRecognize::runOnLoop(Loop *L) {
334	CurLoop = L;
335	// If the loop could not be converted to canonical form, it must have an
336	// indirectbr in it, just give up.
337	if (!L->getLoopPreheader())
338	return false;
339
340	// Disable loop idiom recognition if the function's name is a common idiom.
341	StringRef Name = L->getHeader()->getParent()->getName();
342	if (Name == "memset" \|\| Name == "memcpy")
343	return false;
344
345	// Determine if code size heuristics need to be applied.
346	ApplyCodeSizeHeuristics =
347	L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
348
349	HasMemset = TLI->has(LibFunc_memset);
350	HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
351	HasMemcpy = TLI->has(LibFunc_memcpy);
352
353	if (HasMemset \|\| HasMemsetPattern \|\| HasMemcpy)
354	if (SE->hasLoopInvariantBackedgeTakenCount(L))
355	return runOnCountableLoop();
356
357	return runOnNoncountableLoop();
358	}
359
360	bool LoopIdiomRecognize::runOnCountableLoop() {
361	const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
362	assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? static_cast<void> (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 364, __PRETTY_FUNCTION__))
363	"runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? static_cast<void> (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 364, __PRETTY_FUNCTION__))
364	"backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? static_cast<void> (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 364, __PRETTY_FUNCTION__));
365
366	// If this loop executes exactly one time, then it should be peeled, not
367	// optimized by this pass.
368	if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
369	if (BECst->getAPInt() == 0)
370	return false;
371
372	SmallVector<BasicBlock *, 8> ExitBlocks;
373	CurLoop->getUniqueExitBlocks(ExitBlocks);
374
375	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
376	<< CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
377	<< "] Countable Loop %" << CurLoop->getHeader()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
378	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false);
379
380	// The following transforms hoist stores/memsets into the loop pre-header.
381	// Give up if the loop has instructions that may throw.
382	SimpleLoopSafetyInfo SafetyInfo;
383	SafetyInfo.computeLoopSafetyInfo(CurLoop);
384	if (SafetyInfo.anyBlockMayThrow())
385	return false;
386
387	bool MadeChange = false;
388
389	// Scan all the blocks in the loop that are not in subloops.
390	for (auto *BB : CurLoop->getBlocks()) {
391	// Ignore blocks in subloops.
392	if (LI->getLoopFor(BB) != CurLoop)
393	continue;
394
395	MadeChange \|= runOnLoopBlock(BB, BECount, ExitBlocks);
396	}
397	return MadeChange;
398	}
399
400	static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
401	const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
402	return ConstStride->getAPInt();
403	}
404
405	/// getMemSetPatternValue - If a strided store of the specified value is safe to
406	/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
407	/// be passed in. Otherwise, return null.
408	///
409	/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
410	/// just replicate their input array and then pass on to memset_pattern16.
411	static Constant getMemSetPatternValue(Value V, const DataLayout *DL) {
412	// FIXME: This could check for UndefValue because it can be merged into any
413	// other valid pattern.
414
415	// If the value isn't a constant, we can't promote it to being in a constant
416	// array. We could theoretically do a store to an alloca or something, but
417	// that doesn't seem worthwhile.
418	Constant *C = dyn_cast<Constant>(V);
419	if (!C)
420	return nullptr;
421
422	// Only handle simple values that are a power of two bytes in size.
423	uint64_t Size = DL->getTypeSizeInBits(V->getType());
424	if (Size == 0 \|\| (Size & 7) \|\| (Size & (Size - 1)))
425	return nullptr;
426
427	// Don't care enough about darwin/ppc to implement this.
428	if (DL->isBigEndian())
429	return nullptr;
430
431	// Convert to size in bytes.
432	Size /= 8;
433
434	// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
435	// if the top and bottom are the same (e.g. for vectors and large integers).
436	if (Size > 16)
437	return nullptr;
438
439	// If the constant is exactly 16 bytes, just use it.
440	if (Size == 16)
441	return C;
442
443	// Otherwise, we'll use an array of the constants.
444	unsigned ArraySize = 16 / Size;
445	ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
446	return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
447	}
448
449	LoopIdiomRecognize::LegalStoreKind
450	LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
451	// Don't touch volatile stores.
452	if (SI->isVolatile())
453	return LegalStoreKind::None;
454	// We only want simple or unordered-atomic stores.
455	if (!SI->isUnordered())
456	return LegalStoreKind::None;
457
458	// Don't convert stores of non-integral pointer types to memsets (which stores
459	// integers).
460	if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
461	return LegalStoreKind::None;
462
463	// Avoid merging nontemporal stores.
464	if (SI->getMetadata(LLVMContext::MD_nontemporal))
465	return LegalStoreKind::None;
466
467	Value *StoredVal = SI->getValueOperand();
468	Value *StorePtr = SI->getPointerOperand();
469
470	// Reject stores that are so large that they overflow an unsigned.
471	uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
472	if ((SizeInBits & 7) \|\| (SizeInBits >> 32) != 0)
473	return LegalStoreKind::None;
474
475	// See if the pointer expression is an AddRec like {base,+,1} on the current
476	// loop, which indicates a strided store. If we have something else, it's a
477	// random store we can't handle.
478	const SCEVAddRecExpr *StoreEv =
479	dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
480	if (!StoreEv \|\| StoreEv->getLoop() != CurLoop \|\| !StoreEv->isAffine())
481	return LegalStoreKind::None;
482
483	// Check to see if we have a constant stride.
484	if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
485	return LegalStoreKind::None;
486
487	// See if the store can be turned into a memset.
488
489	// If the stored value is a byte-wise value (like i32 -1), then it may be
490	// turned into a memset of i8 -1, assuming that all the consecutive bytes
491	// are stored. A store of i32 0x01020304 can never be turned into a memset,
492	// but it can be turned into memset_pattern if the target supports it.
493	Value SplatValue = isBytewiseValue(StoredVal, DL);
494	Constant *PatternValue = nullptr;
495
496	// Note: memset and memset_pattern on unordered-atomic is yet not supported
497	bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
498
499	// If we're allowed to form a memset, and the stored value would be
500	// acceptable for memset, use it.
501	if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
502	// Verify that the stored value is loop invariant. If not, we can't
503	// promote the memset.
504	CurLoop->isLoopInvariant(SplatValue)) {
505	// It looks like we can use SplatValue.
506	return LegalStoreKind::Memset;
507	} else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
508	// Don't create memset_pattern16s with address spaces.
509	StorePtr->getType()->getPointerAddressSpace() == 0 &&
510	(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
	Although the value stored to 'PatternValue' is used in the enclosing expression, the value is never actually read from 'PatternValue'
511	// It looks like we can use PatternValue!
512	return LegalStoreKind::MemsetPattern;
513	}
514
515	// Otherwise, see if the store can be turned into a memcpy.
516	if (HasMemcpy && !DisableLIRP::Memcpy) {
517	// Check to see if the stride matches the size of the store. If so, then we
518	// know that every byte is touched in the loop.
519	APInt Stride = getStoreStride(StoreEv);
520	unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
521	if (StoreSize != Stride && StoreSize != -Stride)
522	return LegalStoreKind::None;
523
524	// The store must be feeding a non-volatile load.
525	LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
526
527	// Only allow non-volatile loads
528	if (!LI \|\| LI->isVolatile())
529	return LegalStoreKind::None;
530	// Only allow simple or unordered-atomic loads
531	if (!LI->isUnordered())
532	return LegalStoreKind::None;
533
534	// See if the pointer expression is an AddRec like {base,+,1} on the current
535	// loop, which indicates a strided load. If we have something else, it's a
536	// random load we can't handle.
537	const SCEVAddRecExpr *LoadEv =
538	dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
539	if (!LoadEv \|\| LoadEv->getLoop() != CurLoop \|\| !LoadEv->isAffine())
540	return LegalStoreKind::None;
541
542	// The store and load must share the same stride.
543	if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
544	return LegalStoreKind::None;
545
546	// Success. This store can be converted into a memcpy.
547	UnorderedAtomic = UnorderedAtomic \|\| LI->isAtomic();
548	return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
549	: LegalStoreKind::Memcpy;
550	}
551	// This store can't be transformed into a memset/memcpy.
552	return LegalStoreKind::None;
553	}
554
555	void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
556	StoreRefsForMemset.clear();
557	StoreRefsForMemsetPattern.clear();
558	StoreRefsForMemcpy.clear();
559	for (Instruction &I : *BB) {
560	StoreInst *SI = dyn_cast<StoreInst>(&I);
561	if (!SI)
562	continue;
563
564	// Make sure this is a strided store with a constant stride.
565	switch (isLegalStore(SI)) {
566	case LegalStoreKind::None:
567	// Nothing to do
568	break;
569	case LegalStoreKind::Memset: {
570	// Find the base pointer.
571	Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
572	StoreRefsForMemset[Ptr].push_back(SI);
573	} break;
574	case LegalStoreKind::MemsetPattern: {
575	// Find the base pointer.
576	Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
577	StoreRefsForMemsetPattern[Ptr].push_back(SI);
578	} break;
579	case LegalStoreKind::Memcpy:
580	case LegalStoreKind::UnorderedAtomicMemcpy:
581	StoreRefsForMemcpy.push_back(SI);
582	break;
583	default:
584	assert(false && "unhandled return value")((false && "unhandled return value") ? static_cast< void> (0) : __assert_fail ("false && \"unhandled return value\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 584, __PRETTY_FUNCTION__));
585	break;
586	}
587	}
588	}
589
590	/// runOnLoopBlock - Process the specified block, which lives in a counted loop
591	/// with the specified backedge count. This block is known to be in the current
592	/// loop and not in any subloops.
593	bool LoopIdiomRecognize::runOnLoopBlock(
594	BasicBlock BB, const SCEV BECount,
595	SmallVectorImpl<BasicBlock *> &ExitBlocks) {
596	// We can only promote stores in this block if they are unconditionally
597	// executed in the loop. For a block to be unconditionally executed, it has
598	// to dominate all the exit blocks of the loop. Verify this now.
599	for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
600	if (!DT->dominates(BB, ExitBlocks[i]))
601	return false;
602
603	bool MadeChange = false;
604	// Look for store instructions, which may be optimized to memset/memcpy.
605	collectStores(BB);
606
607	// Look for a single store or sets of stores with a common base, which can be
608	// optimized into a memset (memset_pattern). The latter most commonly happens
609	// with structs and handunrolled loops.
610	for (auto &SL : StoreRefsForMemset)
611	MadeChange \|= processLoopStores(SL.second, BECount, ForMemset::Yes);
612
613	for (auto &SL : StoreRefsForMemsetPattern)
614	MadeChange \|= processLoopStores(SL.second, BECount, ForMemset::No);
615
616	// Optimize the store into a memcpy, if it feeds an similarly strided load.
617	for (auto &SI : StoreRefsForMemcpy)
618	MadeChange \|= processLoopStoreOfLoopLoad(SI, BECount);
619
620	for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
621	Instruction Inst = &I++;
622	// Look for memset instructions, which may be optimized to a larger memset.
623	if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
624	WeakTrackingVH InstPtr(&*I);
625	if (!processLoopMemSet(MSI, BECount))
626	continue;
627	MadeChange = true;
628
629	// If processing the memset invalidated our iterator, start over from the
630	// top of the block.
631	if (!InstPtr)
632	I = BB->begin();
633	continue;
634	}
635	}
636
637	return MadeChange;
638	}
639
640	/// See if this store(s) can be promoted to a memset.
641	bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
642	const SCEV *BECount, ForMemset For) {
643	// Try to find consecutive stores that can be transformed into memsets.
644	SetVector<StoreInst *> Heads, Tails;
645	SmallDenseMap<StoreInst , StoreInst > ConsecutiveChain;
646
647	// Do a quadratic search on all of the given stores and find
648	// all of the pairs of stores that follow each other.
649	SmallVector<unsigned, 16> IndexQueue;
650	for (unsigned i = 0, e = SL.size(); i < e; ++i) {
651	assert(SL[i]->isSimple() && "Expected only non-volatile stores.")((SL[i]->isSimple() && "Expected only non-volatile stores." ) ? static_cast<void> (0) : __assert_fail ("SL[i]->isSimple() && \"Expected only non-volatile stores.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 651, __PRETTY_FUNCTION__));
652
653	Value *FirstStoredVal = SL[i]->getValueOperand();
654	Value *FirstStorePtr = SL[i]->getPointerOperand();
655	const SCEVAddRecExpr *FirstStoreEv =
656	cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
657	APInt FirstStride = getStoreStride(FirstStoreEv);
658	unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
659
660	// See if we can optimize just this store in isolation.
661	if (FirstStride == FirstStoreSize \|\| -FirstStride == FirstStoreSize) {
662	Heads.insert(SL[i]);
663	continue;
664	}
665
666	Value *FirstSplatValue = nullptr;
667	Constant *FirstPatternValue = nullptr;
668
669	if (For == ForMemset::Yes)
670	FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
671	else
672	FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
673
674	assert((FirstSplatValue \|\| FirstPatternValue) &&(((FirstSplatValue \|\| FirstPatternValue) && "Expected either splat value or pattern value." ) ? static_cast<void> (0) : __assert_fail ("(FirstSplatValue \|\| FirstPatternValue) && \"Expected either splat value or pattern value.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 675, __PRETTY_FUNCTION__))
675	"Expected either splat value or pattern value.")(((FirstSplatValue \|\| FirstPatternValue) && "Expected either splat value or pattern value." ) ? static_cast<void> (0) : __assert_fail ("(FirstSplatValue \|\| FirstPatternValue) && \"Expected either splat value or pattern value.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 675, __PRETTY_FUNCTION__));
676
677	IndexQueue.clear();
678	// If a store has multiple consecutive store candidates, search Stores
679	// array according to the sequence: from i+1 to e, then from i-1 to 0.
680	// This is because usually pairing with immediate succeeding or preceding
681	// candidate create the best chance to find memset opportunity.
682	unsigned j = 0;
683	for (j = i + 1; j < e; ++j)
684	IndexQueue.push_back(j);
685	for (j = i; j > 0; --j)
686	IndexQueue.push_back(j - 1);
687
688	for (auto &k : IndexQueue) {
689	assert(SL[k]->isSimple() && "Expected only non-volatile stores.")((SL[k]->isSimple() && "Expected only non-volatile stores." ) ? static_cast<void> (0) : __assert_fail ("SL[k]->isSimple() && \"Expected only non-volatile stores.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 689, __PRETTY_FUNCTION__));
690	Value *SecondStorePtr = SL[k]->getPointerOperand();
691	const SCEVAddRecExpr *SecondStoreEv =
692	cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
693	APInt SecondStride = getStoreStride(SecondStoreEv);
694
695	if (FirstStride != SecondStride)
696	continue;
697
698	Value *SecondStoredVal = SL[k]->getValueOperand();
699	Value *SecondSplatValue = nullptr;
700	Constant *SecondPatternValue = nullptr;
701
702	if (For == ForMemset::Yes)
703	SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
704	else
705	SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
706
707	assert((SecondSplatValue \|\| SecondPatternValue) &&(((SecondSplatValue \|\| SecondPatternValue) && "Expected either splat value or pattern value." ) ? static_cast<void> (0) : __assert_fail ("(SecondSplatValue \|\| SecondPatternValue) && \"Expected either splat value or pattern value.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 708, __PRETTY_FUNCTION__))
708	"Expected either splat value or pattern value.")(((SecondSplatValue \|\| SecondPatternValue) && "Expected either splat value or pattern value." ) ? static_cast<void> (0) : __assert_fail ("(SecondSplatValue \|\| SecondPatternValue) && \"Expected either splat value or pattern value.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 708, __PRETTY_FUNCTION__));
709
710	if (isConsecutiveAccess(SL[i], SL[k], DL, SE, false)) {
711	if (For == ForMemset::Yes) {
712	if (isa<UndefValue>(FirstSplatValue))
713	FirstSplatValue = SecondSplatValue;
714	if (FirstSplatValue != SecondSplatValue)
715	continue;
716	} else {
717	if (isa<UndefValue>(FirstPatternValue))
718	FirstPatternValue = SecondPatternValue;
719	if (FirstPatternValue != SecondPatternValue)
720	continue;
721	}
722	Tails.insert(SL[k]);
723	Heads.insert(SL[i]);
724	ConsecutiveChain[SL[i]] = SL[k];
725	break;
726	}
727	}
728	}
729
730	// We may run into multiple chains that merge into a single chain. We mark the
731	// stores that we transformed so that we don't visit the same store twice.
732	SmallPtrSet<Value *, 16> TransformedStores;
733	bool Changed = false;
734
735	// For stores that start but don't end a link in the chain:
736	for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
737	it != e; ++it) {
738	if (Tails.count(*it))
739	continue;
740
741	// We found a store instr that starts a chain. Now follow the chain and try
742	// to transform it.
743	SmallPtrSet<Instruction *, 8> AdjacentStores;
744	StoreInst I = it;
745
746	StoreInst *HeadStore = I;
747	unsigned StoreSize = 0;
748
749	// Collect the chain into a list.
750	while (Tails.count(I) \|\| Heads.count(I)) {
751	if (TransformedStores.count(I))
752	break;
753	AdjacentStores.insert(I);
754
755	StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
756	// Move to the next value in the chain.
757	I = ConsecutiveChain[I];
758	}
759
760	Value *StoredVal = HeadStore->getValueOperand();
761	Value *StorePtr = HeadStore->getPointerOperand();
762	const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
763	APInt Stride = getStoreStride(StoreEv);
764
765	// Check to see if the stride matches the size of the stores. If so, then
766	// we know that every byte is touched in the loop.
767	if (StoreSize != Stride && StoreSize != -Stride)
768	continue;
769
770	bool NegStride = StoreSize == -Stride;
771
772	if (processLoopStridedStore(StorePtr, StoreSize,
773	MaybeAlign(HeadStore->getAlignment()),
774	StoredVal, HeadStore, AdjacentStores, StoreEv,
775	BECount, NegStride)) {
776	TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
777	Changed = true;
778	}
779	}
780
781	return Changed;
782	}
783
784	/// processLoopMemSet - See if this memset can be promoted to a large memset.
785	bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
786	const SCEV *BECount) {
787	// We can only handle non-volatile memsets with a constant size.
788	if (MSI->isVolatile() \|\| !isa<ConstantInt>(MSI->getLength()))
789	return false;
790
791	// If we're not allowed to hack on memset, we fail.
792	if (!HasMemset)
793	return false;
794
795	Value *Pointer = MSI->getDest();
796
797	// See if the pointer expression is an AddRec like {base,+,1} on the current
798	// loop, which indicates a strided store. If we have something else, it's a
799	// random store we can't handle.
800	const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
801	if (!Ev \|\| Ev->getLoop() != CurLoop \|\| !Ev->isAffine())
802	return false;
803
804	// Reject memsets that are so large that they overflow an unsigned.
805	uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
806	if ((SizeInBytes >> 32) != 0)
807	return false;
808
809	// Check to see if the stride matches the size of the memset. If so, then we
810	// know that every byte is touched in the loop.
811	const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
812	if (!ConstStride)
813	return false;
814
815	APInt Stride = ConstStride->getAPInt();
816	if (SizeInBytes != Stride && SizeInBytes != -Stride)
817	return false;
818
819	// Verify that the memset value is loop invariant. If not, we can't promote
820	// the memset.
821	Value *SplatValue = MSI->getValue();
822	if (!SplatValue \|\| !CurLoop->isLoopInvariant(SplatValue))
823	return false;
824
825	SmallPtrSet<Instruction *, 1> MSIs;
826	MSIs.insert(MSI);
827	bool NegStride = SizeInBytes == -Stride;
828	return processLoopStridedStore(
829	Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
830	SplatValue, MSI, MSIs, Ev, BECount, NegStride, /IsLoopMemset=/true);
831	}
832
833	/// mayLoopAccessLocation - Return true if the specified loop might access the
834	/// specified pointer location, which is a loop-strided access. The 'Access'
835	/// argument specifies what the verboten forms of access are (read or write).
836	static bool
837	mayLoopAccessLocation(Value Ptr, ModRefInfo Access, Loop L,
838	const SCEV *BECount, unsigned StoreSize,
839	AliasAnalysis &AA,
840	SmallPtrSetImpl<Instruction *> &IgnoredStores) {
841	// Get the location that may be stored across the loop. Since the access is
842	// strided positively through memory, we say that the modified location starts
843	// at the pointer and has infinite size.
844	LocationSize AccessSize = LocationSize::unknown();
845
846	// If the loop iterates a fixed number of times, we can refine the access size
847	// to be exactly the size of the memset, which is (BECount+1)*StoreSize
848	if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
849	AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
850	StoreSize);
851
852	// TODO: For this to be really effective, we have to dive into the pointer
853	// operand in the store. Store to &A[i] of 100 will always return may alias
854	// with store of &A[100], we need to StoreLoc to be "A" with size of 100,
855	// which will then no-alias a store to &A[100].
856	MemoryLocation StoreLoc(Ptr, AccessSize);
857
858	for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
859	++BI)
860	for (Instruction &I : **BI)
861	if (IgnoredStores.count(&I) == 0 &&
862	isModOrRefSet(
863	intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
864	return true;
865
866	return false;
867	}
868
869	// If we have a negative stride, Start refers to the end of the memory location
870	// we're trying to memset. Therefore, we need to recompute the base pointer,
871	// which is just Start - BECount*Size.
872	static const SCEV getStartForNegStride(const SCEV Start, const SCEV *BECount,
873	Type *IntPtr, unsigned StoreSize,
874	ScalarEvolution *SE) {
875	const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
876	if (StoreSize != 1)
877	Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
878	SCEV::FlagNUW);
879	return SE->getMinusSCEV(Start, Index);
880	}
881
882	/// Compute the number of bytes as a SCEV from the backedge taken count.
883	///
884	/// This also maps the SCEV into the provided type and tries to handle the
885	/// computation in a way that will fold cleanly.
886	static const SCEV getNumBytes(const SCEV BECount, Type *IntPtr,
887	unsigned StoreSize, Loop *CurLoop,
888	const DataLayout DL, ScalarEvolution SE) {
889	const SCEV *NumBytesS;
890	// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
891	// pointer size if it isn't already.
892	//
893	// If we're going to need to zero extend the BE count, check if we can add
894	// one to it prior to zero extending without overflow. Provided this is safe,
895	// it allows better simplification of the +1.
896	if (DL->getTypeSizeInBits(BECount->getType()) <
897	DL->getTypeSizeInBits(IntPtr) &&
898	SE->isLoopEntryGuardedByCond(
899	CurLoop, ICmpInst::ICMP_NE, BECount,
900	SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
901	NumBytesS = SE->getZeroExtendExpr(
902	SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
903	IntPtr);
904	} else {
905	NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
906	SE->getOne(IntPtr), SCEV::FlagNUW);
907	}
908
909	// And scale it based on the store size.
910	if (StoreSize != 1) {
911	NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
912	SCEV::FlagNUW);
913	}
914	return NumBytesS;
915	}
916
917	/// processLoopStridedStore - We see a strided store of some value. If we can
918	/// transform this into a memset or memset_pattern in the loop preheader, do so.
919	bool LoopIdiomRecognize::processLoopStridedStore(
920	Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
921	Value StoredVal, Instruction TheStore,
922	SmallPtrSetImpl<Instruction > &Stores, const SCEVAddRecExpr Ev,
923	const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
924	Value SplatValue = isBytewiseValue(StoredVal, DL);
925	Constant *PatternValue = nullptr;
926
927	if (!SplatValue)
928	PatternValue = getMemSetPatternValue(StoredVal, DL);
929
930	assert((SplatValue \|\| PatternValue) &&(((SplatValue \|\| PatternValue) && "Expected either splat value or pattern value." ) ? static_cast<void> (0) : __assert_fail ("(SplatValue \|\| PatternValue) && \"Expected either splat value or pattern value.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 931, __PRETTY_FUNCTION__))
931	"Expected either splat value or pattern value.")(((SplatValue \|\| PatternValue) && "Expected either splat value or pattern value." ) ? static_cast<void> (0) : __assert_fail ("(SplatValue \|\| PatternValue) && \"Expected either splat value or pattern value.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 931, __PRETTY_FUNCTION__));
932
933	// The trip count of the loop and the base pointer of the addrec SCEV is
934	// guaranteed to be loop invariant, which means that it should dominate the
935	// header. This allows us to insert code for it in the preheader.
936	unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
937	BasicBlock *Preheader = CurLoop->getLoopPreheader();
938	IRBuilder<> Builder(Preheader->getTerminator());
939	SCEVExpander Expander(SE, DL, "loop-idiom");
940	SCEVExpanderCleaner ExpCleaner(Expander, *DT);
941
942	Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
943	Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
944
945	bool Changed = false;
946	const SCEV *Start = Ev->getStart();
947	// Handle negative strided loops.
948	if (NegStride)
949	Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
950
951	// TODO: ideally we should still be able to generate memset if SCEV expander
952	// is taught to generate the dependencies at the latest point.
953	if (!isSafeToExpand(Start, *SE))
954	return Changed;
955
956	// Okay, we have a strided store "p[i]" of a splattable value. We can turn
957	// this into a memset in the loop preheader now if we want. However, this
958	// would be unsafe to do if there is anything else in the loop that may read
959	// or write to the aliased location. Check for any overlap by generating the
960	// base pointer and checking the region.
961	Value *BasePtr =
962	Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
963
964	// From here on out, conservatively report to the pass manager that we've
965	// changed the IR, even if we later clean up these added instructions. There
966	// may be structural differences e.g. in the order of use lists not accounted
967	// for in just a textual dump of the IR. This is written as a variable, even
968	// though statically all the places this dominates could be replaced with
969	// 'true', with the hope that anyone trying to be clever / "more precise" with
970	// the return value will read this comment, and leave them alone.
971	Changed = true;
972
973	if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
974	StoreSize, *AA, Stores))
975	return Changed;
976
977	if (avoidLIRForMultiBlockLoop(/IsMemset=/true, IsLoopMemset))
978	return Changed;
979
980	// Okay, everything looks good, insert the memset.
981
982	const SCEV *NumBytesS =
983	getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
984
985	// TODO: ideally we should still be able to generate memset if SCEV expander
986	// is taught to generate the dependencies at the latest point.
987	if (!isSafeToExpand(NumBytesS, *SE))
988	return Changed;
989
990	Value *NumBytes =
991	Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
992
993	CallInst *NewCall;
994	if (SplatValue) {
995	NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
996	MaybeAlign(StoreAlignment));
997	} else {
998	// Everything is emitted in default address space
999	Type *Int8PtrTy = DestInt8PtrTy;
1000
1001	Module *M = TheStore->getModule();
1002	StringRef FuncName = "memset_pattern16";
1003	FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
1004	Int8PtrTy, Int8PtrTy, IntIdxTy);
1005	inferLibFuncAttributes(M, FuncName, *TLI);
1006
1007	// Otherwise we should form a memset_pattern16. PatternValue is known to be
1008	// an constant array of 16-bytes. Plop the value into a mergable global.
1009	GlobalVariable GV = new GlobalVariable(M, PatternValue->getType(), true,
1010	GlobalValue::PrivateLinkage,
1011	PatternValue, ".memset_pattern");
1012	GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
1013	GV->setAlignment(Align(16));
1014	Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
1015	NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
1016	}
1017	NewCall->setDebugLoc(TheStore->getDebugLoc());
1018
1019	if (MSSAU) {
1020	MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1021	NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1022	MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1023	}
1024
1025	LLVM_DEBUG(dbgs() << " Formed memset: " << NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memset: " << NewCall << "\n" << " from store to: " << Ev << " at: " << TheStore << "\n"; } } while (false)
1026	<< " from store to: " << Ev << " at: " << TheStoredo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memset: " << NewCall << "\n" << " from store to: " << Ev << " at: " << *TheStore << "\n"; } } while (false)
1027	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memset: " << NewCall << "\n" << " from store to: " << Ev << " at: " << *TheStore << "\n"; } } while (false);
1028
1029	ORE.emit([&]() {
1030	return OptimizationRemark(DEBUG_TYPE"loop-idiom", "ProcessLoopStridedStore",
1031	NewCall->getDebugLoc(), Preheader)
1032	<< "Transformed loop-strided store into a call to "
1033	<< ore::NV("NewFunction", NewCall->getCalledFunction())
1034	<< "() function";
1035	});
1036
1037	// Okay, the memset has been formed. Zap the original store and anything that
1038	// feeds into it.
1039	for (auto *I : Stores) {
1040	if (MSSAU)
1041	MSSAU->removeMemoryAccess(I, true);
1042	deleteDeadInstruction(I);
1043	}
1044	if (MSSAU && VerifyMemorySSA)
1045	MSSAU->getMemorySSA()->verifyMemorySSA();
1046	++NumMemSet;
1047	ExpCleaner.markResultUsed();
1048	return true;
1049	}
1050
1051	/// If the stored value is a strided load in the same loop with the same stride
1052	/// this may be transformable into a memcpy. This kicks in for stuff like
1053	/// for (i) A[i] = B[i];
1054	bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1055	const SCEV *BECount) {
1056	assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.")((SI->isUnordered() && "Expected only non-volatile non-ordered stores." ) ? static_cast<void> (0) : __assert_fail ("SI->isUnordered() && \"Expected only non-volatile non-ordered stores.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 1056, __PRETTY_FUNCTION__));
1057
1058	Value *StorePtr = SI->getPointerOperand();
1059	const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1060	APInt Stride = getStoreStride(StoreEv);
1061	unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1062	bool NegStride = StoreSize == -Stride;
1063
1064	// The store must be feeding a non-volatile load.
1065	LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1066	assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.")((LI->isUnordered() && "Expected only non-volatile non-ordered loads." ) ? static_cast<void> (0) : __assert_fail ("LI->isUnordered() && \"Expected only non-volatile non-ordered loads.\"" , "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 1066, __PRETTY_FUNCTION__));
1067
1068	// See if the pointer expression is an AddRec like {base,+,1} on the current
1069	// loop, which indicates a strided load. If we have something else, it's a
1070	// random load we can't handle.
1071	const SCEVAddRecExpr *LoadEv =
1072	cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1073
1074	// The trip count of the loop and the base pointer of the addrec SCEV is
1075	// guaranteed to be loop invariant, which means that it should dominate the
1076	// header. This allows us to insert code for it in the preheader.
1077	BasicBlock *Preheader = CurLoop->getLoopPreheader();
1078	IRBuilder<> Builder(Preheader->getTerminator());
1079	SCEVExpander Expander(SE, DL, "loop-idiom");
1080
1081	SCEVExpanderCleaner ExpCleaner(Expander, *DT);
1082
1083	bool Changed = false;
1084	const SCEV *StrStart = StoreEv->getStart();
1085	unsigned StrAS = SI->getPointerAddressSpace();
1086	Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
1087
1088	// Handle negative strided loops.
1089	if (NegStride)
1090	StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
1091
1092	// Okay, we have a strided store "p[i]" of a loaded value. We can turn
1093	// this into a memcpy in the loop preheader now if we want. However, this
1094	// would be unsafe to do if there is anything else in the loop that may read
1095	// or write the memory region we're storing to. This includes the load that
1096	// feeds the stores. Check for an alias by generating the base address and
1097	// checking everything.
1098	Value *StoreBasePtr = Expander.expandCodeFor(
1099	StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1100
1101	// From here on out, conservatively report to the pass manager that we've
1102	// changed the IR, even if we later clean up these added instructions. There
1103	// may be structural differences e.g. in the order of use lists not accounted
1104	// for in just a textual dump of the IR. This is written as a variable, even
1105	// though statically all the places this dominates could be replaced with
1106	// 'true', with the hope that anyone trying to be clever / "more precise" with
1107	// the return value will read this comment, and leave them alone.
1108	Changed = true;
1109
1110	SmallPtrSet<Instruction *, 1> Stores;
1111	Stores.insert(SI);
1112	if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1113	StoreSize, *AA, Stores))
1114	return Changed;
1115
1116	const SCEV *LdStart = LoadEv->getStart();
1117	unsigned LdAS = LI->getPointerAddressSpace();
1118
1119	// Handle negative strided loops.
1120	if (NegStride)
1121	LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
1122
1123	// For a memcpy, we have to make sure that the input array is not being
1124	// mutated by the loop.
1125	Value *LoadBasePtr = Expander.expandCodeFor(
1126	LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1127
1128	if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1129	StoreSize, *AA, Stores))
1130	return Changed;
1131
1132	if (avoidLIRForMultiBlockLoop())
1133	return Changed;
1134
1135	// Okay, everything is safe, we can transform this!
1136
1137	const SCEV *NumBytesS =
1138	getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
1139
1140	Value *NumBytes =
1141	Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1142
1143	CallInst *NewCall = nullptr;
1144	// Check whether to generate an unordered atomic memcpy:
1145	// If the load or store are atomic, then they must necessarily be unordered
1146	// by previous checks.
1147	if (!SI->isAtomic() && !LI->isAtomic())
1148	NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
1149	LI->getAlign(), NumBytes);
1150	else {
1151	// We cannot allow unaligned ops for unordered load/store, so reject
1152	// anything where the alignment isn't at least the element size.
1153	const Align StoreAlign = SI->getAlign();
1154	const Align LoadAlign = LI->getAlign();
1155	if (StoreAlign < StoreSize \|\| LoadAlign < StoreSize)
1156	return Changed;
1157
1158	// If the element.atomic memcpy is not lowered into explicit
1159	// loads/stores later, then it will be lowered into an element-size
1160	// specific lib call. If the lib call doesn't exist for our store size, then
1161	// we shouldn't generate the memcpy.
1162	if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1163	return Changed;
1164
1165	// Create the call.
1166	// Note that unordered atomic loads/stores are required by the spec to
1167	// have an alignment but non-atomic loads/stores may not.
1168	NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1169	StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1170	StoreSize);
1171	}
1172	NewCall->setDebugLoc(SI->getDebugLoc());
1173
1174	if (MSSAU) {
1175	MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1176	NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1177	MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1178	}
1179
1180	LLVM_DEBUG(dbgs() << " Formed memcpy: " << NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memcpy: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << LI << "\n" << " from store ptr=" << StoreEv << " at: " << SI << "\n"; } } while (false)
1181	<< " from load ptr=" << LoadEv << " at: " << LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memcpy: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << LI << "\n" << " from store ptr=" << StoreEv << " at: " << *SI << "\n"; } } while (false)
1182	<< " from store ptr=" << StoreEv << " at: " << SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memcpy: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << LI << "\n" << " from store ptr=" << StoreEv << " at: " << *SI << "\n"; } } while (false)
1183	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memcpy: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << LI << "\n" << " from store ptr=" << StoreEv << " at: " << *SI << "\n"; } } while (false);
1184
1185	ORE.emit([&]() {
1186	return OptimizationRemark(DEBUG_TYPE"loop-idiom", "ProcessLoopStoreOfLoopLoad",
1187	NewCall->getDebugLoc(), Preheader)
1188	<< "Formed a call to "
1189	<< ore::NV("NewFunction", NewCall->getCalledFunction())
1190	<< "() function";
1191	});
1192
1193	// Okay, the memcpy has been formed. Zap the original store and anything that
1194	// feeds into it.
1195	if (MSSAU)
1196	MSSAU->removeMemoryAccess(SI, true);
1197	deleteDeadInstruction(SI);
1198	if (MSSAU && VerifyMemorySSA)
1199	MSSAU->getMemorySSA()->verifyMemorySSA();
1200	++NumMemCpy;
1201	ExpCleaner.markResultUsed();
1202	return true;
1203	}
1204
1205	// When compiling for codesize we avoid idiom recognition for a multi-block loop
1206	// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1207	//
1208	bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1209	bool IsLoopMemset) {
1210	if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1211	if (!CurLoop->getParentLoop() && (!IsMemset \|\| !IsLoopMemset)) {
1212	LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " " << CurLoop->getHeader ()->getParent()->getName() << " : LIR " << ( IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n" ; } } while (false)
1213	<< " : LIR " << (IsMemset ? "Memset" : "Memcpy")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " " << CurLoop->getHeader ()->getParent()->getName() << " : LIR " << ( IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n" ; } } while (false)
1214	<< " avoided: multi-block top-level loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " " << CurLoop->getHeader ()->getParent()->getName() << " : LIR " << ( IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n" ; } } while (false);
1215	return true;
1216	}
1217	}
1218
1219	return false;
1220	}
1221
1222	bool LoopIdiomRecognize::runOnNoncountableLoop() {
1223	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
1224	<< CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
1225	<< "] Noncountable Loop %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
1226	<< CurLoop->getHeader()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false);
1227
1228	return recognizePopcount() \|\| recognizeAndInsertFFS();
1229	}
1230
1231	/// Check if the given conditional branch is based on the comparison between
1232	/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1233	/// true), the control yields to the loop entry. If the branch matches the
1234	/// behavior, the variable involved in the comparison is returned. This function
1235	/// will be called to see if the precondition and postcondition of the loop are
1236	/// in desirable form.
1237	static Value matchCondition(BranchInst BI, BasicBlock *LoopEntry,
1238	bool JmpOnZero = false) {
1239	if (!BI \|\| !BI->isConditional())
1240	return nullptr;
1241
1242	ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1243	if (!Cond)
1244	return nullptr;
1245
1246	ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1247	if (!CmpZero \|\| !CmpZero->isZero())
1248	return nullptr;
1249
1250	BasicBlock *TrueSucc = BI->getSuccessor(0);
1251	BasicBlock *FalseSucc = BI->getSuccessor(1);
1252	if (JmpOnZero)
1253	std::swap(TrueSucc, FalseSucc);
1254
1255	ICmpInst::Predicate Pred = Cond->getPredicate();
1256	if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) \|\|
1257	(Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1258	return Cond->getOperand(0);
1259
1260	return nullptr;
1261	}
1262
1263	// Check if the recurrence variable `VarX` is in the right form to create
1264	// the idiom. Returns the value coerced to a PHINode if so.
1265	static PHINode getRecurrenceVar(Value VarX, Instruction *DefX,
1266	BasicBlock *LoopEntry) {
1267	auto *PhiX = dyn_cast<PHINode>(VarX);
1268	if (PhiX && PhiX->getParent() == LoopEntry &&
1269	(PhiX->getOperand(0) == DefX \|\| PhiX->getOperand(1) == DefX))
1270	return PhiX;
1271	return nullptr;
1272	}
1273
1274	/// Return true iff the idiom is detected in the loop.
1275	///
1276	/// Additionally:
1277	/// 1) \p CntInst is set to the instruction counting the population bit.
1278	/// 2) \p CntPhi is set to the corresponding phi node.
1279	/// 3) \p Var is set to the value whose population bits are being counted.
1280	///
1281	/// The core idiom we are trying to detect is:
1282	/// \code
1283	/// if (x0 != 0)
1284	/// goto loop-exit // the precondition of the loop
1285	/// cnt0 = init-val;
1286	/// do {
1287	/// x1 = phi (x0, x2);
1288	/// cnt1 = phi(cnt0, cnt2);
1289	///
1290	/// cnt2 = cnt1 + 1;
1291	/// ...
1292	/// x2 = x1 & (x1 - 1);
1293	/// ...
1294	/// } while(x != 0);
1295	///
1296	/// loop-exit:
1297	/// \endcode
1298	static bool detectPopcountIdiom(Loop CurLoop, BasicBlock PreCondBB,
1299	Instruction &CntInst, PHINode &CntPhi,
1300	Value *&Var) {
1301	// step 1: Check to see if the look-back branch match this pattern:
1302	// "if (a!=0) goto loop-entry".
1303	BasicBlock *LoopEntry;
1304	Instruction DefX2, CountInst;
1305	Value VarX1, VarX0;
1306	PHINode PhiX, CountPhi;
1307
1308	DefX2 = CountInst = nullptr;
1309	VarX1 = VarX0 = nullptr;
1310	PhiX = CountPhi = nullptr;
1311	LoopEntry = *(CurLoop->block_begin());
1312
1313	// step 1: Check if the loop-back branch is in desirable form.
1314	{
1315	if (Value *T = matchCondition(
1316	dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1317	DefX2 = dyn_cast<Instruction>(T);
1318	else
1319	return false;
1320	}
1321
1322	// step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1323	{
1324	if (!DefX2 \|\| DefX2->getOpcode() != Instruction::And)
1325	return false;
1326
1327	BinaryOperator *SubOneOp;
1328
1329	if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1330	VarX1 = DefX2->getOperand(1);
1331	else {
1332	VarX1 = DefX2->getOperand(0);
1333	SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1334	}
1335	if (!SubOneOp \|\| SubOneOp->getOperand(0) != VarX1)
1336	return false;
1337
1338	ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1339	if (!Dec \|\|
1340	!((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) \|\|
1341	(SubOneOp->getOpcode() == Instruction::Add &&
1342	Dec->isMinusOne()))) {
1343	return false;
1344	}
1345	}
1346
1347	// step 3: Check the recurrence of variable X
1348	PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1349	if (!PhiX)
1350	return false;
1351
1352	// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1353	{
1354	CountInst = nullptr;
1355	for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1356	IterE = LoopEntry->end();
1357	Iter != IterE; Iter++) {
1358	Instruction Inst = &Iter;
1359	if (Inst->getOpcode() != Instruction::Add)
1360	continue;
1361
1362	ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1363	if (!Inc \|\| !Inc->isOne())
1364	continue;
1365
1366	PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1367	if (!Phi)
1368	continue;
1369
1370	// Check if the result of the instruction is live of the loop.
1371	bool LiveOutLoop = false;
1372	for (User *U : Inst->users()) {
1373	if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1374	LiveOutLoop = true;
1375	break;
1376	}
1377	}
1378
1379	if (LiveOutLoop) {
1380	CountInst = Inst;
1381	CountPhi = Phi;
1382	break;
1383	}
1384	}
1385
1386	if (!CountInst)
1387	return false;
1388	}
1389
1390	// step 5: check if the precondition is in this form:
1391	// "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1392	{
1393	auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1394	Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1395	if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1396	return false;
1397
1398	CntInst = CountInst;
1399	CntPhi = CountPhi;
1400	Var = T;
1401	}
1402
1403	return true;
1404	}
1405
1406	/// Return true if the idiom is detected in the loop.
1407	///
1408	/// Additionally:
1409	/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1410	/// or nullptr if there is no such.
1411	/// 2) \p CntPhi is set to the corresponding phi node
1412	/// or nullptr if there is no such.
1413	/// 3) \p Var is set to the value whose CTLZ could be used.
1414	/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1415	///
1416	/// The core idiom we are trying to detect is:
1417	/// \code
1418	/// if (x0 == 0)
1419	/// goto loop-exit // the precondition of the loop
1420	/// cnt0 = init-val;
1421	/// do {
1422	/// x = phi (x0, x.next); //PhiX
1423	/// cnt = phi(cnt0, cnt.next);
1424	///
1425	/// cnt.next = cnt + 1;
1426	/// ...
1427	/// x.next = x >> 1; // DefX
1428	/// ...
1429	/// } while(x.next != 0);
1430	///
1431	/// loop-exit:
1432	/// \endcode
1433	static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1434	Intrinsic::ID &IntrinID, Value *&InitX,
1435	Instruction &CntInst, PHINode &CntPhi,
1436	Instruction *&DefX) {
1437	BasicBlock *LoopEntry;
1438	Value *VarX = nullptr;
1439
1440	DefX = nullptr;
1441	CntInst = nullptr;
1442	CntPhi = nullptr;
1443	LoopEntry = *(CurLoop->block_begin());
1444
1445	// step 1: Check if the loop-back branch is in desirable form.
1446	if (Value *T = matchCondition(
1447	dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1448	DefX = dyn_cast<Instruction>(T);
1449	else
1450	return false;
1451
1452	// step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1453	if (!DefX \|\| !DefX->isShift())
1454	return false;
1455	IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1456	Intrinsic::ctlz;
1457	ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1458	if (!Shft \|\| !Shft->isOne())
1459	return false;
1460	VarX = DefX->getOperand(0);
1461
1462	// step 3: Check the recurrence of variable X
1463	PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1464	if (!PhiX)
1465	return false;
1466
1467	InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1468
1469	// Make sure the initial value can't be negative otherwise the ashr in the
1470	// loop might never reach zero which would make the loop infinite.
1471	if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1472	return false;
1473
1474	// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1475	// TODO: We can skip the step. If loop trip count is known (CTLZ),
1476	// then all uses of "cnt.next" could be optimized to the trip count
1477	// plus "cnt0". Currently it is not optimized.
1478	// This step could be used to detect POPCNT instruction:
1479	// cnt.next = cnt + (x.next & 1)
1480	for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1481	IterE = LoopEntry->end();
1482	Iter != IterE; Iter++) {
1483	Instruction Inst = &Iter;
1484	if (Inst->getOpcode() != Instruction::Add)
1485	continue;
1486
1487	ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1488	if (!Inc \|\| !Inc->isOne())
1489	continue;
1490
1491	PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1492	if (!Phi)
1493	continue;
1494
1495	CntInst = Inst;
1496	CntPhi = Phi;
1497	break;
1498	}
1499	if (!CntInst)
1500	return false;
1501
1502	return true;
1503	}
1504
1505	/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1506	/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1507	/// trip count returns true; otherwise, returns false.
1508	bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1509	// Give up if the loop has multiple blocks or multiple backedges.
1510	if (CurLoop->getNumBackEdges() != 1 \|\| CurLoop->getNumBlocks() != 1)
1511	return false;
1512
1513	Intrinsic::ID IntrinID;
1514	Value *InitX;
1515	Instruction *DefX = nullptr;
1516	PHINode *CntPhi = nullptr;
1517	Instruction *CntInst = nullptr;
1518	// Help decide if transformation is profitable. For ShiftUntilZero idiom,
1519	// this is always 6.
1520	size_t IdiomCanonicalSize = 6;
1521
1522	if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1523	CntInst, CntPhi, DefX))
1524	return false;
1525
1526	bool IsCntPhiUsedOutsideLoop = false;
1527	for (User *U : CntPhi->users())
1528	if (!CurLoop->contains(cast<Instruction>(U))) {
1529	IsCntPhiUsedOutsideLoop = true;
1530	break;
1531	}
1532	bool IsCntInstUsedOutsideLoop = false;
1533	for (User *U : CntInst->users())
1534	if (!CurLoop->contains(cast<Instruction>(U))) {
1535	IsCntInstUsedOutsideLoop = true;
1536	break;
1537	}
1538	// If both CntInst and CntPhi are used outside the loop the profitability
1539	// is questionable.
1540	if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1541	return false;
1542
1543	// For some CPUs result of CTLZ(X) intrinsic is undefined
1544	// when X is 0. If we can not guarantee X != 0, we need to check this
1545	// when expand.
1546	bool ZeroCheck = false;
1547	// It is safe to assume Preheader exist as it was checked in
1548	// parent function RunOnLoop.
1549	BasicBlock *PH = CurLoop->getLoopPreheader();
1550
1551	// If we are using the count instruction outside the loop, make sure we
1552	// have a zero check as a precondition. Without the check the loop would run
1553	// one iteration for before any check of the input value. This means 0 and 1
1554	// would have identical behavior in the original loop and thus
1555	if (!IsCntPhiUsedOutsideLoop) {
1556	auto *PreCondBB = PH->getSinglePredecessor();
1557	if (!PreCondBB)
1558	return false;
1559	auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1560	if (!PreCondBI)
1561	return false;
1562	if (matchCondition(PreCondBI, PH) != InitX)
1563	return false;
1564	ZeroCheck = true;
1565	}
1566
1567	// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1568	// profitable if we delete the loop.
1569
1570	// the loop has only 6 instructions:
1571	// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1572	// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1573	// %shr = ashr %n.addr.0, 1
1574	// %tobool = icmp eq %shr, 0
1575	// %inc = add nsw %i.0, 1
1576	// br i1 %tobool
1577
1578	const Value *Args[] = {
1579	InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
1580	: ConstantInt::getFalse(InitX->getContext())};
1581
1582	// @llvm.dbg doesn't count as they have no semantic effect.
1583	auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1584	uint32_t HeaderSize =
1585	std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1586
1587	IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1588	int Cost =
1589	TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1590	if (HeaderSize != IdiomCanonicalSize &&
1591	Cost > TargetTransformInfo::TCC_Basic)
1592	return false;
1593
1594	transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1595	DefX->getDebugLoc(), ZeroCheck,
1596	IsCntPhiUsedOutsideLoop);
1597	return true;
1598	}
1599
1600	/// Recognizes a population count idiom in a non-countable loop.
1601	///
1602	/// If detected, transforms the relevant code to issue the popcount intrinsic
1603	/// function call, and returns true; otherwise, returns false.
1604	bool LoopIdiomRecognize::recognizePopcount() {
1605	if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
1606	return false;
1607
1608	// Counting population are usually conducted by few arithmetic instructions.
1609	// Such instructions can be easily "absorbed" by vacant slots in a
1610	// non-compact loop. Therefore, recognizing popcount idiom only makes sense
1611	// in a compact loop.
1612
1613	// Give up if the loop has multiple blocks or multiple backedges.
1614	if (CurLoop->getNumBackEdges() != 1 \|\| CurLoop->getNumBlocks() != 1)
1615	return false;
1616
1617	BasicBlock LoopBody = (CurLoop->block_begin());
1618	if (LoopBody->size() >= 20) {
1619	// The loop is too big, bail out.
1620	return false;
1621	}
1622
1623	// It should have a preheader containing nothing but an unconditional branch.
1624	BasicBlock *PH = CurLoop->getLoopPreheader();
1625	if (!PH \|\| &PH->front() != PH->getTerminator())
1626	return false;
1627	auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1628	if (!EntryBI \|\| EntryBI->isConditional())
1629	return false;
1630
1631	// It should have a precondition block where the generated popcount intrinsic
1632	// function can be inserted.
1633	auto *PreCondBB = PH->getSinglePredecessor();
1634	if (!PreCondBB)
1635	return false;
1636	auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1637	if (!PreCondBI \|\| PreCondBI->isUnconditional())
1638	return false;
1639
1640	Instruction *CntInst;
1641	PHINode *CntPhi;
1642	Value *Val;
1643	if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1644	return false;
1645
1646	transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1647	return true;
1648	}
1649
1650	static CallInst createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value Val,
1651	const DebugLoc &DL) {
1652	Value *Ops[] = {Val};
1653	Type *Tys[] = {Val->getType()};
1654
1655	Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1656	Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1657	CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1658	CI->setDebugLoc(DL);
1659
1660	return CI;
1661	}
1662
1663	static CallInst createFFSIntrinsic(IRBuilder<> &IRBuilder, Value Val,
1664	const DebugLoc &DL, bool ZeroCheck,
1665	Intrinsic::ID IID) {
1666	Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1667	Type *Tys[] = {Val->getType()};
1668
1669	Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1670	Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
1671	CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1672	CI->setDebugLoc(DL);
1673
1674	return CI;
1675	}
1676
1677	/// Transform the following loop (Using CTLZ, CTTZ is similar):
1678	/// loop:
1679	/// CntPhi = PHI [Cnt0, CntInst]
1680	/// PhiX = PHI [InitX, DefX]
1681	/// CntInst = CntPhi + 1
1682	/// DefX = PhiX >> 1
1683	/// LOOP_BODY
1684	/// Br: loop if (DefX != 0)
1685	/// Use(CntPhi) or Use(CntInst)
1686	///
1687	/// Into:
1688	/// If CntPhi used outside the loop:
1689	/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1690	/// Count = CountPrev + 1
1691	/// else
1692	/// Count = BitWidth(InitX) - CTLZ(InitX)
1693	/// loop:
1694	/// CntPhi = PHI [Cnt0, CntInst]
1695	/// PhiX = PHI [InitX, DefX]
1696	/// PhiCount = PHI [Count, Dec]
1697	/// CntInst = CntPhi + 1
1698	/// DefX = PhiX >> 1
1699	/// Dec = PhiCount - 1
1700	/// LOOP_BODY
1701	/// Br: loop if (Dec != 0)
1702	/// Use(CountPrev + Cnt0) // Use(CntPhi)
1703	/// or
1704	/// Use(Count + Cnt0) // Use(CntInst)
1705	///
1706	/// If LOOP_BODY is empty the loop will be deleted.
1707	/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1708	void LoopIdiomRecognize::transformLoopToCountable(
1709	Intrinsic::ID IntrinID, BasicBlock Preheader, Instruction CntInst,
1710	PHINode CntPhi, Value InitX, Instruction *DefX, const DebugLoc &DL,
1711	bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1712	BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
1713
1714	// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
1715	IRBuilder<> Builder(PreheaderBr);
1716	Builder.SetCurrentDebugLocation(DL);
1717	Value FFS, Count, CountPrev, NewCount, *InitXNext;
1718
1719	// Count = BitWidth - CTLZ(InitX);
1720	// If there are uses of CntPhi create:
1721	// CountPrev = BitWidth - CTLZ(InitX >> 1);
1722	if (IsCntPhiUsedOutsideLoop) {
1723	if (DefX->getOpcode() == Instruction::AShr)
1724	InitXNext =
1725	Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
1726	else if (DefX->getOpcode() == Instruction::LShr)
1727	InitXNext =
1728	Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
1729	else if (DefX->getOpcode() == Instruction::Shl) // cttz
1730	InitXNext =
1731	Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
1732	else
1733	llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-12~++20200911111112+c0825fa5fc3/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 1733);
1734	} else
1735	InitXNext = InitX;
1736	FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
1737	Count = Builder.CreateSub(
1738	ConstantInt::get(FFS->getType(),
1739	FFS->getType()->getIntegerBitWidth()),
1740	FFS);
1741	if (IsCntPhiUsedOutsideLoop) {
1742	CountPrev = Count;
1743	Count = Builder.CreateAdd(
1744	CountPrev,
1745	ConstantInt::get(CountPrev->getType(), 1));
1746	}
1747
1748	NewCount = Builder.CreateZExtOrTrunc(
1749	IsCntPhiUsedOutsideLoop ? CountPrev : Count,
1750	cast<IntegerType>(CntInst->getType()));
1751
1752	// If the counter's initial value is not zero, insert Add Inst.
1753	Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1754	ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1755	if (!InitConst \|\| !InitConst->isZero())
1756	NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1757
1758	// Step 2: Insert new IV and loop condition:
1759	// loop:
1760	// ...
1761	// PhiCount = PHI [Count, Dec]
1762	// ...
1763	// Dec = PhiCount - 1
1764	// ...
1765	// Br: loop if (Dec != 0)
1766	BasicBlock Body = (CurLoop->block_begin());
1767	auto *LbBr = cast<BranchInst>(Body->getTerminator());
1768	ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1769	Type *Ty = Count->getType();
1770
1771	PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1772
1773	Builder.SetInsertPoint(LbCond);
1774	Instruction *TcDec = cast<Instruction>(
1775	Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1776	"tcdec", false, true));
1777
1778	TcPhi->addIncoming(Count, Preheader);
1779	TcPhi->addIncoming(TcDec, Body);
1780
1781	CmpInst::Predicate Pred =
1782	(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1783	LbCond->setPredicate(Pred);
1784	LbCond->setOperand(0, TcDec);
1785	LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1786
1787	// Step 3: All the references to the original counter outside
1788	// the loop are replaced with the NewCount
1789	if (IsCntPhiUsedOutsideLoop)
1790	CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1791	else
1792	CntInst->replaceUsesOutsideBlock(NewCount, Body);
1793
1794	// step 4: Forget the "non-computable" trip-count SCEV associated with the
1795	// loop. The loop would otherwise not be deleted even if it becomes empty.
1796	SE->forgetLoop(CurLoop);
1797	}
1798
1799	void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1800	Instruction *CntInst,
1801	PHINode CntPhi, Value Var) {
1802	BasicBlock *PreHead = CurLoop->getLoopPreheader();
1803	auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1804	const DebugLoc &DL = CntInst->getDebugLoc();
1805
1806	// Assuming before transformation, the loop is following:
1807	// if (x) // the precondition
1808	// do { cnt++; x &= x - 1; } while(x);
1809
1810	// Step 1: Insert the ctpop instruction at the end of the precondition block
1811	IRBuilder<> Builder(PreCondBr);
1812	Value PopCnt, PopCntZext, NewCount, TripCnt;
1813	{
1814	PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1815	NewCount = PopCntZext =
1816	Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1817
1818	if (NewCount != PopCnt)
1819	(cast<Instruction>(NewCount))->setDebugLoc(DL);
1820
1821	// TripCnt is exactly the number of iterations the loop has
1822	TripCnt = NewCount;
1823
1824	// If the population counter's initial value is not zero, insert Add Inst.
1825	Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1826	ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1827	if (!InitConst \|\| !InitConst->isZero()) {
1828	NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1829	(cast<Instruction>(NewCount))->setDebugLoc(DL);
1830	}
1831	}
1832
1833	// Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1834	// "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1835	// function would be partial dead code, and downstream passes will drag
1836	// it back from the precondition block to the preheader.
1837	{
1838	ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1839
1840	Value *Opnd0 = PopCntZext;
1841	Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1842	if (PreCond->getOperand(0) != Var)
1843	std::swap(Opnd0, Opnd1);
1844
1845	ICmpInst *NewPreCond = cast<ICmpInst>(
1846	Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1847	PreCondBr->setCondition(NewPreCond);
1848
1849	RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
1850	}
1851
1852	// Step 3: Note that the population count is exactly the trip count of the
1853	// loop in question, which enable us to convert the loop from noncountable
1854	// loop into a countable one. The benefit is twofold:
1855	//
1856	// - If the loop only counts population, the entire loop becomes dead after
1857	// the transformation. It is a lot easier to prove a countable loop dead
1858	// than to prove a noncountable one. (In some C dialects, an infinite loop
1859	// isn't dead even if it computes nothing useful. In general, DCE needs
1860	// to prove a noncountable loop finite before safely delete it.)
1861	//
1862	// - If the loop also performs something else, it remains alive.
1863	// Since it is transformed to countable form, it can be aggressively
1864	// optimized by some optimizations which are in general not applicable
1865	// to a noncountable loop.
1866	//
1867	// After this step, this loop (conceptually) would look like following:
1868	// newcnt = __builtin_ctpop(x);
1869	// t = newcnt;
1870	// if (x)
1871	// do { cnt++; x &= x-1; t--) } while (t > 0);
1872	BasicBlock Body = (CurLoop->block_begin());
1873	{
1874	auto *LbBr = cast<BranchInst>(Body->getTerminator());
1875	ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1876	Type *Ty = TripCnt->getType();
1877
1878	PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1879
1880	Builder.SetInsertPoint(LbCond);
1881	Instruction *TcDec = cast<Instruction>(
1882	Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1883	"tcdec", false, true));
1884
1885	TcPhi->addIncoming(TripCnt, PreHead);
1886	TcPhi->addIncoming(TcDec, Body);
1887
1888	CmpInst::Predicate Pred =
1889	(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1890	LbCond->setPredicate(Pred);
1891	LbCond->setOperand(0, TcDec);
1892	LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1893	}
1894
1895	// Step 4: All the references to the original population counter outside
1896	// the loop are replaced with the NewCount -- the value returned from
1897	// __builtin_ctpop().
1898	CntInst->replaceUsesOutsideBlock(NewCount, Body);
1899
1900	// step 5: Forget the "non-computable" trip-count SCEV associated with the
1901	// loop. The loop would otherwise not be deleted even if it becomes empty.
1902	SE->forgetLoop(CurLoop);
1903	}