/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Bug Summary

File:	build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Warning:	line 2713, column 5 Value stored to 'Pred' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopIdiomRecognize.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

1	//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass implements an idiom recognizer that transforms simple loops into a
10	// non-loop form. In cases that this kicks in, it can be a significant
11	// performance win.
12	//
13	// If compiling for code size we avoid idiom recognition if the resulting
14	// code could be larger than the code for the original loop. One way this could
15	// happen is if the loop is not removable after idiom recognition due to the
16	// presence of non-idiom instructions. The initial implementation of the
17	// heuristics applies to idioms in multi-block loops.
18	//
19	//===----------------------------------------------------------------------===//
20	//
21	// TODO List:
22	//
23	// Future loop memory idioms to recognize:
24	// memcmp, strlen, etc.
25	// Future floating point idioms to recognize in -ffast-math mode:
26	// fpowi
27	// Future integer operation idioms to recognize:
28	// ctpop
29	//
30	// Beware that isel's default lowering for ctpop is highly inefficient for
31	// i64 and larger types when i64 is legal and the value has few bits set. It
32	// would be good to enhance isel to emit a loop for ctpop in this case.
33	//
34	// This could recognize common matrix multiplies and dot product idioms and
35	// replace them with calls to BLAS (if linked in??).
36	//
37	//===----------------------------------------------------------------------===//
38
39	#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
40	#include "llvm/ADT/APInt.h"
41	#include "llvm/ADT/ArrayRef.h"
42	#include "llvm/ADT/DenseMap.h"
43	#include "llvm/ADT/MapVector.h"
44	#include "llvm/ADT/SetVector.h"
45	#include "llvm/ADT/SmallPtrSet.h"
46	#include "llvm/ADT/SmallVector.h"
47	#include "llvm/ADT/Statistic.h"
48	#include "llvm/ADT/StringRef.h"
49	#include "llvm/Analysis/AliasAnalysis.h"
50	#include "llvm/Analysis/CmpInstAnalysis.h"
51	#include "llvm/Analysis/LoopAccessAnalysis.h"
52	#include "llvm/Analysis/LoopInfo.h"
53	#include "llvm/Analysis/LoopPass.h"
54	#include "llvm/Analysis/MemoryLocation.h"
55	#include "llvm/Analysis/MemorySSA.h"
56	#include "llvm/Analysis/MemorySSAUpdater.h"
57	#include "llvm/Analysis/MustExecute.h"
58	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59	#include "llvm/Analysis/ScalarEvolution.h"
60	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
61	#include "llvm/Analysis/TargetLibraryInfo.h"
62	#include "llvm/Analysis/TargetTransformInfo.h"
63	#include "llvm/Analysis/ValueTracking.h"
64	#include "llvm/IR/BasicBlock.h"
65	#include "llvm/IR/Constant.h"
66	#include "llvm/IR/Constants.h"
67	#include "llvm/IR/DataLayout.h"
68	#include "llvm/IR/DebugLoc.h"
69	#include "llvm/IR/DerivedTypes.h"
70	#include "llvm/IR/Dominators.h"
71	#include "llvm/IR/GlobalValue.h"
72	#include "llvm/IR/GlobalVariable.h"
73	#include "llvm/IR/IRBuilder.h"
74	#include "llvm/IR/InstrTypes.h"
75	#include "llvm/IR/Instruction.h"
76	#include "llvm/IR/Instructions.h"
77	#include "llvm/IR/IntrinsicInst.h"
78	#include "llvm/IR/Intrinsics.h"
79	#include "llvm/IR/LLVMContext.h"
80	#include "llvm/IR/Module.h"
81	#include "llvm/IR/PassManager.h"
82	#include "llvm/IR/PatternMatch.h"
83	#include "llvm/IR/Type.h"
84	#include "llvm/IR/User.h"
85	#include "llvm/IR/Value.h"
86	#include "llvm/IR/ValueHandle.h"
87	#include "llvm/InitializePasses.h"
88	#include "llvm/Pass.h"
89	#include "llvm/Support/Casting.h"
90	#include "llvm/Support/CommandLine.h"
91	#include "llvm/Support/Debug.h"
92	#include "llvm/Support/InstructionCost.h"
93	#include "llvm/Support/raw_ostream.h"
94	#include "llvm/Transforms/Scalar.h"
95	#include "llvm/Transforms/Utils/BuildLibCalls.h"
96	#include "llvm/Transforms/Utils/Local.h"
97	#include "llvm/Transforms/Utils/LoopUtils.h"
98	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
99	#include <algorithm>
100	#include <cassert>
101	#include <cstdint>
102	#include <utility>
103	#include <vector>
104
105	using namespace llvm;
106
107	#define DEBUG_TYPE"loop-idiom" "loop-idiom"
108
109	STATISTIC(NumMemSet, "Number of memset's formed from loop stores")static llvm::Statistic NumMemSet = {"loop-idiom", "NumMemSet" , "Number of memset's formed from loop stores"};
110	STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores")static llvm::Statistic NumMemCpy = {"loop-idiom", "NumMemCpy" , "Number of memcpy's formed from loop load+stores"};
111	STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores")static llvm::Statistic NumMemMove = {"loop-idiom", "NumMemMove" , "Number of memmove's formed from loop load+stores"};
112	STATISTIC(static llvm::Statistic NumShiftUntilBitTest = {"loop-idiom", "NumShiftUntilBitTest" , "Number of uncountable loops recognized as 'shift until bitttest' idiom" }
113	NumShiftUntilBitTest,static llvm::Statistic NumShiftUntilBitTest = {"loop-idiom", "NumShiftUntilBitTest" , "Number of uncountable loops recognized as 'shift until bitttest' idiom" }
114	"Number of uncountable loops recognized as 'shift until bitttest' idiom")static llvm::Statistic NumShiftUntilBitTest = {"loop-idiom", "NumShiftUntilBitTest" , "Number of uncountable loops recognized as 'shift until bitttest' idiom" };
115	STATISTIC(NumShiftUntilZero,static llvm::Statistic NumShiftUntilZero = {"loop-idiom", "NumShiftUntilZero" , "Number of uncountable loops recognized as 'shift until zero' idiom" }
116	"Number of uncountable loops recognized as 'shift until zero' idiom")static llvm::Statistic NumShiftUntilZero = {"loop-idiom", "NumShiftUntilZero" , "Number of uncountable loops recognized as 'shift until zero' idiom" };
117
118	bool DisableLIRP::All;
119	static cl::opt<bool, true>
120	DisableLIRPAll("disable-" DEBUG_TYPE"loop-idiom" "-all",
121	cl::desc("Options to disable Loop Idiom Recognize Pass."),
122	cl::location(DisableLIRP::All), cl::init(false),
123	cl::ReallyHidden);
124
125	bool DisableLIRP::Memset;
126	static cl::opt<bool, true>
127	DisableLIRPMemset("disable-" DEBUG_TYPE"loop-idiom" "-memset",
128	cl::desc("Proceed with loop idiom recognize pass, but do "
129	"not convert loop(s) to memset."),
130	cl::location(DisableLIRP::Memset), cl::init(false),
131	cl::ReallyHidden);
132
133	bool DisableLIRP::Memcpy;
134	static cl::opt<bool, true>
135	DisableLIRPMemcpy("disable-" DEBUG_TYPE"loop-idiom" "-memcpy",
136	cl::desc("Proceed with loop idiom recognize pass, but do "
137	"not convert loop(s) to memcpy."),
138	cl::location(DisableLIRP::Memcpy), cl::init(false),
139	cl::ReallyHidden);
140
141	static cl::opt<bool> UseLIRCodeSizeHeurs(
142	"use-lir-code-size-heurs",
143	cl::desc("Use loop idiom recognition code size heuristics when compiling"
144	"with -Os/-Oz"),
145	cl::init(true), cl::Hidden);
146
147	namespace {
148
149	class LoopIdiomRecognize {
150	Loop *CurLoop = nullptr;
151	AliasAnalysis *AA;
152	DominatorTree *DT;
153	LoopInfo *LI;
154	ScalarEvolution *SE;
155	TargetLibraryInfo *TLI;
156	const TargetTransformInfo *TTI;
157	const DataLayout *DL;
158	OptimizationRemarkEmitter &ORE;
159	bool ApplyCodeSizeHeuristics;
160	std::unique_ptr<MemorySSAUpdater> MSSAU;
161
162	public:
163	explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
164	LoopInfo *LI, ScalarEvolution *SE,
165	TargetLibraryInfo *TLI,
166	const TargetTransformInfo *TTI, MemorySSA *MSSA,
167	const DataLayout *DL,
168	OptimizationRemarkEmitter &ORE)
169	: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
170	if (MSSA)
171	MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
172	}
173
174	bool runOnLoop(Loop *L);
175
176	private:
177	using StoreList = SmallVector<StoreInst *, 8>;
178	using StoreListMap = MapVector<Value *, StoreList>;
179
180	StoreListMap StoreRefsForMemset;
181	StoreListMap StoreRefsForMemsetPattern;
182	StoreList StoreRefsForMemcpy;
183	bool HasMemset;
184	bool HasMemsetPattern;
185	bool HasMemcpy;
186
187	/// Return code for isLegalStore()
188	enum LegalStoreKind {
189	None = 0,
190	Memset,
191	MemsetPattern,
192	Memcpy,
193	UnorderedAtomicMemcpy,
194	DontUse // Dummy retval never to be used. Allows catching errors in retval
195	// handling.
196	};
197
198	/// \name Countable Loop Idiom Handling
199	/// @{
200
201	bool runOnCountableLoop();
202	bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
203	SmallVectorImpl<BasicBlock *> &ExitBlocks);
204
205	void collectStores(BasicBlock *BB);
206	LegalStoreKind isLegalStore(StoreInst *SI);
207	enum class ForMemset { No, Yes };
208	bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
209	ForMemset For);
210
211	template <typename MemInst>
212	bool processLoopMemIntrinsic(
213	BasicBlock *BB,
214	bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
215	const SCEV *BECount);
216	bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
217	bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
218
219	bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
220	MaybeAlign StoreAlignment, Value *StoredVal,
221	Instruction *TheStore,
222	SmallPtrSetImpl<Instruction *> &Stores,
223	const SCEVAddRecExpr *Ev, const SCEV *BECount,
224	bool IsNegStride, bool IsLoopMemset = false);
225	bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
226	bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
227	const SCEV *StoreSize, MaybeAlign StoreAlign,
228	MaybeAlign LoadAlign, Instruction *TheStore,
229	Instruction *TheLoad,
230	const SCEVAddRecExpr *StoreEv,
231	const SCEVAddRecExpr *LoadEv,
232	const SCEV *BECount);
233	bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
234	bool IsLoopMemset = false);
235
236	/// @}
237	/// \name Noncountable Loop Idiom Handling
238	/// @{
239
240	bool runOnNoncountableLoop();
241
242	bool recognizePopcount();
243	void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
244	PHINode *CntPhi, Value *Var);
245	bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
246	void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
247	Instruction *CntInst, PHINode *CntPhi,
248	Value *Var, Instruction *DefX,
249	const DebugLoc &DL, bool ZeroCheck,
250	bool IsCntPhiUsedOutsideLoop);
251
252	bool recognizeShiftUntilBitTest();
253	bool recognizeShiftUntilZero();
254
255	/// @}
256	};
257
258	class LoopIdiomRecognizeLegacyPass : public LoopPass {
259	public:
260	static char ID;
261
262	explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
263	initializeLoopIdiomRecognizeLegacyPassPass(
264	*PassRegistry::getPassRegistry());
265	}
266
267	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
268	if (DisableLIRP::All)
269	return false;
270
271	if (skipLoop(L))
272	return false;
273
274	AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
275	DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
276	LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
277	ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
278	TargetLibraryInfo *TLI =
279	&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
280	*L->getHeader()->getParent());
281	const TargetTransformInfo *TTI =
282	&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
283	*L->getHeader()->getParent());
284	const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
285	auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
286	MemorySSA *MSSA = nullptr;
287	if (MSSAAnalysis)
288	MSSA = &MSSAAnalysis->getMSSA();
289
290	// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
291	// pass. Function analyses need to be preserved across loop transformations
292	// but ORE cannot be preserved (see comment before the pass definition).
293	OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
294
295	LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
296	return LIR.runOnLoop(L);
297	}
298
299	/// This transformation requires natural loop information & requires that
300	/// loop preheaders be inserted into the CFG.
301	void getAnalysisUsage(AnalysisUsage &AU) const override {
302	AU.addRequired<TargetLibraryInfoWrapperPass>();
303	AU.addRequired<TargetTransformInfoWrapperPass>();
304	AU.addPreserved<MemorySSAWrapperPass>();
305	getLoopAnalysisUsage(AU);
306	}
307	};
308
309	} // end anonymous namespace
310
311	char LoopIdiomRecognizeLegacyPass::ID = 0;
312
313	PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
314	LoopStandardAnalysisResults &AR,
315	LPMUpdater &) {
316	if (DisableLIRP::All)
317	return PreservedAnalyses::all();
318
319	const auto *DL = &L.getHeader()->getModule()->getDataLayout();
320
321	// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
322	// pass. Function analyses need to be preserved across loop transformations
323	// but ORE cannot be preserved (see comment before the pass definition).
324	OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
325
326	LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
327	AR.MSSA, DL, ORE);
328	if (!LIR.runOnLoop(&L))
329	return PreservedAnalyses::all();
330
331	auto PA = getLoopPassPreservedAnalyses();
332	if (AR.MSSA)
333	PA.preserve<MemorySSAAnalysis>();
334	return PA;
335	}
336
337	INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",static void *initializeLoopIdiomRecognizeLegacyPassPassOnce(PassRegistry &Registry) {
338	"Recognize loop idioms", false, false)static void *initializeLoopIdiomRecognizeLegacyPassPassOnce(PassRegistry &Registry) {
339	INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
340	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
341	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
342	INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",PassInfo *PI = new PassInfo( "Recognize loop idioms", "loop-idiom" , &LoopIdiomRecognizeLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopIdiomRecognizeLegacyPass>), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopIdiomRecognizeLegacyPassPassFlag , initializeLoopIdiomRecognizeLegacyPassPassOnce, std::ref(Registry )); }
343	"Recognize loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize loop idioms", "loop-idiom" , &LoopIdiomRecognizeLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopIdiomRecognizeLegacyPass>), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopIdiomRecognizeLegacyPassPassFlag , initializeLoopIdiomRecognizeLegacyPassPassOnce, std::ref(Registry )); }
344
345	Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
346
347	static void deleteDeadInstruction(Instruction *I) {
348	I->replaceAllUsesWith(UndefValue::get(I->getType()));
349	I->eraseFromParent();
350	}
351
352	//===----------------------------------------------------------------------===//
353	//
354	// Implementation of LoopIdiomRecognize
355	//
356	//===----------------------------------------------------------------------===//
357
358	bool LoopIdiomRecognize::runOnLoop(Loop *L) {
359	CurLoop = L;
360	// If the loop could not be converted to canonical form, it must have an
361	// indirectbr in it, just give up.
362	if (!L->getLoopPreheader())
363	return false;
364
365	// Disable loop idiom recognition if the function's name is a common idiom.
366	StringRef Name = L->getHeader()->getParent()->getName();
367	if (Name == "memset" || Name == "memcpy")
368	return false;
369
370	// Determine if code size heuristics need to be applied.
371	ApplyCodeSizeHeuristics =
372	L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
373
374	HasMemset = TLI->has(LibFunc_memset);
375	HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
376	HasMemcpy = TLI->has(LibFunc_memcpy);
377
378	if (HasMemset || HasMemsetPattern || HasMemcpy)
379	if (SE->hasLoopInvariantBackedgeTakenCount(L))
380	return runOnCountableLoop();
381
382	return runOnNoncountableLoop();
383	}
384
385	bool LoopIdiomRecognize::runOnCountableLoop() {
386	const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
387	assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 389, __extension__ __PRETTY_FUNCTION__))
388	"runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 389, __extension__ __PRETTY_FUNCTION__))
389	"backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 389, __extension__ __PRETTY_FUNCTION__));
390
391	// If this loop executes exactly one time, then it should be peeled, not
392	// optimized by this pass.
393	if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
394	if (BECst->getAPInt() == 0)
395	return false;
396
397	SmallVector<BasicBlock *, 8> ExitBlocks;
398	CurLoop->getUniqueExitBlocks(ExitBlocks);
399
400	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
401	<< CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
402	<< "] Countable Loop %" << CurLoop->getHeader()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
403	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Countable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false);
404
405	// The following transforms hoist stores/memsets into the loop pre-header.
406	// Give up if the loop has instructions that may throw.
407	SimpleLoopSafetyInfo SafetyInfo;
408	SafetyInfo.computeLoopSafetyInfo(CurLoop);
409	if (SafetyInfo.anyBlockMayThrow())
410	return false;
411
412	bool MadeChange = false;
413
414	// Scan all the blocks in the loop that are not in subloops.
415	for (auto *BB : CurLoop->getBlocks()) {
416	// Ignore blocks in subloops.
417	if (LI->getLoopFor(BB) != CurLoop)
418	continue;
419
420	MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
421	}
422	return MadeChange;
423	}
424
425	static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
426	const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
427	return ConstStride->getAPInt();
428	}
429
430	/// getMemSetPatternValue - If a strided store of the specified value is safe to
431	/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
432	/// be passed in. Otherwise, return null.
433	///
434	/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
435	/// just replicate their input array and then pass on to memset_pattern16.
436	static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
437	// FIXME: This could check for UndefValue because it can be merged into any
438	// other valid pattern.
439
440	// If the value isn't a constant, we can't promote it to being in a constant
441	// array. We could theoretically do a store to an alloca or something, but
442	// that doesn't seem worthwhile.
443	Constant *C = dyn_cast<Constant>(V);
444	if (!C)
445	return nullptr;
446
447	// Only handle simple values that are a power of two bytes in size.
448	uint64_t Size = DL->getTypeSizeInBits(V->getType());
449	if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
450	return nullptr;
451
452	// Don't care enough about darwin/ppc to implement this.
453	if (DL->isBigEndian())
454	return nullptr;
455
456	// Convert to size in bytes.
457	Size /= 8;
458
459	// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
460	// if the top and bottom are the same (e.g. for vectors and large integers).
461	if (Size > 16)
462	return nullptr;
463
464	// If the constant is exactly 16 bytes, just use it.
465	if (Size == 16)
466	return C;
467
468	// Otherwise, we'll use an array of the constants.
469	unsigned ArraySize = 16 / Size;
470	ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
471	return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
472	}
473
474	LoopIdiomRecognize::LegalStoreKind
475	LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
476	// Don't touch volatile stores.
477	if (SI->isVolatile())
478	return LegalStoreKind::None;
479	// We only want simple or unordered-atomic stores.
480	if (!SI->isUnordered())
481	return LegalStoreKind::None;
482
483	// Avoid merging nontemporal stores.
484	if (SI->getMetadata(LLVMContext::MD_nontemporal))
485	return LegalStoreKind::None;
486
487	Value *StoredVal = SI->getValueOperand();
488	Value *StorePtr = SI->getPointerOperand();
489
490	// Don't convert stores of non-integral pointer types to memsets (which stores
491	// integers).
492	if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
493	return LegalStoreKind::None;
494
495	// Reject stores that are so large that they overflow an unsigned.
496	// When storing out scalable vectors we bail out for now, since the code
497	// below currently only works for constant strides.
498	TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
499	if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
500	(SizeInBits.getFixedSize() >> 32) != 0)
501	return LegalStoreKind::None;
502
503	// See if the pointer expression is an AddRec like {base,+,1} on the current
504	// loop, which indicates a strided store. If we have something else, it's a
505	// random store we can't handle.
506	const SCEVAddRecExpr *StoreEv =
507	dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
508	if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
509	return LegalStoreKind::None;
510
511	// Check to see if we have a constant stride.
512	if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
513	return LegalStoreKind::None;
514
515	// See if the store can be turned into a memset.
516
517	// If the stored value is a byte-wise value (like i32 -1), then it may be
518	// turned into a memset of i8 -1, assuming that all the consecutive bytes
519	// are stored. A store of i32 0x01020304 can never be turned into a memset,
520	// but it can be turned into memset_pattern if the target supports it.
521	Value *SplatValue = isBytewiseValue(StoredVal, *DL);
522
523	// Note: memset and memset_pattern on unordered-atomic is yet not supported
524	bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
525
526	// If we're allowed to form a memset, and the stored value would be
527	// acceptable for memset, use it.
528	if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
529	// Verify that the stored value is loop invariant. If not, we can't
530	// promote the memset.
531	CurLoop->isLoopInvariant(SplatValue)) {
532	// It looks like we can use SplatValue.
533	return LegalStoreKind::Memset;
534	}
535	if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
536	// Don't create memset_pattern16s with address spaces.
537	StorePtr->getType()->getPointerAddressSpace() == 0 &&
538	getMemSetPatternValue(StoredVal, DL)) {
539	// It looks like we can use PatternValue!
540	return LegalStoreKind::MemsetPattern;
541	}
542
543	// Otherwise, see if the store can be turned into a memcpy.
544	if (HasMemcpy && !DisableLIRP::Memcpy) {
545	// Check to see if the stride matches the size of the store. If so, then we
546	// know that every byte is touched in the loop.
547	APInt Stride = getStoreStride(StoreEv);
548	unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
549	if (StoreSize != Stride && StoreSize != -Stride)
550	return LegalStoreKind::None;
551
552	// The store must be feeding a non-volatile load.
553	LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
554
555	// Only allow non-volatile loads
556	if (!LI || LI->isVolatile())
557	return LegalStoreKind::None;
558	// Only allow simple or unordered-atomic loads
559	if (!LI->isUnordered())
560	return LegalStoreKind::None;
561
562	// See if the pointer expression is an AddRec like {base,+,1} on the current
563	// loop, which indicates a strided load. If we have something else, it's a
564	// random load we can't handle.
565	const SCEVAddRecExpr *LoadEv =
566	dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
567	if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
568	return LegalStoreKind::None;
569
570	// The store and load must share the same stride.
571	if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
572	return LegalStoreKind::None;
573
574	// Success. This store can be converted into a memcpy.
575	UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
576	return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
577	: LegalStoreKind::Memcpy;
578	}
579	// This store can't be transformed into a memset/memcpy.
580	return LegalStoreKind::None;
581	}
582
583	void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
584	StoreRefsForMemset.clear();
585	StoreRefsForMemsetPattern.clear();
586	StoreRefsForMemcpy.clear();
587	for (Instruction &I : *BB) {
588	StoreInst *SI = dyn_cast<StoreInst>(&I);
589	if (!SI)
590	continue;
591
592	// Make sure this is a strided store with a constant stride.
593	switch (isLegalStore(SI)) {
594	case LegalStoreKind::None:
595	// Nothing to do
596	break;
597	case LegalStoreKind::Memset: {
598	// Find the base pointer.
599	Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
600	StoreRefsForMemset[Ptr].push_back(SI);
601	} break;
602	case LegalStoreKind::MemsetPattern: {
603	// Find the base pointer.
604	Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
605	StoreRefsForMemsetPattern[Ptr].push_back(SI);
606	} break;
607	case LegalStoreKind::Memcpy:
608	case LegalStoreKind::UnorderedAtomicMemcpy:
609	StoreRefsForMemcpy.push_back(SI);
610	break;
611	default:
612	assert(false && "unhandled return value")(static_cast <bool> (false && "unhandled return value" ) ? void (0) : __assert_fail ("false && \"unhandled return value\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 612, __extension__ __PRETTY_FUNCTION__));
613	break;
614	}
615	}
616	}
617
618	/// runOnLoopBlock - Process the specified block, which lives in a counted loop
619	/// with the specified backedge count. This block is known to be in the current
620	/// loop and not in any subloops.
621	bool LoopIdiomRecognize::runOnLoopBlock(
622	BasicBlock BB, const SCEV BECount,
623	SmallVectorImpl<BasicBlock *> &ExitBlocks) {
624	// We can only promote stores in this block if they are unconditionally
625	// executed in the loop. For a block to be unconditionally executed, it has
626	// to dominate all the exit blocks of the loop. Verify this now.
627	for (BasicBlock *ExitBlock : ExitBlocks)
628	if (!DT->dominates(BB, ExitBlock))
629	return false;
630
631	bool MadeChange = false;
632	// Look for store instructions, which may be optimized to memset/memcpy.
633	collectStores(BB);
634
635	// Look for a single store or sets of stores with a common base, which can be
636	// optimized into a memset (memset_pattern). The latter most commonly happens
637	// with structs and handunrolled loops.
638	for (auto &SL : StoreRefsForMemset)
639	MadeChange \|= processLoopStores(SL.second, BECount, ForMemset::Yes);
640
641	for (auto &SL : StoreRefsForMemsetPattern)
642	MadeChange \|= processLoopStores(SL.second, BECount, ForMemset::No);
643
644	// Optimize the store into a memcpy, if it feeds an similarly strided load.
645	for (auto &SI : StoreRefsForMemcpy)
646	MadeChange \|= processLoopStoreOfLoopLoad(SI, BECount);
647
648	MadeChange \|= processLoopMemIntrinsic<MemCpyInst>(
649	BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
650	MadeChange \|= processLoopMemIntrinsic<MemSetInst>(
651	BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
652
653	return MadeChange;
654	}
655
656	/// See if this store(s) can be promoted to a memset.
657	bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
658	const SCEV *BECount, ForMemset For) {
659	// Try to find consecutive stores that can be transformed into memsets.
660	SetVector<StoreInst *> Heads, Tails;
661	SmallDenseMap<StoreInst , StoreInst > ConsecutiveChain;
662
663	// Do a quadratic search on all of the given stores and find
664	// all of the pairs of stores that follow each other.
665	SmallVector<unsigned, 16> IndexQueue;
666	for (unsigned i = 0, e = SL.size(); i < e; ++i) {
667	assert(SL[i]->isSimple() && "Expected only non-volatile stores.")(static_cast <bool> (SL[i]->isSimple() && "Expected only non-volatile stores." ) ? void (0) : __assert_fail ("SL[i]->isSimple() && \"Expected only non-volatile stores.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 667, __extension__ __PRETTY_FUNCTION__));
668
669	Value *FirstStoredVal = SL[i]->getValueOperand();
670	Value *FirstStorePtr = SL[i]->getPointerOperand();
671	const SCEVAddRecExpr *FirstStoreEv =
672	cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
673	APInt FirstStride = getStoreStride(FirstStoreEv);
674	unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
675
676	// See if we can optimize just this store in isolation.
677	if (FirstStride == FirstStoreSize \|\| -FirstStride == FirstStoreSize) {
678	Heads.insert(SL[i]);
679	continue;
680	}
681
682	Value *FirstSplatValue = nullptr;
683	Constant *FirstPatternValue = nullptr;
684
685	if (For == ForMemset::Yes)
686	FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
687	else
688	FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
689
690	assert((FirstSplatValue \|\| FirstPatternValue) &&(static_cast <bool> ((FirstSplatValue \|\| FirstPatternValue ) && "Expected either splat value or pattern value.") ? void (0) : __assert_fail ("(FirstSplatValue \|\| FirstPatternValue) && \"Expected either splat value or pattern value.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 691, __extension__ __PRETTY_FUNCTION__))
691	"Expected either splat value or pattern value.")(static_cast <bool> ((FirstSplatValue \|\| FirstPatternValue ) && "Expected either splat value or pattern value.") ? void (0) : __assert_fail ("(FirstSplatValue \|\| FirstPatternValue) && \"Expected either splat value or pattern value.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 691, __extension__ __PRETTY_FUNCTION__));
692
693	IndexQueue.clear();
694	// If a store has multiple consecutive store candidates, search Stores
695	// array according to the sequence: from i+1 to e, then from i-1 to 0.
696	// This is because usually pairing with immediate succeeding or preceding
697	// candidate create the best chance to find memset opportunity.
698	unsigned j = 0;
699	for (j = i + 1; j < e; ++j)
700	IndexQueue.push_back(j);
701	for (j = i; j > 0; --j)
702	IndexQueue.push_back(j - 1);
703
704	for (auto &k : IndexQueue) {
705	assert(SL[k]->isSimple() && "Expected only non-volatile stores.")(static_cast <bool> (SL[k]->isSimple() && "Expected only non-volatile stores." ) ? void (0) : __assert_fail ("SL[k]->isSimple() && \"Expected only non-volatile stores.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 705, __extension__ __PRETTY_FUNCTION__));
706	Value *SecondStorePtr = SL[k]->getPointerOperand();
707	const SCEVAddRecExpr *SecondStoreEv =
708	cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
709	APInt SecondStride = getStoreStride(SecondStoreEv);
710
711	if (FirstStride != SecondStride)
712	continue;
713
714	Value *SecondStoredVal = SL[k]->getValueOperand();
715	Value *SecondSplatValue = nullptr;
716	Constant *SecondPatternValue = nullptr;
717
718	if (For == ForMemset::Yes)
719	SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
720	else
721	SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
722
723	assert((SecondSplatValue \|\| SecondPatternValue) &&(static_cast <bool> ((SecondSplatValue \|\| SecondPatternValue ) && "Expected either splat value or pattern value.") ? void (0) : __assert_fail ("(SecondSplatValue \|\| SecondPatternValue) && \"Expected either splat value or pattern value.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 724, __extension__ __PRETTY_FUNCTION__))
724	"Expected either splat value or pattern value.")(static_cast <bool> ((SecondSplatValue \|\| SecondPatternValue ) && "Expected either splat value or pattern value.") ? void (0) : __assert_fail ("(SecondSplatValue \|\| SecondPatternValue) && \"Expected either splat value or pattern value.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 724, __extension__ __PRETTY_FUNCTION__));
725
726	if (isConsecutiveAccess(SL[i], SL[k], DL, SE, false)) {
727	if (For == ForMemset::Yes) {
728	if (isa<UndefValue>(FirstSplatValue))
729	FirstSplatValue = SecondSplatValue;
730	if (FirstSplatValue != SecondSplatValue)
731	continue;
732	} else {
733	if (isa<UndefValue>(FirstPatternValue))
734	FirstPatternValue = SecondPatternValue;
735	if (FirstPatternValue != SecondPatternValue)
736	continue;
737	}
738	Tails.insert(SL[k]);
739	Heads.insert(SL[i]);
740	ConsecutiveChain[SL[i]] = SL[k];
741	break;
742	}
743	}
744	}
745
746	// We may run into multiple chains that merge into a single chain. We mark the
747	// stores that we transformed so that we don't visit the same store twice.
748	SmallPtrSet<Value *, 16> TransformedStores;
749	bool Changed = false;
750
751	// For stores that start but don't end a link in the chain:
752	for (StoreInst *I : Heads) {
753	if (Tails.count(I))
754	continue;
755
756	// We found a store instr that starts a chain. Now follow the chain and try
757	// to transform it.
758	SmallPtrSet<Instruction *, 8> AdjacentStores;
759	StoreInst *HeadStore = I;
760	unsigned StoreSize = 0;
761
762	// Collect the chain into a list.
763	while (Tails.count(I) \|\| Heads.count(I)) {
764	if (TransformedStores.count(I))
765	break;
766	AdjacentStores.insert(I);
767
768	StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
769	// Move to the next value in the chain.
770	I = ConsecutiveChain[I];
771	}
772
773	Value *StoredVal = HeadStore->getValueOperand();
774	Value *StorePtr = HeadStore->getPointerOperand();
775	const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
776	APInt Stride = getStoreStride(StoreEv);
777
778	// Check to see if the stride matches the size of the stores. If so, then
779	// we know that every byte is touched in the loop.
780	if (StoreSize != Stride && StoreSize != -Stride)
781	continue;
782
783	bool IsNegStride = StoreSize == -Stride;
784
785	Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
786	const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
787	if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
788	MaybeAlign(HeadStore->getAlign()), StoredVal,
789	HeadStore, AdjacentStores, StoreEv, BECount,
790	IsNegStride)) {
791	TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
792	Changed = true;
793	}
794	}
795
796	return Changed;
797	}
798
799	/// processLoopMemIntrinsic - Template function for calling different processor
800	/// functions based on mem instrinsic type.
801	template <typename MemInst>
802	bool LoopIdiomRecognize::processLoopMemIntrinsic(
803	BasicBlock *BB,
804	bool (LoopIdiomRecognize::Processor)(MemInst , const SCEV *),
805	const SCEV *BECount) {
806	bool MadeChange = false;
807	for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
808	Instruction Inst = &I++;
809	// Look for memory instructions, which may be optimized to a larger one.
810	if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
811	WeakTrackingVH InstPtr(&*I);
812	if (!(this->*Processor)(MI, BECount))
813	continue;
814	MadeChange = true;
815
816	// If processing the instruction invalidated our iterator, start over from
817	// the top of the block.
818	if (!InstPtr)
819	I = BB->begin();
820	}
821	}
822	return MadeChange;
823	}
824
825	/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
826	bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
827	const SCEV *BECount) {
828	// We can only handle non-volatile memcpys with a constant size.
829	if (MCI->isVolatile() \|\| !isa<ConstantInt>(MCI->getLength()))
830	return false;
831
832	// If we're not allowed to hack on memcpy, we fail.
833	if ((!HasMemcpy && !isa<MemCpyInlineInst>(MCI)) \|\| DisableLIRP::Memcpy)
834	return false;
835
836	Value *Dest = MCI->getDest();
837	Value *Source = MCI->getSource();
838	if (!Dest \|\| !Source)
839	return false;
840
841	// See if the load and store pointer expressions are AddRec like {base,+,1} on
842	// the current loop, which indicates a strided load and store. If we have
843	// something else, it's a random load or store we can't handle.
844	const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest));
845	if (!StoreEv \|\| StoreEv->getLoop() != CurLoop \|\| !StoreEv->isAffine())
846	return false;
847	const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source));
848	if (!LoadEv \|\| LoadEv->getLoop() != CurLoop \|\| !LoadEv->isAffine())
849	return false;
850
851	// Reject memcpys that are so large that they overflow an unsigned.
852	uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
853	if ((SizeInBytes >> 32) != 0)
854	return false;
855
856	// Check if the stride matches the size of the memcpy. If so, then we know
857	// that every byte is touched in the loop.
858	const SCEVConstant *ConstStoreStride =
859	dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
860	const SCEVConstant *ConstLoadStride =
861	dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
862	if (!ConstStoreStride \|\| !ConstLoadStride)
863	return false;
864
865	APInt StoreStrideValue = ConstStoreStride->getAPInt();
866	APInt LoadStrideValue = ConstLoadStride->getAPInt();
867	// Huge stride value - give up
868	if (StoreStrideValue.getBitWidth() > 64 \|\| LoadStrideValue.getBitWidth() > 64)
869	return false;
870
871	if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
872	ORE.emit([&]() {
873	return OptimizationRemarkMissed(DEBUG_TYPE"loop-idiom", "SizeStrideUnequal", MCI)
874	<< ore::NV("Inst", "memcpy") << " in "
875	<< ore::NV("Function", MCI->getFunction())
876	<< " function will not be hoisted: "
877	<< ore::NV("Reason", "memcpy size is not equal to stride");
878	});
879	return false;
880	}
881
882	int64_t StoreStrideInt = StoreStrideValue.getSExtValue();
883	int64_t LoadStrideInt = LoadStrideValue.getSExtValue();
884	// Check if the load stride matches the store stride.
885	if (StoreStrideInt != LoadStrideInt)
886	return false;
887
888	return processLoopStoreOfLoopLoad(
889	Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
890	MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv,
891	BECount);
892	}
893
894	/// processLoopMemSet - See if this memset can be promoted to a large memset.
895	bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
896	const SCEV *BECount) {
897	// We can only handle non-volatile memsets.
898	if (MSI->isVolatile())
899	return false;
900
901	// If we're not allowed to hack on memset, we fail.
902	if (!HasMemset \|\| DisableLIRP::Memset)
903	return false;
904
905	Value *Pointer = MSI->getDest();
906
907	// See if the pointer expression is an AddRec like {base,+,1} on the current
908	// loop, which indicates a strided store. If we have something else, it's a
909	// random store we can't handle.
910	const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
911	if (!Ev \|\| Ev->getLoop() != CurLoop)
912	return false;
913	if (!Ev->isAffine()) {
914	LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Pointer is not affine, abort\n" ; } } while (false);
915	return false;
916	}
917
918	const SCEV *PointerStrideSCEV = Ev->getOperand(1);
919	const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
920	if (!PointerStrideSCEV \|\| !MemsetSizeSCEV)
921	return false;
922
923	bool IsNegStride = false;
924	const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());
925
926	if (IsConstantSize) {
927	// Memset size is constant.
928	// Check if the pointer stride matches the memset size. If so, then
929	// we know that every byte is touched in the loop.
930	LLVM_DEBUG(dbgs() << " memset size is constant\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " memset size is constant\n" ; } } while (false);
931	uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
932	const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
933	if (!ConstStride)
934	return false;
935
936	APInt Stride = ConstStride->getAPInt();
937	if (SizeInBytes != Stride && SizeInBytes != -Stride)
938	return false;
939
940	IsNegStride = SizeInBytes == -Stride;
941	} else {
942	// Memset size is non-constant.
943	// Check if the pointer stride matches the memset size.
944	// To be conservative, the pass would not promote pointers that aren't in
945	// address space zero. Also, the pass only handles memset length and stride
946	// that are invariant for the top level loop.
947	LLVM_DEBUG(dbgs() << " memset size is non-constant\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " memset size is non-constant\n" ; } } while (false);
948	if (Pointer->getType()->getPointerAddressSpace() != 0) {
949	LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " pointer is not in address space zero, " << "abort\n"; } } while (false)
950	<< "abort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " pointer is not in address space zero, " << "abort\n"; } } while (false);
951	return false;
952	}
953	if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
954	LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " memset size is not a loop-invariant, " << "abort\n"; } } while (false)
955	<< "abort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " memset size is not a loop-invariant, " << "abort\n"; } } while (false);
956	return false;
957	}
958
959	// Compare positive direction PointerStrideSCEV with MemsetSizeSCEV
960	IsNegStride = PointerStrideSCEV->isNonConstantNegative();
961	const SCEV *PositiveStrideSCEV =
962	IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
963	: PointerStrideSCEV;
964	LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << MemsetSizeSCEV << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " MemsetSizeSCEV: " << MemsetSizeSCEV << "\n" << " PositiveStrideSCEV: " << *PositiveStrideSCEV << "\n"; } } while (false )
965	<< " PositiveStrideSCEV: " << PositiveStrideSCEVdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " MemsetSizeSCEV: " << MemsetSizeSCEV << "\n" << " PositiveStrideSCEV: " << *PositiveStrideSCEV << "\n"; } } while (false )
966	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " MemsetSizeSCEV: " << MemsetSizeSCEV << "\n" << " PositiveStrideSCEV: " << PositiveStrideSCEV << "\n"; } } while (false );
967
968	if (PositiveStrideSCEV != MemsetSizeSCEV) {
969	// If an expression is covered by the loop guard, compare again and
970	// proceed with optimization if equal.
971	const SCEV *FoldedPositiveStride =
972	SE->applyLoopGuards(PositiveStrideSCEV, CurLoop);
973	const SCEV *FoldedMemsetSize =
974	SE->applyLoopGuards(MemsetSizeSCEV, CurLoop);
975
976	LLVM_DEBUG(dbgs() << " Try to fold SCEV based on loop guard\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Try to fold SCEV based on loop guard\n" << " FoldedMemsetSize: " << FoldedMemsetSize << "\n" << " FoldedPositiveStride: " << FoldedPositiveStride << "\n"; } } while (false)
977	<< " FoldedMemsetSize: " << FoldedMemsetSize << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Try to fold SCEV based on loop guard\n" << " FoldedMemsetSize: " << FoldedMemsetSize << "\n" << " FoldedPositiveStride: " << *FoldedPositiveStride << "\n"; } } while (false)
978	<< " FoldedPositiveStride: " << FoldedPositiveStridedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Try to fold SCEV based on loop guard\n" << " FoldedMemsetSize: " << FoldedMemsetSize << "\n" << " FoldedPositiveStride: " << *FoldedPositiveStride << "\n"; } } while (false)
979	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Try to fold SCEV based on loop guard\n" << " FoldedMemsetSize: " << FoldedMemsetSize << "\n" << " FoldedPositiveStride: " << FoldedPositiveStride << "\n"; } } while (false);
980
981	if (FoldedPositiveStride != FoldedMemsetSize) {
982	LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " SCEV don't match, abort\n" ; } } while (false);
983	return false;
984	}
985	}
986	}
987
988	// Verify that the memset value is loop invariant. If not, we can't promote
989	// the memset.
990	Value *SplatValue = MSI->getValue();
991	if (!SplatValue \|\| !CurLoop->isLoopInvariant(SplatValue))
992	return false;
993
994	SmallPtrSet<Instruction *, 1> MSIs;
995	MSIs.insert(MSI);
996	return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
997	MaybeAlign(MSI->getDestAlignment()),
998	SplatValue, MSI, MSIs, Ev, BECount,
999	IsNegStride, /IsLoopMemset=/true);
1000	}
1001
1002	/// mayLoopAccessLocation - Return true if the specified loop might access the
1003	/// specified pointer location, which is a loop-strided access. The 'Access'
1004	/// argument specifies what the verboten forms of access are (read or write).
1005	static bool
1006	mayLoopAccessLocation(Value Ptr, ModRefInfo Access, Loop L,
1007	const SCEV BECount, const SCEV StoreSizeSCEV,
1008	AliasAnalysis &AA,
1009	SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
1010	// Get the location that may be stored across the loop. Since the access is
1011	// strided positively through memory, we say that the modified location starts
1012	// at the pointer and has infinite size.
1013	LocationSize AccessSize = LocationSize::afterPointer();
1014
1015	// If the loop iterates a fixed number of times, we can refine the access size
1016	// to be exactly the size of the memset, which is (BECount+1)*StoreSize
1017	const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
1018	const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1019	if (BECst && ConstSize)
1020	AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
1021	ConstSize->getValue()->getZExtValue());
1022
1023	// TODO: For this to be really effective, we have to dive into the pointer
1024	// operand in the store. Store to &A[i] of 100 will always return may alias
1025	// with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1026	// which will then no-alias a store to &A[100].
1027	MemoryLocation StoreLoc(Ptr, AccessSize);
1028
1029	for (BasicBlock *B : L->blocks())
1030	for (Instruction &I : *B)
1031	if (!IgnoredInsts.contains(&I) &&
1032	isModOrRefSet(
1033	intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
1034	return true;
1035	return false;
1036	}
1037
1038	// If we have a negative stride, Start refers to the end of the memory location
1039	// we're trying to memset. Therefore, we need to recompute the base pointer,
1040	// which is just Start - BECount*Size.
1041	static const SCEV getStartForNegStride(const SCEV Start, const SCEV *BECount,
1042	Type IntPtr, const SCEV StoreSizeSCEV,
1043	ScalarEvolution *SE) {
1044	const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
1045	if (!StoreSizeSCEV->isOne()) {
1046	// index = back edge count * store size
1047	Index = SE->getMulExpr(Index,
1048	SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
1049	SCEV::FlagNUW);
1050	}
1051	// base pointer = start - index * store size
1052	return SE->getMinusSCEV(Start, Index);
1053	}
1054
1055	/// Compute trip count from the backedge taken count.
1056	static const SCEV getTripCount(const SCEV BECount, Type *IntPtr,
1057	Loop CurLoop, const DataLayout DL,
1058	ScalarEvolution *SE) {
1059	const SCEV *TripCountS = nullptr;
1060	// The # stored bytes is (BECount+1). Expand the trip count out to
1061	// pointer size if it isn't already.
1062	//
1063	// If we're going to need to zero extend the BE count, check if we can add
1064	// one to it prior to zero extending without overflow. Provided this is safe,
1065	// it allows better simplification of the +1.
1066	if (DL->getTypeSizeInBits(BECount->getType()) <
1067	DL->getTypeSizeInBits(IntPtr) &&
1068	SE->isLoopEntryGuardedByCond(
1069	CurLoop, ICmpInst::ICMP_NE, BECount,
1070	SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
1071	TripCountS = SE->getZeroExtendExpr(
1072	SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
1073	IntPtr);
1074	} else {
1075	TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
1076	SE->getOne(IntPtr), SCEV::FlagNUW);
1077	}
1078
1079	return TripCountS;
1080	}
1081
1082	/// Compute the number of bytes as a SCEV from the backedge taken count.
1083	///
1084	/// This also maps the SCEV into the provided type and tries to handle the
1085	/// computation in a way that will fold cleanly.
1086	static const SCEV getNumBytes(const SCEV BECount, Type *IntPtr,
1087	const SCEV StoreSizeSCEV, Loop CurLoop,
1088	const DataLayout DL, ScalarEvolution SE) {
1089	const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE);
1090
1091	return SE->getMulExpr(TripCountSCEV,
1092	SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
1093	SCEV::FlagNUW);
1094	}
1095
1096	/// processLoopStridedStore - We see a strided store of some value. If we can
1097	/// transform this into a memset or memset_pattern in the loop preheader, do so.
1098	bool LoopIdiomRecognize::processLoopStridedStore(
1099	Value DestPtr, const SCEV StoreSizeSCEV, MaybeAlign StoreAlignment,
1100	Value StoredVal, Instruction TheStore,
1101	SmallPtrSetImpl<Instruction > &Stores, const SCEVAddRecExpr Ev,
1102	const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
1103	Value SplatValue = isBytewiseValue(StoredVal, DL);
1104	Constant *PatternValue = nullptr;
1105
1106	if (!SplatValue)
1107	PatternValue = getMemSetPatternValue(StoredVal, DL);
1108
1109	assert((SplatValue \|\| PatternValue) &&(static_cast <bool> ((SplatValue \|\| PatternValue) && "Expected either splat value or pattern value.") ? void (0) : __assert_fail ("(SplatValue \|\| PatternValue) && \"Expected either splat value or pattern value.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1110, __extension__ __PRETTY_FUNCTION__))
1110	"Expected either splat value or pattern value.")(static_cast <bool> ((SplatValue \|\| PatternValue) && "Expected either splat value or pattern value.") ? void (0) : __assert_fail ("(SplatValue \|\| PatternValue) && \"Expected either splat value or pattern value.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1110, __extension__ __PRETTY_FUNCTION__));
1111
1112	// The trip count of the loop and the base pointer of the addrec SCEV is
1113	// guaranteed to be loop invariant, which means that it should dominate the
1114	// header. This allows us to insert code for it in the preheader.
1115	unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
1116	BasicBlock *Preheader = CurLoop->getLoopPreheader();
1117	IRBuilder<> Builder(Preheader->getTerminator());
1118	SCEVExpander Expander(SE, DL, "loop-idiom");
1119	SCEVExpanderCleaner ExpCleaner(Expander);
1120
1121	Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
1122	Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
1123
1124	bool Changed = false;
1125	const SCEV *Start = Ev->getStart();
1126	// Handle negative strided loops.
1127	if (IsNegStride)
1128	Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
1129
1130	// TODO: ideally we should still be able to generate memset if SCEV expander
1131	// is taught to generate the dependencies at the latest point.
1132	if (!isSafeToExpand(Start, *SE))
1133	return Changed;
1134
1135	// Okay, we have a strided store "p[i]" of a splattable value. We can turn
1136	// this into a memset in the loop preheader now if we want. However, this
1137	// would be unsafe to do if there is anything else in the loop that may read
1138	// or write to the aliased location. Check for any overlap by generating the
1139	// base pointer and checking the region.
1140	Value *BasePtr =
1141	Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
1142
1143	// From here on out, conservatively report to the pass manager that we've
1144	// changed the IR, even if we later clean up these added instructions. There
1145	// may be structural differences e.g. in the order of use lists not accounted
1146	// for in just a textual dump of the IR. This is written as a variable, even
1147	// though statically all the places this dominates could be replaced with
1148	// 'true', with the hope that anyone trying to be clever / "more precise" with
1149	// the return value will read this comment, and leave them alone.
1150	Changed = true;
1151
1152	if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1153	StoreSizeSCEV, *AA, Stores))
1154	return Changed;
1155
1156	if (avoidLIRForMultiBlockLoop(/IsMemset=/true, IsLoopMemset))
1157	return Changed;
1158
1159	// Okay, everything looks good, insert the memset.
1160
1161	const SCEV *NumBytesS =
1162	getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1163
1164	// TODO: ideally we should still be able to generate memset if SCEV expander
1165	// is taught to generate the dependencies at the latest point.
1166	if (!isSafeToExpand(NumBytesS, *SE))
1167	return Changed;
1168
1169	Value *NumBytes =
1170	Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1171
1172	CallInst *NewCall;
1173	if (SplatValue) {
1174	AAMDNodes AATags = TheStore->getAAMetadata();
1175	for (Instruction *Store : Stores)
1176	AATags = AATags.merge(Store->getAAMetadata());
1177	if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1178	AATags = AATags.extendTo(CI->getZExtValue());
1179	else
1180	AATags = AATags.extendTo(-1);
1181
1182	NewCall = Builder.CreateMemSet(
1183	BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
1184	/isVolatile=/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
1185	} else {
1186	// Everything is emitted in default address space
1187	Type *Int8PtrTy = DestInt8PtrTy;
1188
1189	Module *M = TheStore->getModule();
1190	StringRef FuncName = "memset_pattern16";
1191	FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
1192	Int8PtrTy, Int8PtrTy, IntIdxTy);
1193	inferLibFuncAttributes(M, FuncName, *TLI);
1194
1195	// Otherwise we should form a memset_pattern16. PatternValue is known to be
1196	// an constant array of 16-bytes. Plop the value into a mergable global.
1197	GlobalVariable GV = new GlobalVariable(M, PatternValue->getType(), true,
1198	GlobalValue::PrivateLinkage,
1199	PatternValue, ".memset_pattern");
1200	GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
1201	GV->setAlignment(Align(16));
1202	Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
1203	NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
1204	}
1205	NewCall->setDebugLoc(TheStore->getDebugLoc());
1206
1207	if (MSSAU) {
1208	MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1209	NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1210	MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1211	}
1212
1213	LLVM_DEBUG(dbgs() << " Formed memset: " << NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memset: " << NewCall << "\n" << " from store to: " << Ev << " at: " << TheStore << "\n"; } } while (false)
1214	<< " from store to: " << Ev << " at: " << TheStoredo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memset: " << NewCall << "\n" << " from store to: " << Ev << " at: " << *TheStore << "\n"; } } while (false)
1215	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed memset: " << NewCall << "\n" << " from store to: " << Ev << " at: " << *TheStore << "\n"; } } while (false);
1216
1217	ORE.emit([&]() {
1218	OptimizationRemark R(DEBUG_TYPE"loop-idiom", "ProcessLoopStridedStore",
1219	NewCall->getDebugLoc(), Preheader);
1220	R << "Transformed loop-strided store in "
1221	<< ore::NV("Function", TheStore->getFunction())
1222	<< " function into a call to "
1223	<< ore::NV("NewFunction", NewCall->getCalledFunction())
1224	<< "() intrinsic";
1225	if (!Stores.empty())
1226	R << ore::setExtraArgs();
1227	for (auto *I : Stores) {
1228	R << ore::NV("FromBlock", I->getParent()->getName())
1229	<< ore::NV("ToBlock", Preheader->getName());
1230	}
1231	return R;
1232	});
1233
1234	// Okay, the memset has been formed. Zap the original store and anything that
1235	// feeds into it.
1236	for (auto *I : Stores) {
1237	if (MSSAU)
1238	MSSAU->removeMemoryAccess(I, true);
1239	deleteDeadInstruction(I);
1240	}
1241	if (MSSAU && VerifyMemorySSA)
1242	MSSAU->getMemorySSA()->verifyMemorySSA();
1243	++NumMemSet;
1244	ExpCleaner.markResultUsed();
1245	return true;
1246	}
1247
1248	/// If the stored value is a strided load in the same loop with the same stride
1249	/// this may be transformable into a memcpy. This kicks in for stuff like
1250	/// for (i) A[i] = B[i];
1251	bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1252	const SCEV *BECount) {
1253	assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.")(static_cast <bool> (SI->isUnordered() && "Expected only non-volatile non-ordered stores." ) ? void (0) : __assert_fail ("SI->isUnordered() && \"Expected only non-volatile non-ordered stores.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1253, __extension__ __PRETTY_FUNCTION__));
1254
1255	Value *StorePtr = SI->getPointerOperand();
1256	const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1257	unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1258
1259	// The store must be feeding a non-volatile load.
1260	LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1261	assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.")(static_cast <bool> (LI->isUnordered() && "Expected only non-volatile non-ordered loads." ) ? void (0) : __assert_fail ("LI->isUnordered() && \"Expected only non-volatile non-ordered loads.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1261, __extension__ __PRETTY_FUNCTION__));
1262
1263	// See if the pointer expression is an AddRec like {base,+,1} on the current
1264	// loop, which indicates a strided load. If we have something else, it's a
1265	// random load we can't handle.
1266	Value *LoadPtr = LI->getPointerOperand();
1267	const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1268
1269	const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
1270	return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1271	SI->getAlign(), LI->getAlign(), SI, LI,
1272	StoreEv, LoadEv, BECount);
1273	}
1274
1275	class MemmoveVerifier {
1276	public:
1277	explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
1278	const DataLayout &DL)
1279	: DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset(
1280	LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
1281	BP2(llvm::GetPointerBaseWithConstantOffset(
1282	StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
1283	IsSameObject(BP1 == BP2) {}
1284
1285	bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
1286	const Instruction &TheLoad,
1287	bool IsMemCpy) const {
1288	if (IsMemCpy) {
1289	// Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
1290	// for negative stride.
1291	if ((!IsNegStride && LoadOff <= StoreOff) \|\|
1292	(IsNegStride && LoadOff >= StoreOff))
1293	return false;
1294	} else {
1295	// Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
1296	// for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
1297	int64_t LoadSize =
1298	DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
1299	if (BP1 != BP2 \|\| LoadSize != int64_t(StoreSize))
1300	return false;
1301	if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) \|\|
1302	(IsNegStride && LoadOff + LoadSize > StoreOff))
1303	return false;
1304	}
1305	return true;
1306	}
1307
1308	private:
1309	const DataLayout &DL;
1310	int64_t LoadOff = 0;
1311	int64_t StoreOff = 0;
1312	const Value *BP1;
1313	const Value *BP2;
1314
1315	public:
1316	const bool IsSameObject;
1317	};
1318
1319	bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1320	Value DestPtr, Value SourcePtr, const SCEV *StoreSizeSCEV,
1321	MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
1322	Instruction TheLoad, const SCEVAddRecExpr StoreEv,
1323	const SCEVAddRecExpr LoadEv, const SCEV BECount) {
1324
1325	// FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
1326	// conservatively bail here, since otherwise we may have to transform
1327	// llvm.memcpy.inline into llvm.memcpy which is illegal.
1328	if (isa<MemCpyInlineInst>(TheStore))
1329	return false;
1330
1331	// The trip count of the loop and the base pointer of the addrec SCEV is
1332	// guaranteed to be loop invariant, which means that it should dominate the
1333	// header. This allows us to insert code for it in the preheader.
1334	BasicBlock *Preheader = CurLoop->getLoopPreheader();
1335	IRBuilder<> Builder(Preheader->getTerminator());
1336	SCEVExpander Expander(SE, DL, "loop-idiom");
1337
1338	SCEVExpanderCleaner ExpCleaner(Expander);
1339
1340	bool Changed = false;
1341	const SCEV *StrStart = StoreEv->getStart();
1342	unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
1343	Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
1344
1345	APInt Stride = getStoreStride(StoreEv);
1346	const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1347
1348	// TODO: Deal with non-constant size; Currently expect constant store size
1349	assert(ConstStoreSize && "store size is expected to be a constant")(static_cast <bool> (ConstStoreSize && "store size is expected to be a constant" ) ? void (0) : __assert_fail ("ConstStoreSize && \"store size is expected to be a constant\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1349, __extension__ __PRETTY_FUNCTION__));
1350
1351	int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
1352	bool IsNegStride = StoreSize == -Stride;
1353
1354	// Handle negative strided loops.
1355	if (IsNegStride)
1356	StrStart =
1357	getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
1358
1359	// Okay, we have a strided store "p[i]" of a loaded value. We can turn
1360	// this into a memcpy in the loop preheader now if we want. However, this
1361	// would be unsafe to do if there is anything else in the loop that may read
1362	// or write the memory region we're storing to. This includes the load that
1363	// feeds the stores. Check for an alias by generating the base address and
1364	// checking everything.
1365	Value *StoreBasePtr = Expander.expandCodeFor(
1366	StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1367
1368	// From here on out, conservatively report to the pass manager that we've
1369	// changed the IR, even if we later clean up these added instructions. There
1370	// may be structural differences e.g. in the order of use lists not accounted
1371	// for in just a textual dump of the IR. This is written as a variable, even
1372	// though statically all the places this dominates could be replaced with
1373	// 'true', with the hope that anyone trying to be clever / "more precise" with
1374	// the return value will read this comment, and leave them alone.
1375	Changed = true;
1376
1377	SmallPtrSet<Instruction *, 2> IgnoredInsts;
1378	IgnoredInsts.insert(TheStore);
1379
1380	bool IsMemCpy = isa<MemCpyInst>(TheStore);
1381	const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
1382
1383	bool LoopAccessStore =
1384	mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1385	StoreSizeSCEV, *AA, IgnoredInsts);
1386	if (LoopAccessStore) {
1387	// For memmove case it's not enough to guarantee that loop doesn't access
1388	// TheStore and TheLoad. Additionally we need to make sure that TheStore is
1389	// the only user of TheLoad.
1390	if (!TheLoad->hasOneUse())
1391	return Changed;
1392	IgnoredInsts.insert(TheLoad);
1393	if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
1394	BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1395	ORE.emit([&]() {
1396	return OptimizationRemarkMissed(DEBUG_TYPE"loop-idiom", "LoopMayAccessStore",
1397	TheStore)
1398	<< ore::NV("Inst", InstRemark) << " in "
1399	<< ore::NV("Function", TheStore->getFunction())
1400	<< " function will not be hoisted: "
1401	<< ore::NV("Reason", "The loop may access store location");
1402	});
1403	return Changed;
1404	}
1405	IgnoredInsts.erase(TheLoad);
1406	}
1407
1408	const SCEV *LdStart = LoadEv->getStart();
1409	unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
1410
1411	// Handle negative strided loops.
1412	if (IsNegStride)
1413	LdStart =
1414	getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
1415
1416	// For a memcpy, we have to make sure that the input array is not being
1417	// mutated by the loop.
1418	Value *LoadBasePtr = Expander.expandCodeFor(
1419	LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1420
1421	// If the store is a memcpy instruction, we must check if it will write to
1422	// the load memory locations. So remove it from the ignored stores.
1423	if (IsMemCpy)
1424	IgnoredInsts.erase(TheStore);
1425	MemmoveVerifier Verifier(LoadBasePtr, StoreBasePtr, *DL);
1426	if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1427	StoreSizeSCEV, *AA, IgnoredInsts)) {
1428	if (!IsMemCpy) {
1429	ORE.emit([&]() {
1430	return OptimizationRemarkMissed(DEBUG_TYPE"loop-idiom", "LoopMayAccessLoad",
1431	TheLoad)
1432	<< ore::NV("Inst", InstRemark) << " in "
1433	<< ore::NV("Function", TheStore->getFunction())
1434	<< " function will not be hoisted: "
1435	<< ore::NV("Reason", "The loop may access load location");
1436	});
1437	return Changed;
1438	}
1439	// At this point loop may access load only for memcpy in same underlying
1440	// object. If that's not the case bail out.
1441	if (!Verifier.IsSameObject)
1442	return Changed;
1443	}
1444
1445	bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
1446	if (UseMemMove)
1447	if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1448	IsMemCpy))
1449	return Changed;
1450
1451	if (avoidLIRForMultiBlockLoop())
1452	return Changed;
1453
1454	// Okay, everything is safe, we can transform this!
1455
1456	const SCEV *NumBytesS =
1457	getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1458
1459	Value *NumBytes =
1460	Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1461
1462	AAMDNodes AATags = TheLoad->getAAMetadata();
1463	AAMDNodes StoreAATags = TheStore->getAAMetadata();
1464	AATags = AATags.merge(StoreAATags);
1465	if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1466	AATags = AATags.extendTo(CI->getZExtValue());
1467	else
1468	AATags = AATags.extendTo(-1);
1469
1470	CallInst *NewCall = nullptr;
1471	// Check whether to generate an unordered atomic memcpy:
1472	// If the load or store are atomic, then they must necessarily be unordered
1473	// by previous checks.
1474	if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
1475	if (UseMemMove)
1476	NewCall = Builder.CreateMemMove(
1477	StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1478	/isVolatile=/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
1479	else
1480	NewCall =
1481	Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1482	NumBytes, /isVolatile=/false, AATags.TBAA,
1483	AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
1484	} else {
1485	// For now don't support unordered atomic memmove.
1486	if (UseMemMove)
1487	return Changed;
1488	// We cannot allow unaligned ops for unordered load/store, so reject
1489	// anything where the alignment isn't at least the element size.
1490	assert((StoreAlign.hasValue() && LoadAlign.hasValue()) &&(static_cast <bool> ((StoreAlign.hasValue() && LoadAlign .hasValue()) && "Expect unordered load/store to have align." ) ? void (0) : __assert_fail ("(StoreAlign.hasValue() && LoadAlign.hasValue()) && \"Expect unordered load/store to have align.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1491, __extension__ __PRETTY_FUNCTION__))
1491	"Expect unordered load/store to have align.")(static_cast <bool> ((StoreAlign.hasValue() && LoadAlign .hasValue()) && "Expect unordered load/store to have align." ) ? void (0) : __assert_fail ("(StoreAlign.hasValue() && LoadAlign.hasValue()) && \"Expect unordered load/store to have align.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 1491, __extension__ __PRETTY_FUNCTION__));
1492	if (StoreAlign.getValue() < StoreSize \|\| LoadAlign.getValue() < StoreSize)
1493	return Changed;
1494
1495	// If the element.atomic memcpy is not lowered into explicit
1496	// loads/stores later, then it will be lowered into an element-size
1497	// specific lib call. If the lib call doesn't exist for our store size, then
1498	// we shouldn't generate the memcpy.
1499	if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1500	return Changed;
1501
1502	// Create the call.
1503	// Note that unordered atomic loads/stores are required by the spec to
1504	// have an alignment but non-atomic loads/stores may not.
1505	NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1506	StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
1507	NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope,
1508	AATags.NoAlias);
1509	}
1510	NewCall->setDebugLoc(TheStore->getDebugLoc());
1511
1512	if (MSSAU) {
1513	MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1514	NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1515	MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1516	}
1517
1518	LLVM_DEBUG(dbgs() << " Formed new call: " << NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed new call: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << TheLoad << "\n" << " from store ptr=" << StoreEv << " at: " << TheStore << "\n"; } } while (false)
1519	<< " from load ptr=" << LoadEv << " at: " << TheLoaddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed new call: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << TheLoad << "\n" << " from store ptr=" << StoreEv << " at: " << *TheStore << "\n"; } } while (false)
1520	<< "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed new call: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << TheLoad << "\n" << " from store ptr=" << StoreEv << " at: " << *TheStore << "\n"; } } while (false)
1521	<< " from store ptr=" << StoreEv << " at: " << TheStoredo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed new call: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << TheLoad << "\n" << " from store ptr=" << StoreEv << " at: " << *TheStore << "\n"; } } while (false)
1522	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " Formed new call: " << NewCall << "\n" << " from load ptr=" << LoadEv << " at: " << TheLoad << "\n" << " from store ptr=" << StoreEv << " at: " << *TheStore << "\n"; } } while (false);
1523
1524	ORE.emit([&]() {
1525	return OptimizationRemark(DEBUG_TYPE"loop-idiom", "ProcessLoopStoreOfLoopLoad",
1526	NewCall->getDebugLoc(), Preheader)
1527	<< "Formed a call to "
1528	<< ore::NV("NewFunction", NewCall->getCalledFunction())
1529	<< "() intrinsic from " << ore::NV("Inst", InstRemark)
1530	<< " instruction in " << ore::NV("Function", TheStore->getFunction())
1531	<< " function"
1532	<< ore::setExtraArgs()
1533	<< ore::NV("FromBlock", TheStore->getParent()->getName())
1534	<< ore::NV("ToBlock", Preheader->getName());
1535	});
1536
1537	// Okay, a new call to memcpy/memmove has been formed. Zap the original store
1538	// and anything that feeds into it.
1539	if (MSSAU)
1540	MSSAU->removeMemoryAccess(TheStore, true);
1541	deleteDeadInstruction(TheStore);
1542	if (MSSAU && VerifyMemorySSA)
1543	MSSAU->getMemorySSA()->verifyMemorySSA();
1544	if (UseMemMove)
1545	++NumMemMove;
1546	else
1547	++NumMemCpy;
1548	ExpCleaner.markResultUsed();
1549	return true;
1550	}
1551
1552	// When compiling for codesize we avoid idiom recognition for a multi-block loop
1553	// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1554	//
1555	bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1556	bool IsLoopMemset) {
1557	if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1558	if (CurLoop->isOutermost() && (!IsMemset \|\| !IsLoopMemset)) {
1559	LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " " << CurLoop->getHeader ()->getParent()->getName() << " : LIR " << ( IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n" ; } } while (false)
1560	<< " : LIR " << (IsMemset ? "Memset" : "Memcpy")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " " << CurLoop->getHeader ()->getParent()->getName() << " : LIR " << ( IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n" ; } } while (false)
1561	<< " avoided: multi-block top-level loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << " " << CurLoop->getHeader ()->getParent()->getName() << " : LIR " << ( IsMemset ? "Memset" : "Memcpy") << " avoided: multi-block top-level loop\n" ; } } while (false);
1562	return true;
1563	}
1564	}
1565
1566	return false;
1567	}
1568
1569	bool LoopIdiomRecognize::runOnNoncountableLoop() {
1570	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
1571	<< CurLoop->getHeader()->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
1572	<< "] Noncountable Loop %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false)
1573	<< CurLoop->getHeader()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Scanning: F[" << CurLoop->getHeader()->getParent()->getName () << "] Noncountable Loop %" << CurLoop->getHeader ()->getName() << "\n"; } } while (false);
1574
1575	return recognizePopcount() \|\| recognizeAndInsertFFS() \|\|
1576	recognizeShiftUntilBitTest() \|\| recognizeShiftUntilZero();
1577	}
1578
1579	/// Check if the given conditional branch is based on the comparison between
1580	/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1581	/// true), the control yields to the loop entry. If the branch matches the
1582	/// behavior, the variable involved in the comparison is returned. This function
1583	/// will be called to see if the precondition and postcondition of the loop are
1584	/// in desirable form.
1585	static Value matchCondition(BranchInst BI, BasicBlock *LoopEntry,
1586	bool JmpOnZero = false) {
1587	if (!BI \|\| !BI->isConditional())
1588	return nullptr;
1589
1590	ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1591	if (!Cond)
1592	return nullptr;
1593
1594	ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1595	if (!CmpZero \|\| !CmpZero->isZero())
1596	return nullptr;
1597
1598	BasicBlock *TrueSucc = BI->getSuccessor(0);
1599	BasicBlock *FalseSucc = BI->getSuccessor(1);
1600	if (JmpOnZero)
1601	std::swap(TrueSucc, FalseSucc);
1602
1603	ICmpInst::Predicate Pred = Cond->getPredicate();
1604	if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) \|\|
1605	(Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1606	return Cond->getOperand(0);
1607
1608	return nullptr;
1609	}
1610
1611	// Check if the recurrence variable `VarX` is in the right form to create
1612	// the idiom. Returns the value coerced to a PHINode if so.
1613	static PHINode getRecurrenceVar(Value VarX, Instruction *DefX,
1614	BasicBlock *LoopEntry) {
1615	auto *PhiX = dyn_cast<PHINode>(VarX);
1616	if (PhiX && PhiX->getParent() == LoopEntry &&
1617	(PhiX->getOperand(0) == DefX \|\| PhiX->getOperand(1) == DefX))
1618	return PhiX;
1619	return nullptr;
1620	}
1621
1622	/// Return true iff the idiom is detected in the loop.
1623	///
1624	/// Additionally:
1625	/// 1) \p CntInst is set to the instruction counting the population bit.
1626	/// 2) \p CntPhi is set to the corresponding phi node.
1627	/// 3) \p Var is set to the value whose population bits are being counted.
1628	///
1629	/// The core idiom we are trying to detect is:
1630	/// \code
1631	/// if (x0 != 0)
1632	/// goto loop-exit // the precondition of the loop
1633	/// cnt0 = init-val;
1634	/// do {
1635	/// x1 = phi (x0, x2);
1636	/// cnt1 = phi(cnt0, cnt2);
1637	///
1638	/// cnt2 = cnt1 + 1;
1639	/// ...
1640	/// x2 = x1 & (x1 - 1);
1641	/// ...
1642	/// } while(x != 0);
1643	///
1644	/// loop-exit:
1645	/// \endcode
1646	static bool detectPopcountIdiom(Loop CurLoop, BasicBlock PreCondBB,
1647	Instruction &CntInst, PHINode &CntPhi,
1648	Value *&Var) {
1649	// step 1: Check to see if the look-back branch match this pattern:
1650	// "if (a!=0) goto loop-entry".
1651	BasicBlock *LoopEntry;
1652	Instruction DefX2, CountInst;
1653	Value VarX1, VarX0;
1654	PHINode PhiX, CountPhi;
1655
1656	DefX2 = CountInst = nullptr;
1657	VarX1 = VarX0 = nullptr;
1658	PhiX = CountPhi = nullptr;
1659	LoopEntry = *(CurLoop->block_begin());
1660
1661	// step 1: Check if the loop-back branch is in desirable form.
1662	{
1663	if (Value *T = matchCondition(
1664	dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1665	DefX2 = dyn_cast<Instruction>(T);
1666	else
1667	return false;
1668	}
1669
1670	// step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1671	{
1672	if (!DefX2 \|\| DefX2->getOpcode() != Instruction::And)
1673	return false;
1674
1675	BinaryOperator *SubOneOp;
1676
1677	if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1678	VarX1 = DefX2->getOperand(1);
1679	else {
1680	VarX1 = DefX2->getOperand(0);
1681	SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1682	}
1683	if (!SubOneOp \|\| SubOneOp->getOperand(0) != VarX1)
1684	return false;
1685
1686	ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1687	if (!Dec \|\|
1688	!((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) \|\|
1689	(SubOneOp->getOpcode() == Instruction::Add &&
1690	Dec->isMinusOne()))) {
1691	return false;
1692	}
1693	}
1694
1695	// step 3: Check the recurrence of variable X
1696	PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1697	if (!PhiX)
1698	return false;
1699
1700	// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1701	{
1702	CountInst = nullptr;
1703	for (Instruction &Inst : llvm::make_range(
1704	LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1705	if (Inst.getOpcode() != Instruction::Add)
1706	continue;
1707
1708	ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1709	if (!Inc \|\| !Inc->isOne())
1710	continue;
1711
1712	PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1713	if (!Phi)
1714	continue;
1715
1716	// Check if the result of the instruction is live of the loop.
1717	bool LiveOutLoop = false;
1718	for (User *U : Inst.users()) {
1719	if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1720	LiveOutLoop = true;
1721	break;
1722	}
1723	}
1724
1725	if (LiveOutLoop) {
1726	CountInst = &Inst;
1727	CountPhi = Phi;
1728	break;
1729	}
1730	}
1731
1732	if (!CountInst)
1733	return false;
1734	}
1735
1736	// step 5: check if the precondition is in this form:
1737	// "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1738	{
1739	auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1740	Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1741	if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1742	return false;
1743
1744	CntInst = CountInst;
1745	CntPhi = CountPhi;
1746	Var = T;
1747	}
1748
1749	return true;
1750	}
1751
1752	/// Return true if the idiom is detected in the loop.
1753	///
1754	/// Additionally:
1755	/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1756	/// or nullptr if there is no such.
1757	/// 2) \p CntPhi is set to the corresponding phi node
1758	/// or nullptr if there is no such.
1759	/// 3) \p Var is set to the value whose CTLZ could be used.
1760	/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1761	///
1762	/// The core idiom we are trying to detect is:
1763	/// \code
1764	/// if (x0 == 0)
1765	/// goto loop-exit // the precondition of the loop
1766	/// cnt0 = init-val;
1767	/// do {
1768	/// x = phi (x0, x.next); //PhiX
1769	/// cnt = phi(cnt0, cnt.next);
1770	///
1771	/// cnt.next = cnt + 1;
1772	/// ...
1773	/// x.next = x >> 1; // DefX
1774	/// ...
1775	/// } while(x.next != 0);
1776	///
1777	/// loop-exit:
1778	/// \endcode
1779	static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1780	Intrinsic::ID &IntrinID, Value *&InitX,
1781	Instruction &CntInst, PHINode &CntPhi,
1782	Instruction *&DefX) {
1783	BasicBlock *LoopEntry;
1784	Value *VarX = nullptr;
1785
1786	DefX = nullptr;
1787	CntInst = nullptr;
1788	CntPhi = nullptr;
1789	LoopEntry = *(CurLoop->block_begin());
1790
1791	// step 1: Check if the loop-back branch is in desirable form.
1792	if (Value *T = matchCondition(
1793	dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1794	DefX = dyn_cast<Instruction>(T);
1795	else
1796	return false;
1797
1798	// step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1799	if (!DefX \|\| !DefX->isShift())
1800	return false;
1801	IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1802	Intrinsic::ctlz;
1803	ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1804	if (!Shft \|\| !Shft->isOne())
1805	return false;
1806	VarX = DefX->getOperand(0);
1807
1808	// step 3: Check the recurrence of variable X
1809	PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1810	if (!PhiX)
1811	return false;
1812
1813	InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1814
1815	// Make sure the initial value can't be negative otherwise the ashr in the
1816	// loop might never reach zero which would make the loop infinite.
1817	if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1818	return false;
1819
1820	// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1821	// or cnt.next = cnt + -1.
1822	// TODO: We can skip the step. If loop trip count is known (CTLZ),
1823	// then all uses of "cnt.next" could be optimized to the trip count
1824	// plus "cnt0". Currently it is not optimized.
1825	// This step could be used to detect POPCNT instruction:
1826	// cnt.next = cnt + (x.next & 1)
1827	for (Instruction &Inst : llvm::make_range(
1828	LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1829	if (Inst.getOpcode() != Instruction::Add)
1830	continue;
1831
1832	ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1833	if (!Inc \|\| (!Inc->isOne() && !Inc->isMinusOne()))
1834	continue;
1835
1836	PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1837	if (!Phi)
1838	continue;
1839
1840	CntInst = &Inst;
1841	CntPhi = Phi;
1842	break;
1843	}
1844	if (!CntInst)
1845	return false;
1846
1847	return true;
1848	}
1849
1850	/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1851	/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1852	/// trip count returns true; otherwise, returns false.
1853	bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1854	// Give up if the loop has multiple blocks or multiple backedges.
1855	if (CurLoop->getNumBackEdges() != 1 \|\| CurLoop->getNumBlocks() != 1)
1856	return false;
1857
1858	Intrinsic::ID IntrinID;
1859	Value *InitX;
1860	Instruction *DefX = nullptr;
1861	PHINode *CntPhi = nullptr;
1862	Instruction *CntInst = nullptr;
1863	// Help decide if transformation is profitable. For ShiftUntilZero idiom,
1864	// this is always 6.
1865	size_t IdiomCanonicalSize = 6;
1866
1867	if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1868	CntInst, CntPhi, DefX))
1869	return false;
1870
1871	bool IsCntPhiUsedOutsideLoop = false;
1872	for (User *U : CntPhi->users())
1873	if (!CurLoop->contains(cast<Instruction>(U))) {
1874	IsCntPhiUsedOutsideLoop = true;
1875	break;
1876	}
1877	bool IsCntInstUsedOutsideLoop = false;
1878	for (User *U : CntInst->users())
1879	if (!CurLoop->contains(cast<Instruction>(U))) {
1880	IsCntInstUsedOutsideLoop = true;
1881	break;
1882	}
1883	// If both CntInst and CntPhi are used outside the loop the profitability
1884	// is questionable.
1885	if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1886	return false;
1887
1888	// For some CPUs result of CTLZ(X) intrinsic is undefined
1889	// when X is 0. If we can not guarantee X != 0, we need to check this
1890	// when expand.
1891	bool ZeroCheck = false;
1892	// It is safe to assume Preheader exist as it was checked in
1893	// parent function RunOnLoop.
1894	BasicBlock *PH = CurLoop->getLoopPreheader();
1895
1896	// If we are using the count instruction outside the loop, make sure we
1897	// have a zero check as a precondition. Without the check the loop would run
1898	// one iteration for before any check of the input value. This means 0 and 1
1899	// would have identical behavior in the original loop and thus
1900	if (!IsCntPhiUsedOutsideLoop) {
1901	auto *PreCondBB = PH->getSinglePredecessor();
1902	if (!PreCondBB)
1903	return false;
1904	auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1905	if (!PreCondBI)
1906	return false;
1907	if (matchCondition(PreCondBI, PH) != InitX)
1908	return false;
1909	ZeroCheck = true;
1910	}
1911
1912	// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1913	// profitable if we delete the loop.
1914
1915	// the loop has only 6 instructions:
1916	// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1917	// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1918	// %shr = ashr %n.addr.0, 1
1919	// %tobool = icmp eq %shr, 0
1920	// %inc = add nsw %i.0, 1
1921	// br i1 %tobool
1922
1923	const Value *Args[] = {InitX,
1924	ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
1925
1926	// @llvm.dbg doesn't count as they have no semantic effect.
1927	auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1928	uint32_t HeaderSize =
1929	std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1930
1931	IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1932	InstructionCost Cost =
1933	TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1934	if (HeaderSize != IdiomCanonicalSize &&
1935	Cost > TargetTransformInfo::TCC_Basic)
1936	return false;
1937
1938	transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1939	DefX->getDebugLoc(), ZeroCheck,
1940	IsCntPhiUsedOutsideLoop);
1941	return true;
1942	}
1943
1944	/// Recognizes a population count idiom in a non-countable loop.
1945	///
1946	/// If detected, transforms the relevant code to issue the popcount intrinsic
1947	/// function call, and returns true; otherwise, returns false.
1948	bool LoopIdiomRecognize::recognizePopcount() {
1949	if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
1950	return false;
1951
1952	// Counting population are usually conducted by few arithmetic instructions.
1953	// Such instructions can be easily "absorbed" by vacant slots in a
1954	// non-compact loop. Therefore, recognizing popcount idiom only makes sense
1955	// in a compact loop.
1956
1957	// Give up if the loop has multiple blocks or multiple backedges.
1958	if (CurLoop->getNumBackEdges() != 1 \|\| CurLoop->getNumBlocks() != 1)
1959	return false;
1960
1961	BasicBlock LoopBody = (CurLoop->block_begin());
1962	if (LoopBody->size() >= 20) {
1963	// The loop is too big, bail out.
1964	return false;
1965	}
1966
1967	// It should have a preheader containing nothing but an unconditional branch.
1968	BasicBlock *PH = CurLoop->getLoopPreheader();
1969	if (!PH \|\| &PH->front() != PH->getTerminator())
1970	return false;
1971	auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1972	if (!EntryBI \|\| EntryBI->isConditional())
1973	return false;
1974
1975	// It should have a precondition block where the generated popcount intrinsic
1976	// function can be inserted.
1977	auto *PreCondBB = PH->getSinglePredecessor();
1978	if (!PreCondBB)
1979	return false;
1980	auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1981	if (!PreCondBI \|\| PreCondBI->isUnconditional())
1982	return false;
1983
1984	Instruction *CntInst;
1985	PHINode *CntPhi;
1986	Value *Val;
1987	if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1988	return false;
1989
1990	transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1991	return true;
1992	}
1993
1994	static CallInst createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value Val,
1995	const DebugLoc &DL) {
1996	Value *Ops[] = {Val};
1997	Type *Tys[] = {Val->getType()};
1998
1999	Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
2000	Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
2001	CallInst *CI = IRBuilder.CreateCall(Func, Ops);
2002	CI->setDebugLoc(DL);
2003
2004	return CI;
2005	}
2006
2007	static CallInst createFFSIntrinsic(IRBuilder<> &IRBuilder, Value Val,
2008	const DebugLoc &DL, bool ZeroCheck,
2009	Intrinsic::ID IID) {
2010	Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
2011	Type *Tys[] = {Val->getType()};
2012
2013	Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
2014	Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
2015	CallInst *CI = IRBuilder.CreateCall(Func, Ops);
2016	CI->setDebugLoc(DL);
2017
2018	return CI;
2019	}
2020
2021	/// Transform the following loop (Using CTLZ, CTTZ is similar):
2022	/// loop:
2023	/// CntPhi = PHI [Cnt0, CntInst]
2024	/// PhiX = PHI [InitX, DefX]
2025	/// CntInst = CntPhi + 1
2026	/// DefX = PhiX >> 1
2027	/// LOOP_BODY
2028	/// Br: loop if (DefX != 0)
2029	/// Use(CntPhi) or Use(CntInst)
2030	///
2031	/// Into:
2032	/// If CntPhi used outside the loop:
2033	/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
2034	/// Count = CountPrev + 1
2035	/// else
2036	/// Count = BitWidth(InitX) - CTLZ(InitX)
2037	/// loop:
2038	/// CntPhi = PHI [Cnt0, CntInst]
2039	/// PhiX = PHI [InitX, DefX]
2040	/// PhiCount = PHI [Count, Dec]
2041	/// CntInst = CntPhi + 1
2042	/// DefX = PhiX >> 1
2043	/// Dec = PhiCount - 1
2044	/// LOOP_BODY
2045	/// Br: loop if (Dec != 0)
2046	/// Use(CountPrev + Cnt0) // Use(CntPhi)
2047	/// or
2048	/// Use(Count + Cnt0) // Use(CntInst)
2049	///
2050	/// If LOOP_BODY is empty the loop will be deleted.
2051	/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
2052	void LoopIdiomRecognize::transformLoopToCountable(
2053	Intrinsic::ID IntrinID, BasicBlock Preheader, Instruction CntInst,
2054	PHINode CntPhi, Value InitX, Instruction *DefX, const DebugLoc &DL,
2055	bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
2056	BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
2057
2058	// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
2059	IRBuilder<> Builder(PreheaderBr);
2060	Builder.SetCurrentDebugLocation(DL);
2061
2062	// If there are no uses of CntPhi crate:
2063	// Count = BitWidth - CTLZ(InitX);
2064	// NewCount = Count;
2065	// If there are uses of CntPhi create:
2066	// NewCount = BitWidth - CTLZ(InitX >> 1);
2067	// Count = NewCount + 1;
2068	Value *InitXNext;
2069	if (IsCntPhiUsedOutsideLoop) {
2070	if (DefX->getOpcode() == Instruction::AShr)
2071	InitXNext = Builder.CreateAShr(InitX, 1);
2072	else if (DefX->getOpcode() == Instruction::LShr)
2073	InitXNext = Builder.CreateLShr(InitX, 1);
2074	else if (DefX->getOpcode() == Instruction::Shl) // cttz
2075	InitXNext = Builder.CreateShl(InitX, 1);
2076	else
2077	llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp" , 2077);
2078	} else
2079	InitXNext = InitX;
2080	Value *Count =
2081	createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
2082	Type *CountTy = Count->getType();
2083	Count = Builder.CreateSub(
2084	ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
2085	Value *NewCount = Count;
2086	if (IsCntPhiUsedOutsideLoop)
2087	Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2088
2089	NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
2090
2091	Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
2092	if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
2093	// If the counter was being incremented in the loop, add NewCount to the
2094	// counter's initial value, but only if the initial value is not zero.
2095	ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2096	if (!InitConst \|\| !InitConst->isZero())
2097	NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2098	} else {
2099	// If the count was being decremented in the loop, subtract NewCount from
2100	// the counter's initial value.
2101	NewCount = Builder.CreateSub(CntInitVal, NewCount);
2102	}
2103
2104	// Step 2: Insert new IV and loop condition:
2105	// loop:
2106	// ...
2107	// PhiCount = PHI [Count, Dec]
2108	// ...
2109	// Dec = PhiCount - 1
2110	// ...
2111	// Br: loop if (Dec != 0)
2112	BasicBlock Body = (CurLoop->block_begin());
2113	auto *LbBr = cast<BranchInst>(Body->getTerminator());
2114	ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2115
2116	PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi", &Body->front());
2117
2118	Builder.SetInsertPoint(LbCond);
2119	Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2120	TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
2121
2122	TcPhi->addIncoming(Count, Preheader);
2123	TcPhi->addIncoming(TcDec, Body);
2124
2125	CmpInst::Predicate Pred =
2126	(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
2127	LbCond->setPredicate(Pred);
2128	LbCond->setOperand(0, TcDec);
2129	LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
2130
2131	// Step 3: All the references to the original counter outside
2132	// the loop are replaced with the NewCount
2133	if (IsCntPhiUsedOutsideLoop)
2134	CntPhi->replaceUsesOutsideBlock(NewCount, Body);
2135	else
2136	CntInst->replaceUsesOutsideBlock(NewCount, Body);
2137
2138	// step 4: Forget the "non-computable" trip-count SCEV associated with the
2139	// loop. The loop would otherwise not be deleted even if it becomes empty.
2140	SE->forgetLoop(CurLoop);
2141	}
2142
2143	void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
2144	Instruction *CntInst,
2145	PHINode CntPhi, Value Var) {
2146	BasicBlock *PreHead = CurLoop->getLoopPreheader();
2147	auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
2148	const DebugLoc &DL = CntInst->getDebugLoc();
2149
2150	// Assuming before transformation, the loop is following:
2151	// if (x) // the precondition
2152	// do { cnt++; x &= x - 1; } while(x);
2153
2154	// Step 1: Insert the ctpop instruction at the end of the precondition block
2155	IRBuilder<> Builder(PreCondBr);
2156	Value PopCnt, PopCntZext, NewCount, TripCnt;
2157	{
2158	PopCnt = createPopcntIntrinsic(Builder, Var, DL);
2159	NewCount = PopCntZext =
2160	Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
2161
2162	if (NewCount != PopCnt)
2163	(cast<Instruction>(NewCount))->setDebugLoc(DL);
2164
2165	// TripCnt is exactly the number of iterations the loop has
2166	TripCnt = NewCount;
2167
2168	// If the population counter's initial value is not zero, insert Add Inst.
2169	Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
2170	ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2171	if (!InitConst \|\| !InitConst->isZero()) {
2172	NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2173	(cast<Instruction>(NewCount))->setDebugLoc(DL);
2174	}
2175	}
2176
2177	// Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
2178	// "if (NewCount == 0) loop-exit". Without this change, the intrinsic
2179	// function would be partial dead code, and downstream passes will drag
2180	// it back from the precondition block to the preheader.
2181	{
2182	ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2183
2184	Value *Opnd0 = PopCntZext;
2185	Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
2186	if (PreCond->getOperand(0) != Var)
2187	std::swap(Opnd0, Opnd1);
2188
2189	ICmpInst *NewPreCond = cast<ICmpInst>(
2190	Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
2191	PreCondBr->setCondition(NewPreCond);
2192
2193	RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
2194	}
2195
2196	// Step 3: Note that the population count is exactly the trip count of the
2197	// loop in question, which enable us to convert the loop from noncountable
2198	// loop into a countable one. The benefit is twofold:
2199	//
2200	// - If the loop only counts population, the entire loop becomes dead after
2201	// the transformation. It is a lot easier to prove a countable loop dead
2202	// than to prove a noncountable one. (In some C dialects, an infinite loop
2203	// isn't dead even if it computes nothing useful. In general, DCE needs
2204	// to prove a noncountable loop finite before safely delete it.)
2205	//
2206	// - If the loop also performs something else, it remains alive.
2207	// Since it is transformed to countable form, it can be aggressively
2208	// optimized by some optimizations which are in general not applicable
2209	// to a noncountable loop.
2210	//
2211	// After this step, this loop (conceptually) would look like following:
2212	// newcnt = __builtin_ctpop(x);
2213	// t = newcnt;
2214	// if (x)
2215	// do { cnt++; x &= x-1; t--) } while (t > 0);
2216	BasicBlock Body = (CurLoop->block_begin());
2217	{
2218	auto *LbBr = cast<BranchInst>(Body->getTerminator());
2219	ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2220	Type *Ty = TripCnt->getType();
2221
2222	PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
2223
2224	Builder.SetInsertPoint(LbCond);
2225	Instruction *TcDec = cast<Instruction>(
2226	Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2227	"tcdec", false, true));
2228
2229	TcPhi->addIncoming(TripCnt, PreHead);
2230	TcPhi->addIncoming(TcDec, Body);
2231
2232	CmpInst::Predicate Pred =
2233	(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
2234	LbCond->setPredicate(Pred);
2235	LbCond->setOperand(0, TcDec);
2236	LbCond->setOperand(1, ConstantInt::get(Ty, 0));
2237	}
2238
2239	// Step 4: All the references to the original population counter outside
2240	// the loop are replaced with the NewCount -- the value returned from
2241	// __builtin_ctpop().
2242	CntInst->replaceUsesOutsideBlock(NewCount, Body);
2243
2244	// step 5: Forget the "non-computable" trip-count SCEV associated with the
2245	// loop. The loop would otherwise not be deleted even if it becomes empty.
2246	SE->forgetLoop(CurLoop);
2247	}
2248
2249	/// Match loop-invariant value.
2250	template <typename SubPattern_t> struct match_LoopInvariant {
2251	SubPattern_t SubPattern;
2252	const Loop *L;
2253
2254	match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
2255	: SubPattern(SP), L(L) {}
2256
2257	template <typename ITy> bool match(ITy *V) {
2258	return L->isLoopInvariant(V) && SubPattern.match(V);
2259	}
2260	};
2261
2262	/// Matches if the value is loop-invariant.
2263	template <typename Ty>
2264	inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
2265	return match_LoopInvariant<Ty>(M, L);
2266	}
2267
2268	/// Return true if the idiom is detected in the loop.
2269	///
2270	/// The core idiom we are trying to detect is:
2271	/// \code
2272	/// entry:
2273	/// <...>
2274	/// %bitmask = shl i32 1, %bitpos
2275	/// br label %loop
2276	///
2277	/// loop:
2278	/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2279	/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2280	/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2281	/// %x.next = shl i32 %x.curr, 1
2282	/// <...>
2283	/// br i1 %x.curr.isbitunset, label %loop, label %end
2284	///
2285	/// end:
2286	/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2287	/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2288	/// <...>
2289	/// \endcode
2290	static bool detectShiftUntilBitTestIdiom(Loop CurLoop, Value &BaseX,
2291	Value &BitMask, Value &BitPos,
2292	Value &CurrX, Instruction &NextX) {
2293	LLVM_DEBUG(dbgs() << DEBUG_TYPEdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Performing shift-until-bittest idiom detection.\n" ; } } while (false)
2294	" Performing shift-until-bittest idiom detection.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Performing shift-until-bittest idiom detection.\n" ; } } while (false);
2295
2296	// Give up if the loop has multiple blocks or multiple backedges.
2297	if (CurLoop->getNumBlocks() != 1 \|\| CurLoop->getNumBackEdges() != 1) {
2298	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad block/backedge count.\n" ; } } while (false);
2299	return false;
2300	}
2301
2302	BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2303	BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2304	assert(LoopPreheaderBB && "There is always a loop preheader.")(static_cast <bool> (LoopPreheaderBB && "There is always a loop preheader." ) ? void (0) : __assert_fail ("LoopPreheaderBB && \"There is always a loop preheader.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2304, __extension__ __PRETTY_FUNCTION__));
2305
2306	using namespace PatternMatch;
2307
2308	// Step 1: Check if the loop backedge is in desirable form.
2309
2310	ICmpInst::Predicate Pred;
2311	Value CmpLHS, CmpRHS;
2312	BasicBlock TrueBB, FalseBB;
2313	if (!match(LoopHeaderBB->getTerminator(),
2314	m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
2315	m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
2316	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad backedge structure.\n" ; } } while (false);
2317	return false;
2318	}
2319
2320	// Step 2: Check if the backedge's condition is in desirable form.
2321
2322	auto MatchVariableBitMask = [&]() {
2323	return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2324	match(CmpLHS,
2325	m_c_And(m_Value(CurrX),
2326	m_CombineAnd(
2327	m_Value(BitMask),
2328	m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
2329	CurLoop))));
2330	};
2331	auto MatchConstantBitMask = [&]() {
2332	return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2333	match(CmpLHS, m_And(m_Value(CurrX),
2334	m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
2335	(BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
2336	};
2337	auto MatchDecomposableConstantBitMask = [&]() {
2338	APInt Mask;
2339	return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
2340	ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
2341	(BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
2342	(BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
2343	};
2344
2345	if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2346	!MatchDecomposableConstantBitMask()) {
2347	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad backedge comparison.\n" ; } } while (false);
2348	return false;
2349	}
2350
2351	// Step 3: Check if the recurrence is in desirable form.
2352	auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2353	if (!CurrXPN \|\| CurrXPN->getParent() != LoopHeaderBB) {
2354	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Not an expected PHI node.\n" ; } } while (false);
2355	return false;
2356	}
2357
2358	BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2359	NextX =
2360	dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2361
2362	assert(CurLoop->isLoopInvariant(BaseX) &&(static_cast <bool> (CurLoop->isLoopInvariant(BaseX) && "Expected BaseX to be avaliable in the preheader!" ) ? void (0) : __assert_fail ("CurLoop->isLoopInvariant(BaseX) && \"Expected BaseX to be avaliable in the preheader!\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2363, __extension__ __PRETTY_FUNCTION__))
2363	"Expected BaseX to be avaliable in the preheader!")(static_cast <bool> (CurLoop->isLoopInvariant(BaseX) && "Expected BaseX to be avaliable in the preheader!" ) ? void (0) : __assert_fail ("CurLoop->isLoopInvariant(BaseX) && \"Expected BaseX to be avaliable in the preheader!\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2363, __extension__ __PRETTY_FUNCTION__));
2364
2365	if (!NextX \|\| !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
2366	// FIXME: support right-shift?
2367	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad recurrence.\n" ; } } while (false);
2368	return false;
2369	}
2370
2371	// Step 4: Check if the backedge's destinations are in desirable form.
2372
2373	assert(ICmpInst::isEquality(Pred) &&(static_cast <bool> (ICmpInst::isEquality(Pred) && "Should only get equality predicates here.") ? void (0) : __assert_fail ("ICmpInst::isEquality(Pred) && \"Should only get equality predicates here.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2374, __extension__ __PRETTY_FUNCTION__))
2374	"Should only get equality predicates here.")(static_cast <bool> (ICmpInst::isEquality(Pred) && "Should only get equality predicates here.") ? void (0) : __assert_fail ("ICmpInst::isEquality(Pred) && \"Should only get equality predicates here.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2374, __extension__ __PRETTY_FUNCTION__));
2375
2376	// cmp-br is commutative, so canonicalize to a single variant.
2377	if (Pred != ICmpInst::Predicate::ICMP_EQ) {
2378	Pred = ICmpInst::getInversePredicate(Pred);
2379	std::swap(TrueBB, FalseBB);
2380	}
2381
2382	// We expect to exit loop when comparison yields false,
2383	// so when it yields true we should branch back to loop header.
2384	if (TrueBB != LoopHeaderBB) {
2385	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad backedge flow.\n" ; } } while (false);
2386	return false;
2387	}
2388
2389	// Okay, idiom checks out.
2390	return true;
2391	}
2392
2393	/// Look for the following loop:
2394	/// \code
2395	/// entry:
2396	/// <...>
2397	/// %bitmask = shl i32 1, %bitpos
2398	/// br label %loop
2399	///
2400	/// loop:
2401	/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2402	/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2403	/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2404	/// %x.next = shl i32 %x.curr, 1
2405	/// <...>
2406	/// br i1 %x.curr.isbitunset, label %loop, label %end
2407	///
2408	/// end:
2409	/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2410	/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2411	/// <...>
2412	/// \endcode
2413	///
2414	/// And transform it into:
2415	/// \code
2416	/// entry:
2417	/// %bitmask = shl i32 1, %bitpos
2418	/// %lowbitmask = add i32 %bitmask, -1
2419	/// %mask = or i32 %lowbitmask, %bitmask
2420	/// %x.masked = and i32 %x, %mask
2421	/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
2422	/// i1 true)
2423	/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
2424	/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
2425	/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
2426	/// %tripcount = add i32 %backedgetakencount, 1
2427	/// %x.curr = shl i32 %x, %backedgetakencount
2428	/// %x.next = shl i32 %x, %tripcount
2429	/// br label %loop
2430	///
2431	/// loop:
2432	/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
2433	/// %loop.iv.next = add nuw i32 %loop.iv, 1
2434	/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
2435	/// <...>
2436	/// br i1 %loop.ivcheck, label %end, label %loop
2437	///
2438	/// end:
2439	/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2440	/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2441	/// <...>
2442	/// \endcode
2443	bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2444	bool MadeChange = false;
2445
2446	Value X, BitMask, BitPos, XCurr;
2447	Instruction *XNext;
2448	if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
2449	XNext)) {
2450	LLVM_DEBUG(dbgs() << DEBUG_TYPEdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-bittest idiom detection failed.\n" ; } } while (false)
2451	" shift-until-bittest idiom detection failed.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-bittest idiom detection failed.\n" ; } } while (false);
2452	return MadeChange;
2453	}
2454	LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-bittest idiom detected!\n" ; } } while (false);
2455
2456	// Ok, it is the idiom we were looking for, we could transform this loop,
2457	// but is it profitable to transform?
2458
2459	BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2460	BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2461	assert(LoopPreheaderBB && "There is always a loop preheader.")(static_cast <bool> (LoopPreheaderBB && "There is always a loop preheader." ) ? void (0) : __assert_fail ("LoopPreheaderBB && \"There is always a loop preheader.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2461, __extension__ __PRETTY_FUNCTION__));
2462
2463	BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2464	assert(SuccessorBB && "There is only a single successor.")(static_cast <bool> (SuccessorBB && "There is only a single successor." ) ? void (0) : __assert_fail ("SuccessorBB && \"There is only a single successor.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2464, __extension__ __PRETTY_FUNCTION__));
2465
2466	IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2467	Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
2468
2469	Intrinsic::ID IntrID = Intrinsic::ctlz;
2470	Type *Ty = X->getType();
2471	unsigned Bitwidth = Ty->getScalarSizeInBits();
2472
2473	TargetTransformInfo::TargetCostKind CostKind =
2474	TargetTransformInfo::TCK_SizeAndLatency;
2475
2476	// The rewrite is considered to be unprofitable iff and only iff the
2477	// intrinsic/shift we'll use are not cheap. Note that we are okay with just
2478	// making the loop countable, even if nothing else changes.
2479	IntrinsicCostAttributes Attrs(
2480	IntrID, Ty, {UndefValue::get(Ty), /is_zero_undef=/Builder.getTrue()});
2481	InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2482	if (Cost > TargetTransformInfo::TCC_Basic) {
2483	LLVM_DEBUG(dbgs() << DEBUG_TYPEdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Intrinsic is too costly, not beneficial\n" ; } } while (false)
2484	" Intrinsic is too costly, not beneficial\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Intrinsic is too costly, not beneficial\n" ; } } while (false);
2485	return MadeChange;
2486	}
2487	if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
2488	TargetTransformInfo::TCC_Basic) {
2489	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Shift is too costly, not beneficial\n" ; } } while (false);
2490	return MadeChange;
2491	}
2492
2493	// Ok, transform appears worthwhile.
2494	MadeChange = true;
2495
2496	// Step 1: Compute the loop trip count.
2497
2498	Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
2499	BitPos->getName() + ".lowbitmask");
2500	Value *Mask =
2501	Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
2502	Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
2503	CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2504	IntrID, Ty, {XMasked, /is_zero_undef=/Builder.getTrue()},
2505	/FMFSource=/nullptr, XMasked->getName() + ".numleadingzeros");
2506	Value *XMaskedNumActiveBits = Builder.CreateSub(
2507	ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
2508	XMasked->getName() + ".numactivebits", /HasNUW=/true,
2509	/HasNSW=/Bitwidth != 2);
2510	Value *XMaskedLeadingOnePos =
2511	Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
2512	XMasked->getName() + ".leadingonepos", /HasNUW=/false,
2513	/HasNSW=/Bitwidth > 2);
2514
2515	Value *LoopBackedgeTakenCount = Builder.CreateSub(
2516	BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
2517	/HasNUW=/true, /HasNSW=/true);
2518	// We know loop's backedge-taken count, but what's loop's trip count?
2519	// Note that while NUW is always safe, while NSW is only for bitwidths != 2.
2520	Value *LoopTripCount =
2521	Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2522	CurLoop->getName() + ".tripcount", /HasNUW=/true,
2523	/HasNSW=/Bitwidth != 2);
2524
2525	// Step 2: Compute the recurrence's final value without a loop.
2526
2527	// NewX is always safe to compute, because `LoopBackedgeTakenCount`
2528	// will always be smaller than `bitwidth(X)`, i.e. we never get poison.
2529	Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
2530	NewX->takeName(XCurr);
2531	if (auto *I = dyn_cast<Instruction>(NewX))
2532	I->copyIRFlags(XNext, /IncludeWrapFlags=/true);
2533
2534	Value *NewXNext;
2535	// Rewriting XNext is more complicated, however, because `X << LoopTripCount`
2536	// will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
2537	// iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
2538	// that isn't the case, we'll need to emit an alternative, safe IR.
2539	if (XNext->hasNoSignedWrap() \|\| XNext->hasNoUnsignedWrap() \|\|
2540	PatternMatch::match(
2541	BitPos, PatternMatch::m_SpecificInt_ICMP(
2542	ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
2543	Ty->getScalarSizeInBits() - 1))))
2544	NewXNext = Builder.CreateShl(X, LoopTripCount);
2545	else {
2546	// Otherwise, just additionally shift by one. It's the smallest solution,
2547	// alternatively, we could check that NewX is INT_MIN (or BitPos is )
2548	// and select 0 instead.
2549	NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2550	}
2551
2552	NewXNext->takeName(XNext);
2553	if (auto *I = dyn_cast<Instruction>(NewXNext))
2554	I->copyIRFlags(XNext, /IncludeWrapFlags=/true);
2555
2556	// Step 3: Adjust the successor basic block to recieve the computed
2557	// recurrence's final value instead of the recurrence itself.
2558
2559	XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
2560	XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
2561
2562	// Step 4: Rewrite the loop into a countable form, with canonical IV.
2563
2564	// The new canonical induction variable.
2565	Builder.SetInsertPoint(&LoopHeaderBB->front());
2566	auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2567
2568	// The induction itself.
2569	// Note that while NUW is always safe, while NSW is only for bitwidths != 2.
2570	Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2571	auto *IVNext =
2572	Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
2573	/HasNUW=/true, /HasNSW=/Bitwidth != 2);
2574
2575	// The loop trip count check.
2576	auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2577	CurLoop->getName() + ".ivcheck");
2578	Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2579	LoopHeaderBB->getTerminator()->eraseFromParent();
2580
2581	// Populate the IV PHI.
2582	IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2583	IV->addIncoming(IVNext, LoopHeaderBB);
2584
2585	// Step 5: Forget the "non-computable" trip-count SCEV associated with the
2586	// loop. The loop would otherwise not be deleted even if it becomes empty.
2587
2588	SE->forgetLoop(CurLoop);
2589
2590	// Other passes will take care of actually deleting the loop if possible.
2591
2592	LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-bittest idiom optimized!\n" ; } } while (false);
2593
2594	++NumShiftUntilBitTest;
2595	return MadeChange;
2596	}
2597
2598	/// Return true if the idiom is detected in the loop.
2599	///
2600	/// The core idiom we are trying to detect is:
2601	/// \code
2602	/// entry:
2603	/// <...>
2604	/// %start = <...>
2605	/// %extraoffset = <...>
2606	/// <...>
2607	/// br label %for.cond
2608	///
2609	/// loop:
2610	/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2611	/// %nbits = add nsw i8 %iv, %extraoffset
2612	/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
2613	/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2614	/// %iv.next = add i8 %iv, 1
2615	/// <...>
2616	/// br i1 %val.shifted.iszero, label %end, label %loop
2617	///
2618	/// end:
2619	/// %iv.res = phi i8 [ %iv, %loop ] <...>
2620	/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2621	/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2622	/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2623	/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2624	/// <...>
2625	/// \endcode
2626	static bool detectShiftUntilZeroIdiom(Loop CurLoop, ScalarEvolution SE,
2627	Instruction *&ValShiftedIsZero,
2628	Intrinsic::ID &IntrinID, Instruction *&IV,
2629	Value &Start, Value &Val,
2630	const SCEV *&ExtraOffsetExpr,
2631	bool &InvertedCond) {
2632	LLVM_DEBUG(dbgs() << DEBUG_TYPEdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Performing shift-until-zero idiom detection.\n" ; } } while (false)
2633	" Performing shift-until-zero idiom detection.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Performing shift-until-zero idiom detection.\n" ; } } while (false);
2634
2635	// Give up if the loop has multiple blocks or multiple backedges.
2636	if (CurLoop->getNumBlocks() != 1 \|\| CurLoop->getNumBackEdges() != 1) {
2637	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad block/backedge count.\n" ; } } while (false);
2638	return false;
2639	}
2640
2641	Instruction ValShifted, NBits, *IVNext;
2642	Value *ExtraOffset;
2643
2644	BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2645	BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2646	assert(LoopPreheaderBB && "There is always a loop preheader.")(static_cast <bool> (LoopPreheaderBB && "There is always a loop preheader." ) ? void (0) : __assert_fail ("LoopPreheaderBB && \"There is always a loop preheader.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2646, __extension__ __PRETTY_FUNCTION__));
2647
2648	using namespace PatternMatch;
2649
2650	// Step 1: Check if the loop backedge, condition is in desirable form.
2651
2652	ICmpInst::Predicate Pred;
2653	BasicBlock TrueBB, FalseBB;
2654	if (!match(LoopHeaderBB->getTerminator(),
2655	m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
2656	m_BasicBlock(FalseBB))) \|\|
2657	!match(ValShiftedIsZero,
2658	m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) \|\|
2659	!ICmpInst::isEquality(Pred)) {
2660	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad backedge structure.\n" ; } } while (false);
2661	return false;
2662	}
2663
2664	// Step 2: Check if the comparison's operand is in desirable form.
2665	// FIXME: Val could be a one-input PHI node, which we should look past.
2666	if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
2667	m_Instruction(NBits)))) {
2668	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad comparisons value computation.\n" ; } } while (false);
2669	return false;
2670	}
2671	IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
2672	: Intrinsic::ctlz;
2673
2674	// Step 3: Check if the shift amount is in desirable form.
2675
2676	if (match(NBits, m_c_Add(m_Instruction(IV),
2677	m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2678	(NBits->hasNoSignedWrap() \|\| NBits->hasNoUnsignedWrap()))
2679	ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
2680	else if (match(NBits,
2681	m_Sub(m_Instruction(IV),
2682	m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2683	NBits->hasNoSignedWrap())
2684	ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
2685	else {
2686	IV = NBits;
2687	ExtraOffsetExpr = SE->getZero(NBits->getType());
2688	}
2689
2690	// Step 4: Check if the recurrence is in desirable form.
2691	auto *IVPN = dyn_cast<PHINode>(IV);
2692	if (!IVPN \|\| IVPN->getParent() != LoopHeaderBB) {
2693	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Not an expected PHI node.\n" ; } } while (false);
2694	return false;
2695	}
2696
2697	Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2698	IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2699
2700	if (!IVNext \|\| !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
2701	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad recurrence.\n" ; } } while (false);
2702	return false;
2703	}
2704
2705	// Step 5: Check if the backedge's destinations are in desirable form.
2706
2707	assert(ICmpInst::isEquality(Pred) &&(static_cast <bool> (ICmpInst::isEquality(Pred) && "Should only get equality predicates here.") ? void (0) : __assert_fail ("ICmpInst::isEquality(Pred) && \"Should only get equality predicates here.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2708, __extension__ __PRETTY_FUNCTION__))
2708	"Should only get equality predicates here.")(static_cast <bool> (ICmpInst::isEquality(Pred) && "Should only get equality predicates here.") ? void (0) : __assert_fail ("ICmpInst::isEquality(Pred) && \"Should only get equality predicates here.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2708, __extension__ __PRETTY_FUNCTION__));
2709
2710	// cmp-br is commutative, so canonicalize to a single variant.
2711	InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
2712	if (InvertedCond) {
2713	Pred = ICmpInst::getInversePredicate(Pred);
	Value stored to 'Pred' is never read
2714	std::swap(TrueBB, FalseBB);
2715	}
2716
2717	// We expect to exit loop when comparison yields true,
2718	// so when it yields false we should branch back to loop header.
2719	if (FalseBB != LoopHeaderBB) {
2720	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Bad backedge flow.\n" ; } } while (false);
2721	return false;
2722	}
2723
2724	// The new, countable loop will certainly only run a known number of
2725	// iterations; it won't be infinite. But the old loop might be infinite
2726	// under certain conditions. For logical shifts, the value will become zero
2727	// after at most bitwidth(%Val) loop iterations. However, for arithmetic
2728	// right-shift, iff the sign bit was set, the value will never become zero,
2729	// and the loop may never finish.
2730	if (ValShifted->getOpcode() == Instruction::AShr &&
2731	!isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
2732	LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Can not prove the loop is finite.\n" ; } } while (false);
2733	return false;
2734	}
2735
2736	// Okay, idiom checks out.
2737	return true;
2738	}
2739
2740	/// Look for the following loop:
2741	/// \code
2742	/// entry:
2743	/// <...>
2744	/// %start = <...>
2745	/// %extraoffset = <...>
2746	/// <...>
2747	/// br label %for.cond
2748	///
2749	/// loop:
2750	/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2751	/// %nbits = add nsw i8 %iv, %extraoffset
2752	/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
2753	/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2754	/// %iv.next = add i8 %iv, 1
2755	/// <...>
2756	/// br i1 %val.shifted.iszero, label %end, label %loop
2757	///
2758	/// end:
2759	/// %iv.res = phi i8 [ %iv, %loop ] <...>
2760	/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2761	/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2762	/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2763	/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2764	/// <...>
2765	/// \endcode
2766	///
2767	/// And transform it into:
2768	/// \code
2769	/// entry:
2770	/// <...>
2771	/// %start = <...>
2772	/// %extraoffset = <...>
2773	/// <...>
2774	/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
2775	/// %val.numactivebits = sub i8 8, %val.numleadingzeros
2776	/// %extraoffset.neg = sub i8 0, %extraoffset
2777	/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
2778	/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
2779	/// %loop.tripcount = sub i8 %iv.final, %start
2780	/// br label %loop
2781	///
2782	/// loop:
2783	/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
2784	/// %loop.iv.next = add i8 %loop.iv, 1
2785	/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
2786	/// %iv = add i8 %loop.iv, %start
2787	/// <...>
2788	/// br i1 %loop.ivcheck, label %end, label %loop
2789	///
2790	/// end:
2791	/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
2792	/// <...>
2793	/// \endcode
2794	bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2795	bool MadeChange = false;
2796
2797	Instruction *ValShiftedIsZero;
2798	Intrinsic::ID IntrID;
2799	Instruction *IV;
2800	Value Start, Val;
2801	const SCEV *ExtraOffsetExpr;
2802	bool InvertedCond;
2803	if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
2804	Start, Val, ExtraOffsetExpr, InvertedCond)) {
2805	LLVM_DEBUG(dbgs() << DEBUG_TYPEdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-zero idiom detection failed.\n" ; } } while (false)
2806	" shift-until-zero idiom detection failed.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-zero idiom detection failed.\n" ; } } while (false);
2807	return MadeChange;
2808	}
2809	LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-zero idiom detected!\n" ; } } while (false);
2810
2811	// Ok, it is the idiom we were looking for, we could transform this loop,
2812	// but is it profitable to transform?
2813
2814	BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2815	BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2816	assert(LoopPreheaderBB && "There is always a loop preheader.")(static_cast <bool> (LoopPreheaderBB && "There is always a loop preheader." ) ? void (0) : __assert_fail ("LoopPreheaderBB && \"There is always a loop preheader.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2816, __extension__ __PRETTY_FUNCTION__));
2817
2818	BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2819	assert(SuccessorBB && "There is only a single successor.")(static_cast <bool> (SuccessorBB && "There is only a single successor." ) ? void (0) : __assert_fail ("SuccessorBB && \"There is only a single successor.\"" , "llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp", 2819, __extension__ __PRETTY_FUNCTION__));
2820
2821	IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2822	Builder.SetCurrentDebugLocation(IV->getDebugLoc());
2823
2824	Type *Ty = Val->getType();
2825	unsigned Bitwidth = Ty->getScalarSizeInBits();
2826
2827	TargetTransformInfo::TargetCostKind CostKind =
2828	TargetTransformInfo::TCK_SizeAndLatency;
2829
2830	// The rewrite is considered to be unprofitable iff and only iff the
2831	// intrinsic we'll use are not cheap. Note that we are okay with just
2832	// making the loop countable, even if nothing else changes.
2833	IntrinsicCostAttributes Attrs(
2834	IntrID, Ty, {UndefValue::get(Ty), /is_zero_undef=/Builder.getFalse()});
2835	InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2836	if (Cost > TargetTransformInfo::TCC_Basic) {
2837	LLVM_DEBUG(dbgs() << DEBUG_TYPEdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Intrinsic is too costly, not beneficial\n" ; } } while (false)
2838	" Intrinsic is too costly, not beneficial\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " Intrinsic is too costly, not beneficial\n" ; } } while (false);
2839	return MadeChange;
2840	}
2841
2842	// Ok, transform appears worthwhile.
2843	MadeChange = true;
2844
2845	bool OffsetIsZero = false;
2846	if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2847	OffsetIsZero = ExtraOffsetExprC->isZero();
2848
2849	// Step 1: Compute the loop's final IV value / trip count.
2850
2851	CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
2852	IntrID, Ty, {Val, /is_zero_undef=/Builder.getFalse()},
2853	/FMFSource=/nullptr, Val->getName() + ".numleadingzeros");
2854	Value *ValNumActiveBits = Builder.CreateSub(
2855	ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
2856	Val->getName() + ".numactivebits", /HasNUW=/true,
2857	/HasNSW=/Bitwidth != 2);
2858
2859	SCEVExpander Expander(SE, DL, "loop-idiom");
2860	Expander.setInsertPoint(&*Builder.GetInsertPoint());
2861	Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
2862
2863	Value *ValNumActiveBitsOffset = Builder.CreateAdd(
2864	ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
2865	/HasNUW=/OffsetIsZero, /HasNSW=/true);
2866	Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
2867	{ValNumActiveBitsOffset, Start},
2868	/FMFSource=/nullptr, "iv.final");
2869
2870	auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
2871	IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
2872	/HasNUW=/OffsetIsZero, /HasNSW=/true));
2873	// FIXME: or when the offset was `add nuw`
2874
2875	// We know loop's backedge-taken count, but what's loop's trip count?
2876	Value *LoopTripCount =
2877	Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2878	CurLoop->getName() + ".tripcount", /HasNUW=/true,
2879	/HasNSW=/Bitwidth != 2);
2880
2881	// Step 2: Adjust the successor basic block to recieve the original
2882	// induction variable's final value instead of the orig. IV itself.
2883
2884	IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
2885
2886	// Step 3: Rewrite the loop into a countable form, with canonical IV.
2887
2888	// The new canonical induction variable.
2889	Builder.SetInsertPoint(&LoopHeaderBB->front());
2890	auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2891
2892	// The induction itself.
2893	Builder.SetInsertPoint(LoopHeaderBB->getFirstNonPHI());
2894	auto *CIVNext =
2895	Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
2896	/HasNUW=/true, /HasNSW=/Bitwidth != 2);
2897
2898	// The loop trip count check.
2899	auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
2900	CurLoop->getName() + ".ivcheck");
2901	auto *NewIVCheck = CIVCheck;
2902	if (InvertedCond) {
2903	NewIVCheck = Builder.CreateNot(CIVCheck);
2904	NewIVCheck->takeName(ValShiftedIsZero);
2905	}
2906
2907	// The original IV, but rebased to be an offset to the CIV.
2908	auto IVDePHId = Builder.CreateAdd(CIV, Start, "", /HasNUW=*/false,
2909	/HasNSW=/true); // FIXME: what about NUW?
2910	IVDePHId->takeName(IV);
2911
2912	// The loop terminator.
2913	Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2914	Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
2915	LoopHeaderBB->getTerminator()->eraseFromParent();
2916
2917	// Populate the IV PHI.
2918	CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2919	CIV->addIncoming(CIVNext, LoopHeaderBB);
2920
2921	// Step 4: Forget the "non-computable" trip-count SCEV associated with the
2922	// loop. The loop would otherwise not be deleted even if it becomes empty.
2923
2924	SE->forgetLoop(CurLoop);
2925
2926	// Step 5: Try to cleanup the loop's body somewhat.
2927	IV->replaceAllUsesWith(IVDePHId);
2928	IV->eraseFromParent();
2929
2930	ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
2931	ValShiftedIsZero->eraseFromParent();
2932
2933	// Other passes will take care of actually deleting the loop if possible.
2934
2935	LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-idiom")) { dbgs() << "loop-idiom" " shift-until-zero idiom optimized!\n" ; } } while (false);
2936
2937	++NumShiftUntilZero;
2938	return MadeChange;
2939	}