Bug Summary

File: lib/CodeGen/CodeGenPrepare.cpp
Warning: line 3037, column 5
Called C++ object pointer is null
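
This class of warning flags a member-function call made through a pointer that the analyzer believes can be null on at least one path reaching the call. In this file, several target hooks (TLI, for example) are default-initialized to nullptr and only assigned when a TargetPassConfig is available, so an unguarded use later in the pass can trigger the diagnostic. A minimal sketch of the general pattern, using hypothetical names rather than the actual code at line 3037:

    struct Lowering { bool isSlowDivBypassed() const { return false; } };

    struct Prepare {
      const Lowering *TLI = nullptr;   // stays null when no TargetPassConfig exists
      void setUp(const Lowering *L, bool HaveTPC) {
        if (HaveTPC)
          TLI = L;                     // assigned only on this path
      }
      bool run() const {
        // Analyzer: "Called C++ object pointer is null" -- TLI may still be nullptr here.
        return TLI->isSlowDivBypassed();
        // A guarded form, e.g. TLI && TLI->isSlowDivBypassed(), avoids the report.
      }
    };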

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CodeGenPrepare.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn374877/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn374877=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-15-233810-7101-1 -x c++ /build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryBuiltins.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Transforms/Utils/Local.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/Analysis.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/SelectionDAGNodes.h"
39#include "llvm/CodeGen/TargetLowering.h"
40#include "llvm/CodeGen/TargetPassConfig.h"
41#include "llvm/CodeGen/TargetSubtargetInfo.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/CallSite.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Dominators.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/GlobalVariable.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/InlineAsm.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
61#include "llvm/IR/Instructions.h"
62#include "llvm/IR/IntrinsicInst.h"
63#include "llvm/IR/Intrinsics.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/MDBuilder.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/Operator.h"
68#include "llvm/IR/PatternMatch.h"
69#include "llvm/IR/Statepoint.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
75#include "llvm/IR/ValueMap.h"
76#include "llvm/Pass.h"
77#include "llvm/Support/BlockFrequency.h"
78#include "llvm/Support/BranchProbability.h"
79#include "llvm/Support/Casting.h"
80#include "llvm/Support/CommandLine.h"
81#include "llvm/Support/Compiler.h"
82#include "llvm/Support/Debug.h"
83#include "llvm/Support/ErrorHandling.h"
84#include "llvm/Support/MachineValueType.h"
85#include "llvm/Support/MathExtras.h"
86#include "llvm/Support/raw_ostream.h"
87#include "llvm/Target/TargetMachine.h"
88#include "llvm/Target/TargetOptions.h"
89#include "llvm/Transforms/Utils/BasicBlockUtils.h"
90#include "llvm/Transforms/Utils/BypassSlowDivision.h"
91#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <limits>
97#include <memory>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102using namespace llvm::PatternMatch;
103
104#define DEBUG_TYPE "codegenprepare"
105
106STATISTIC(NumBlocksElim, "Number of blocks eliminated");
107STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
108STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
109STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
110 "sunken Cmps");
111STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
112 "of sunken Casts");
113STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
114 "computations were sunk");
115STATISTIC(NumMemoryInstsPhiCreated,
116 "Number of phis created when address "
117 "computations were sunk to memory instructions");
118STATISTIC(NumMemoryInstsSelectCreated,
119 "Number of select created when address "
120 "computations were sunk to memory instructions");
121STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
122STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
123STATISTIC(NumAndsAdded,
124 "Number of and mask instructions added to form ext loads");
125STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
126STATISTIC(NumRetsDup, "Number of return instructions duplicated");
127STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
128STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
129STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
130
131static cl::opt<bool> DisableBranchOpts(
132 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
133 cl::desc("Disable branch optimizations in CodeGenPrepare"));
134
135static cl::opt<bool>
136 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
137 cl::desc("Disable GC optimizations in CodeGenPrepare"));
138
139static cl::opt<bool> DisableSelectToBranch(
140 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
141 cl::desc("Disable select to branch conversion."));
142
143static cl::opt<bool> AddrSinkUsingGEPs(
144 "addr-sink-using-gep", cl::Hidden, cl::init(true),
145 cl::desc("Address sinking in CGP using GEPs."));
146
147static cl::opt<bool> EnableAndCmpSinking(
148 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
149 cl::desc("Enable sinkinig and/cmp into branches."));
150
151static cl::opt<bool> DisableStoreExtract(
152 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
153 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
154
155static cl::opt<bool> StressStoreExtract(
156 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
157 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
158
159static cl::opt<bool> DisableExtLdPromotion(
160 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
161 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
162 "CodeGenPrepare"));
163
164static cl::opt<bool> StressExtLdPromotion(
165 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
166 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
167 "optimization in CodeGenPrepare"));
168
169static cl::opt<bool> DisablePreheaderProtect(
170 "disable-preheader-prot", cl::Hidden, cl::init(false),
171 cl::desc("Disable protection against removing loop preheaders"));
172
173static cl::opt<bool> ProfileGuidedSectionPrefix(
174 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
175 cl::desc("Use profile info to add section prefix for hot/cold functions"));
176
177static cl::opt<unsigned> FreqRatioToSkipMerge(
178 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
179 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
180 "(frequency of destination block) is greater than this ratio"));
181
182static cl::opt<bool> ForceSplitStore(
183 "force-split-store", cl::Hidden, cl::init(false),
184 cl::desc("Force store splitting no matter what the target query says."));
185
186static cl::opt<bool>
187EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
188 cl::desc("Enable merging of redundant sexts when one is dominating"
189 " the other."), cl::init(true));
190
191static cl::opt<bool> DisableComplexAddrModes(
192 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
193 cl::desc("Disables combining addressing modes with different parts "
194 "in optimizeMemoryInst."));
195
196static cl::opt<bool>
197AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
198 cl::desc("Allow creation of Phis in Address sinking."));
199
200static cl::opt<bool>
201AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
202 cl::desc("Allow creation of selects in Address sinking."));
203
204static cl::opt<bool> AddrSinkCombineBaseReg(
205 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
206 cl::desc("Allow combining of BaseReg field in Address sinking."));
207
208static cl::opt<bool> AddrSinkCombineBaseGV(
209 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
210 cl::desc("Allow combining of BaseGV field in Address sinking."));
211
212static cl::opt<bool> AddrSinkCombineBaseOffs(
213 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
214 cl::desc("Allow combining of BaseOffs field in Address sinking."));
215
216static cl::opt<bool> AddrSinkCombineScaledReg(
217 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
218 cl::desc("Allow combining of ScaledReg field in Address sinking."));
219
220static cl::opt<bool>
221 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
222 cl::init(true),
223 cl::desc("Enable splitting large offset of GEP."));
224
225namespace {
226
227enum ExtType {
228 ZeroExtension, // Zero extension has been seen.
229 SignExtension, // Sign extension has been seen.
230 BothExtension // This extension type is used if we saw sext after
231 // ZeroExtension had been set, or if we saw zext after
232 // SignExtension had been set. It makes the type
233 // information of a promoted instruction invalid.
234};
235
236using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
237using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
238using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
239using SExts = SmallVector<Instruction *, 16>;
240using ValueToSExts = DenseMap<Value *, SExts>;
241
242class TypePromotionTransaction;
243
244 class CodeGenPrepare : public FunctionPass {
245 const TargetMachine *TM = nullptr;
246 const TargetSubtargetInfo *SubtargetInfo;
247 const TargetLowering *TLI = nullptr;
248 const TargetRegisterInfo *TRI;
249 const TargetTransformInfo *TTI = nullptr;
250 const TargetLibraryInfo *TLInfo;
251 const LoopInfo *LI;
252 std::unique_ptr<BlockFrequencyInfo> BFI;
253 std::unique_ptr<BranchProbabilityInfo> BPI;
254
255 /// As we scan instructions optimizing them, this is the next instruction
256 /// to optimize. Transforms that can invalidate this should update it.
257 BasicBlock::iterator CurInstIterator;
258
259 /// Keeps track of non-local addresses that have been sunk into a block.
260 /// This allows us to avoid inserting duplicate code for blocks with
261 /// multiple load/stores of the same address. The usage of WeakTrackingVH
262 /// enables SunkAddrs to be treated as a cache whose entries can be
263 /// invalidated if a sunken address computation has been erased.
264 ValueMap<Value*, WeakTrackingVH> SunkAddrs;
265
266 /// Keeps track of all instructions inserted for the current function.
267 SetOfInstrs InsertedInsts;
268
269 /// Keeps track of the type of the related instruction before their
270 /// promotion for the current function.
271 InstrToOrigTy PromotedInsts;
272
273 /// Keep track of instructions removed during promotion.
274 SetOfInstrs RemovedInsts;
275
276 /// Keep track of sext chains based on their initial value.
277 DenseMap<Value *, Instruction *> SeenChainsForSExt;
278
279 /// Keep track of GEPs accessing the same data structures such as structs or
280 /// arrays that are candidates to be split later because of their large
281 /// size.
282 MapVector<
283 AssertingVH<Value>,
284 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
285 LargeOffsetGEPMap;
286
287 /// Keep track of new GEP base after splitting the GEPs having large offset.
288 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
289
290 /// Map serial numbers to Large offset GEPs.
291 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
292
293 /// Keep track of SExt promoted.
294 ValueToSExts ValToSExtendedUses;
295
296 /// True if optimizing for size.
297 bool OptSize;
298
299 /// DataLayout for the Function being processed.
300 const DataLayout *DL = nullptr;
301
302 /// Building the dominator tree can be expensive, so we only build it
303 /// lazily and update it when required.
304 std::unique_ptr<DominatorTree> DT;
305
306 public:
307 static char ID; // Pass identification, replacement for typeid
308
309 CodeGenPrepare() : FunctionPass(ID) {
310 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
311 }
312
313 bool runOnFunction(Function &F) override;
314
315 StringRef getPassName() const override { return "CodeGen Prepare"; }
316
317 void getAnalysisUsage(AnalysisUsage &AU) const override {
318 // FIXME: When we can selectively preserve passes, preserve the domtree.
319 AU.addRequired<ProfileSummaryInfoWrapperPass>();
320 AU.addRequired<TargetLibraryInfoWrapperPass>();
321 AU.addRequired<TargetTransformInfoWrapperPass>();
322 AU.addRequired<LoopInfoWrapperPass>();
323 }
324
325 private:
326 template <typename F>
327 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
328 // Substituting can cause recursive simplifications, which can invalidate
329 // our iterator. Use a WeakTrackingVH to hold onto it in case this
330 // happens.
331 Value *CurValue = &*CurInstIterator;
332 WeakTrackingVH IterHandle(CurValue);
333
334 f();
335
336 // If the iterator instruction was recursively deleted, start over at the
337 // start of the block.
338 if (IterHandle != CurValue) {
339 CurInstIterator = BB->begin();
340 SunkAddrs.clear();
341 }
342 }
343
344 // Get the DominatorTree, building if necessary.
345 DominatorTree &getDT(Function &F) {
346 if (!DT)
347 DT = std::make_unique<DominatorTree>(F);
348 return *DT;
349 }
350
351 bool eliminateFallThrough(Function &F);
352 bool eliminateMostlyEmptyBlocks(Function &F);
353 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
354 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
355 void eliminateMostlyEmptyBlock(BasicBlock *BB);
356 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
357 bool isPreheader);
358 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
359 bool optimizeInst(Instruction *I, bool &ModifiedDT);
360 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
361 Type *AccessTy, unsigned AddrSpace);
362 bool optimizeInlineAsmInst(CallInst *CS);
363 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
364 bool optimizeExt(Instruction *&I);
365 bool optimizeExtUses(Instruction *I);
366 bool optimizeLoadExt(LoadInst *Load);
367 bool optimizeShiftInst(BinaryOperator *BO);
368 bool optimizeSelectInst(SelectInst *SI);
369 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
370 bool optimizeSwitchInst(SwitchInst *SI);
371 bool optimizeExtractElementInst(Instruction *Inst);
372 bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
373 bool placeDbgValues(Function &F);
374 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
375 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
376 bool tryToPromoteExts(TypePromotionTransaction &TPT,
377 const SmallVectorImpl<Instruction *> &Exts,
378 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
379 unsigned CreatedInstsCost = 0);
380 bool mergeSExts(Function &F);
381 bool splitLargeGEPOffsets();
382 bool performAddressTypePromotion(
383 Instruction *&Inst,
384 bool AllowPromotionWithoutCommonHeader,
385 bool HasPromoted, TypePromotionTransaction &TPT,
386 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
387 bool splitBranchCondition(Function &F, bool &ModifiedDT);
388 bool simplifyOffsetableRelocate(Instruction &I);
389
390 bool tryToSinkFreeOperands(Instruction *I);
391 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
392 Intrinsic::ID IID);
393 bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
394 bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
395 bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
396 };
397
398} // end anonymous namespace
399
400char CodeGenPrepare::ID = 0;
401
402INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
403 "Optimize for code generation", false, false)
404INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
405INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
406 "Optimize for code generation", false, false)
407
408FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
409
410bool CodeGenPrepare::runOnFunction(Function &F) {
411 if (skipFunction(F))
412 return false;
413
414 DL = &F.getParent()->getDataLayout();
415
416 bool EverMadeChange = false;
417 // Clear per function information.
418 InsertedInsts.clear();
419 PromotedInsts.clear();
420
421 if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
422 TM = &TPC->getTM<TargetMachine>();
423 SubtargetInfo = TM->getSubtargetImpl(F);
424 TLI = SubtargetInfo->getTargetLowering();
425 TRI = SubtargetInfo->getRegisterInfo();
426 }
427 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
428 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
429 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
430 BPI.reset(new BranchProbabilityInfo(F, *LI));
431 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
432 OptSize = F.hasOptSize();
433
434 ProfileSummaryInfo *PSI =
435 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
436 if (ProfileGuidedSectionPrefix) {
437 if (PSI->isFunctionHotInCallGraph(&F, *BFI))
438 F.setSectionPrefix(".hot");
439 else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
440 F.setSectionPrefix(".unlikely");
441 }
442
443 /// This optimization identifies DIV instructions that can be
444 /// profitably bypassed and carried out with a shorter, faster divide.
445 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
446 TLI->isSlowDivBypassed()) {
447 const DenseMap<unsigned int, unsigned int> &BypassWidths =
448 TLI->getBypassSlowDivWidths();
449 BasicBlock* BB = &*F.begin();
450 while (BB != nullptr) {
451 // bypassSlowDivision may create new BBs, but we don't want to reapply the
452 // optimization to those blocks.
453 BasicBlock* Next = BB->getNextNode();
454 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
455 BB = Next;
456 }
457 }
458
459 // Eliminate blocks that contain only PHI nodes and an
460 // unconditional branch.
461 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
462
463 bool ModifiedDT = false;
464 if (!DisableBranchOpts)
465 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
466
467 // Split some critical edges where one of the sources is an indirect branch,
468 // to help generate sane code for PHIs involving such edges.
469 EverMadeChange |= SplitIndirectBrCriticalEdges(F);
470
471 bool MadeChange = true;
472 while (MadeChange) {
473 MadeChange = false;
474 DT.reset();
475 for (Function::iterator I = F.begin(); I != F.end(); ) {
476 BasicBlock *BB = &*I++;
477 bool ModifiedDTOnIteration = false;
478 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
479
480 // Restart BB iteration if the dominator tree of the Function was changed
481 if (ModifiedDTOnIteration)
482 break;
483 }
484 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
485 MadeChange |= mergeSExts(F);
486 if (!LargeOffsetGEPMap.empty())
487 MadeChange |= splitLargeGEPOffsets();
488
489 // Really free removed instructions during promotion.
490 for (Instruction *I : RemovedInsts)
491 I->deleteValue();
492
493 EverMadeChange |= MadeChange;
494 SeenChainsForSExt.clear();
495 ValToSExtendedUses.clear();
496 RemovedInsts.clear();
497 LargeOffsetGEPMap.clear();
498 LargeOffsetGEPID.clear();
499 }
500
501 SunkAddrs.clear();
502
503 if (!DisableBranchOpts) {
504 MadeChange = false;
505 // Use a set vector to get deterministic iteration order. The order the
506 // blocks are removed may affect whether or not PHI nodes in successors
507 // are removed.
508 SmallSetVector<BasicBlock*, 8> WorkList;
509 for (BasicBlock &BB : F) {
510 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
511 MadeChange |= ConstantFoldTerminator(&BB, true);
512 if (!MadeChange) continue;
513
514 for (SmallVectorImpl<BasicBlock*>::iterator
515 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
516 if (pred_begin(*II) == pred_end(*II))
517 WorkList.insert(*II);
518 }
519
520 // Delete the dead blocks and any of their dead successors.
521 MadeChange |= !WorkList.empty();
522 while (!WorkList.empty()) {
523 BasicBlock *BB = WorkList.pop_back_val();
524 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
525
526 DeleteDeadBlock(BB);
527
528 for (SmallVectorImpl<BasicBlock*>::iterator
529 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
530 if (pred_begin(*II) == pred_end(*II))
531 WorkList.insert(*II);
532 }
533
534 // Merge pairs of basic blocks with unconditional branches, connected by
535 // a single edge.
536 if (EverMadeChange || MadeChange)
537 MadeChange |= eliminateFallThrough(F);
538
539 EverMadeChange |= MadeChange;
540 }
541
542 if (!DisableGCOpts) {
543 SmallVector<Instruction *, 2> Statepoints;
544 for (BasicBlock &BB : F)
545 for (Instruction &I : BB)
546 if (isStatepoint(I))
547 Statepoints.push_back(&I);
548 for (auto &I : Statepoints)
549 EverMadeChange |= simplifyOffsetableRelocate(*I);
550 }
551
552 // Do this last to clean up use-before-def scenarios introduced by other
553 // preparatory transforms.
554 EverMadeChange |= placeDbgValues(F);
555
556 return EverMadeChange;
557}
558
559/// Merge basic blocks which are connected by a single edge, where one of the
560/// basic blocks has a single successor pointing to the other basic block,
561/// which has a single predecessor.
562bool CodeGenPrepare::eliminateFallThrough(Function &F) {
563 bool Changed = false;
564 // Scan all of the blocks in the function, except for the entry block.
565 // Use a temporary array to avoid iterator being invalidated when
566 // deleting blocks.
567 SmallVector<WeakTrackingVH, 16> Blocks;
568 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
569 Blocks.push_back(&Block);
570
571 for (auto &Block : Blocks) {
572 auto *BB = cast_or_null<BasicBlock>(Block);
573 if (!BB)
574 continue;
575 // If the destination block has a single pred, then this is a trivial
576 // edge, just collapse it.
577 BasicBlock *SinglePred = BB->getSinglePredecessor();
578
579 // Don't merge if BB's address is taken.
580 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
581
582 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
583 if (Term && !Term->isConditional()) {
584 Changed = true;
585 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
586
587 // Merge BB into SinglePred and delete it.
588 MergeBlockIntoPredecessor(BB);
589 }
590 }
591 return Changed;
592}
593
594/// Find a destination block from BB if BB is mergeable empty block.
595BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
596 // If this block doesn't end with an uncond branch, ignore it.
597 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
598 if (!BI || !BI->isUnconditional())
599 return nullptr;
600
601 // If the instruction before the branch (skipping debug info) isn't a phi
602 // node, then other stuff is happening here.
603 BasicBlock::iterator BBI = BI->getIterator();
604 if (BBI != BB->begin()) {
605 --BBI;
606 while (isa<DbgInfoIntrinsic>(BBI)) {
607 if (BBI == BB->begin())
608 break;
609 --BBI;
610 }
611 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
612 return nullptr;
613 }
614
615 // Do not break infinite loops.
616 BasicBlock *DestBB = BI->getSuccessor(0);
617 if (DestBB == BB)
618 return nullptr;
619
620 if (!canMergeBlocks(BB, DestBB))
621 DestBB = nullptr;
622
623 return DestBB;
624}
625
626/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
627/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
628/// edges in ways that are non-optimal for isel. Start by eliminating these
629/// blocks so we can split them the way we want them.
630bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
631 SmallPtrSet<BasicBlock *, 16> Preheaders;
632 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
633 while (!LoopList.empty()) {
634 Loop *L = LoopList.pop_back_val();
635 LoopList.insert(LoopList.end(), L->begin(), L->end());
636 if (BasicBlock *Preheader = L->getLoopPreheader())
637 Preheaders.insert(Preheader);
638 }
639
640 bool MadeChange = false;
641 // Copy blocks into a temporary array to avoid iterator invalidation issues
642 // as we remove them.
643 // Note that this intentionally skips the entry block.
644 SmallVector<WeakTrackingVH, 16> Blocks;
645 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
646 Blocks.push_back(&Block);
647
648 for (auto &Block : Blocks) {
649 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
650 if (!BB)
651 continue;
652 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
653 if (!DestBB ||
654 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
655 continue;
656
657 eliminateMostlyEmptyBlock(BB);
658 MadeChange = true;
659 }
660 return MadeChange;
661}
662
663bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
664 BasicBlock *DestBB,
665 bool isPreheader) {
666 // Do not delete loop preheaders if doing so would create a critical edge.
667 // Loop preheaders can be good locations to spill registers. If the
668 // preheader is deleted and we create a critical edge, registers may be
669 // spilled in the loop body instead.
670 if (!DisablePreheaderProtect && isPreheader &&
671 !(BB->getSinglePredecessor() &&
672 BB->getSinglePredecessor()->getSingleSuccessor()))
673 return false;
674
675 // Skip merging if the block's successor is also a successor to any callbr
676 // that leads to this block.
677 // FIXME: Is this really needed? Is this a correctness issue?
678 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
679 if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
680 for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
681 if (DestBB == CBI->getSuccessor(i))
682 return false;
683 }
684
685 // Try to skip merging if the unique predecessor of BB is terminated by a
686 // switch or indirect branch instruction, and BB is used as an incoming block
687 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
688 // add COPY instructions in the predecessor of BB instead of BB (if it is not
689 merged). Note that the critical edge created by merging such blocks won't be
690 // split in MachineSink because the jump table is not analyzable. By keeping
691 // such empty block (BB), ISel will place COPY instructions in BB, not in the
692 // predecessor of BB.
693 BasicBlock *Pred = BB->getUniquePredecessor();
694 if (!Pred ||
695 !(isa<SwitchInst>(Pred->getTerminator()) ||
696 isa<IndirectBrInst>(Pred->getTerminator())))
697 return true;
698
699 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
700 return true;
701
702 // We use a simple cost heuristic which determines that skipping merging is
703 // profitable if the cost of skipping merging is less than the cost of
704 // merging : Cost(skipping merging) < Cost(merging BB), where the
705 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
706 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
707 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
708 // Freq(Pred) / Freq(BB) > 2.
709 // Note that if there are multiple empty blocks sharing the same incoming
710 // value for the PHIs in the DestBB, we consider them together. In such
711 // case, Cost(merging BB) will be the sum of their frequencies.
712
713 if (!isa<PHINode>(DestBB->begin()))
714 return true;
715
716 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
717
718 // Find all other incoming blocks from which incoming values of all PHIs in
719 // DestBB are the same as the ones from BB.
720 for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
721 ++PI) {
722 BasicBlock *DestBBPred = *PI;
723 if (DestBBPred == BB)
724 continue;
725
726 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
727 return DestPN.getIncomingValueForBlock(BB) ==
728 DestPN.getIncomingValueForBlock(DestBBPred);
729 }))
730 SameIncomingValueBBs.insert(DestBBPred);
731 }
732
733 // See if all BB's incoming values are the same as the value from Pred. In this
734 // case, no reason to skip merging because COPYs are expected to be placed in
735 // Pred already.
736 if (SameIncomingValueBBs.count(Pred))
737 return true;
738
739 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
740 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
741
742 for (auto SameValueBB : SameIncomingValueBBs)
743 if (SameValueBB->getUniquePredecessor() == Pred &&
744 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
745 BBFreq += BFI->getBlockFreq(SameValueBB);
746
747 return PredFreq.getFrequency() <=
748 BBFreq.getFrequency() * FreqRatioToSkipMerge;
749}
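// Worked example of the ratio test above (illustrative numbers, not from the
// analyzed code): with the default cgp-freq-ratio-to-skip-merge of 2, a block
// BB with frequency 100 whose predecessor Pred has frequency 150 gives
// 150 <= 100 * 2, so merging is considered profitable; if Pred's frequency
// were 250 the test fails, and the empty block is kept so that ISel places
// the COPY instructions in BB rather than in Pred.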
750
751/// Return true if we can merge BB into DestBB if there is a single
752/// unconditional branch between them, and BB contains no other non-phi
753/// instructions.
754bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
755 const BasicBlock *DestBB) const {
756 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
757 // the successor. If there are more complex condition (e.g. preheaders),
758 // don't mess around with them.
759 for (const PHINode &PN : BB->phis()) {
760 for (const User *U : PN.users()) {
761 const Instruction *UI = cast<Instruction>(U);
762 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
763 return false;
764 // If User is inside DestBB block and it is a PHINode then check
765 // incoming value. If incoming value is not from BB then this is
766 // a complex condition (e.g. preheaders) we want to avoid here.
767 if (UI->getParent() == DestBB) {
768 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
769 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
770 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
771 if (Insn && Insn->getParent() == BB &&
772 Insn->getParent() != UPN->getIncomingBlock(I))
773 return false;
774 }
775 }
776 }
777 }
778
779 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
780 // and DestBB may have conflicting incoming values for the block. If so, we
781 // can't merge the block.
782 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
783 if (!DestBBPN) return true; // no conflict.
784
785 // Collect the preds of BB.
786 SmallPtrSet<const BasicBlock*, 16> BBPreds;
787 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
788 // It is faster to get preds from a PHI than with pred_iterator.
789 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
790 BBPreds.insert(BBPN->getIncomingBlock(i));
791 } else {
792 BBPreds.insert(pred_begin(BB), pred_end(BB));
793 }
794
795 // Walk the preds of DestBB.
796 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
797 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
798 if (BBPreds.count(Pred)) { // Common predecessor?
799 for (const PHINode &PN : DestBB->phis()) {
800 const Value *V1 = PN.getIncomingValueForBlock(Pred);
801 const Value *V2 = PN.getIncomingValueForBlock(BB);
802
803 // If V2 is a phi node in BB, look up what the mapped value will be.
804 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
805 if (V2PN->getParent() == BB)
806 V2 = V2PN->getIncomingValueForBlock(Pred);
807
808 // If there is a conflict, bail out.
809 if (V1 != V2) return false;
810 }
811 }
812 }
813
814 return true;
815}
816
817/// Eliminate a basic block that has only phi's and an unconditional branch in
818/// it.
819void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
820 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
821 BasicBlock *DestBB = BI->getSuccessor(0);
822
823 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
824 << *BB << *DestBB);
825
826 // If the destination block has a single pred, then this is a trivial edge,
827 // just collapse it.
828 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
829 if (SinglePred != DestBB) {
830 assert(SinglePred == BB &&
831 "Single predecessor not the same as predecessor");
832 // Merge DestBB into SinglePred/BB and delete it.
833 MergeBlockIntoPredecessor(DestBB);
834 // Note: BB(=SinglePred) will not be deleted on this path.
835 // DestBB(=its single successor) is the one that was deleted.
836 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
837 return;
838 }
839 }
840
841 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
842 // to handle the new incoming edges it is about to have.
843 for (PHINode &PN : DestBB->phis()) {
844 // Remove the incoming value for BB, and remember it.
845 Value *InVal = PN.removeIncomingValue(BB, false);
846
847 // Two options: either the InVal is a phi node defined in BB or it is some
848 // value that dominates BB.
849 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
850 if (InValPhi && InValPhi->getParent() == BB) {
851 // Add all of the input values of the input PHI as inputs of this phi.
852 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
853 PN.addIncoming(InValPhi->getIncomingValue(i),
854 InValPhi->getIncomingBlock(i));
855 } else {
856 // Otherwise, add one instance of the dominating value for each edge that
857 // we will be adding.
858 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
859 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
860 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
861 } else {
862 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
863 PN.addIncoming(InVal, *PI);
864 }
865 }
866 }
867
868 // The PHIs are now updated, change everything that refers to BB to use
869 // DestBB and remove BB.
870 BB->replaceAllUsesWith(DestBB);
871 BB->eraseFromParent();
872 ++NumBlocksElim;
873
874 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
875}
876
877// Computes a map of base pointer relocation instructions to corresponding
878// derived pointer relocation instructions given a vector of all relocate calls
879static void computeBaseDerivedRelocateMap(
880 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
881 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
882 &RelocateInstMap) {
883 // Collect information in two maps: one primarily for locating the base object
884 // while filling the second map; the second map is the final structure holding
885 // a mapping between Base and corresponding Derived relocate calls
886 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
887 for (auto *ThisRelocate : AllRelocateCalls) {
888 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
889 ThisRelocate->getDerivedPtrIndex());
890 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
891 }
892 for (auto &Item : RelocateIdxMap) {
893 std::pair<unsigned, unsigned> Key = Item.first;
894 if (Key.first == Key.second)
895 // Base relocation: nothing to insert
896 continue;
897
898 GCRelocateInst *I = Item.second;
899 auto BaseKey = std::make_pair(Key.first, Key.first);
900
901 // We're iterating over RelocateIdxMap so we cannot modify it.
902 auto MaybeBase = RelocateIdxMap.find(BaseKey);
903 if (MaybeBase == RelocateIdxMap.end())
904 // TODO: We might want to insert a new base object relocate and gep off
905 // that, if there are enough derived object relocates.
906 continue;
907
908 RelocateInstMap[MaybeBase->second].push_back(I);
909 }
910}
911
912// Accepts a GEP and extracts the operands into a vector provided they're all
913// small integer constants
914static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
915 SmallVectorImpl<Value *> &OffsetV) {
916 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
917 // Only accept small constant integer operands
918 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
919 if (!Op || Op->getZExtValue() > 20)
920 return false;
921 }
922
923 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
924 OffsetV.push_back(GEP->getOperand(i));
925 return true;
926}
927
928// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
929// replace, computes a replacement, and affects it.
930static bool
931simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
932 const SmallVectorImpl<GCRelocateInst *> &Targets) {
933 bool MadeChange = false;
934 // We must ensure the relocation of derived pointer is defined after
935 // relocation of base pointer. If we find a relocation corresponding to base
936 // defined earlier than relocation of base then we move relocation of base
937 // right before found relocation. We consider only relocation in the same
938 // basic block as relocation of base. Relocations from other basic block will
939 // be skipped by optimization and we do not care about them.
940 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
941 &*R != RelocatedBase; ++R)
942 if (auto RI = dyn_cast<GCRelocateInst>(R))
943 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
944 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
945 RelocatedBase->moveBefore(RI);
946 break;
947 }
948
949 for (GCRelocateInst *ToReplace : Targets) {
950 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
951 "Not relocating a derived object of the original base object");
952 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
953 // A duplicate relocate call. TODO: coalesce duplicates.
954 continue;
955 }
956
957 if (RelocatedBase->getParent() != ToReplace->getParent()) {
958 // Base and derived relocates are in different basic blocks.
959 // In this case transform is only valid when base dominates derived
960 // relocate. However it would be too expensive to check dominance
961 // for each such relocate, so we skip the whole transformation.
962 continue;
963 }
964
965 Value *Base = ToReplace->getBasePtr();
966 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
967 if (!Derived || Derived->getPointerOperand() != Base)
968 continue;
969
970 SmallVector<Value *, 2> OffsetV;
971 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
972 continue;
973
974 // Create a Builder and replace the target callsite with a gep
975 assert(RelocatedBase->getNextNode() &&
976 "Should always have one since it's not a terminator");
977
978 // Insert after RelocatedBase
979 IRBuilder<> Builder(RelocatedBase->getNextNode());
980 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
981
982 // If gc_relocate does not match the actual type, cast it to the right type.
983 // In theory, there must be a bitcast after gc_relocate if the type does not
984 // match, and we should reuse it to get the derived pointer. But it could be
985 // cases like this:
986 // bb1:
987 // ...
988 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
989 // br label %merge
990 //
991 // bb2:
992 // ...
993 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
994 // br label %merge
995 //
996 // merge:
997 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
998 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
999 //
1000 // In this case, we cannot find the bitcast anymore. So we insert a new bitcast
1001 // no matter whether there is already one or not. In this way, we can handle all cases, and
1002 // the extra bitcast should be optimized away in later passes.
1003 Value *ActualRelocatedBase = RelocatedBase;
1004 if (RelocatedBase->getType() != Base->getType()) {
1005 ActualRelocatedBase =
1006 Builder.CreateBitCast(RelocatedBase, Base->getType());
1007 }
1008 Value *Replacement = Builder.CreateGEP(
1009 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1010 Replacement->takeName(ToReplace);
1011 // If the newly generated derived pointer's type does not match the original derived
1012 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1013 Value *ActualReplacement = Replacement;
1014 if (Replacement->getType() != ToReplace->getType()) {
1015 ActualReplacement =
1016 Builder.CreateBitCast(Replacement, ToReplace->getType());
1017 }
1018 ToReplace->replaceAllUsesWith(ActualReplacement);
1019 ToReplace->eraseFromParent();
1020
1021 MadeChange = true;
1022 }
1023 return MadeChange;
1024}
1025
1026// Turns this:
1027//
1028// %base = ...
1029// %ptr = gep %base + 15
1030// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1031// %base' = relocate(%tok, i32 4, i32 4)
1032// %ptr' = relocate(%tok, i32 4, i32 5)
1033// %val = load %ptr'
1034//
1035// into this:
1036//
1037// %base = ...
1038// %ptr = gep %base + 15
1039// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1040// %base' = gc.relocate(%tok, i32 4, i32 4)
1041// %ptr' = gep %base' + 15
1042// %val = load %ptr'
1043bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1044 bool MadeChange = false;
1045 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1046
1047 for (auto *U : I.users())
1048 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1049 // Collect all the relocate calls associated with a statepoint
1050 AllRelocateCalls.push_back(Relocate);
1051
1052 // We need at least one base pointer relocation + one derived pointer
1053 // relocation to mangle
1054 if (AllRelocateCalls.size() < 2)
1055 return false;
1056
1057 // RelocateInstMap is a mapping from the base relocate instruction to the
1058 // corresponding derived relocate instructions
1059 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1060 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1061 if (RelocateInstMap.empty())
1062 return false;
1063
1064 for (auto &Item : RelocateInstMap)
1065 // Item.first is the RelocatedBase to offset against
1066 // Item.second is the vector of Targets to replace
1067 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1068 return MadeChange;
1069}
1070
1071/// Sink the specified cast instruction into its user blocks.
1072static bool SinkCast(CastInst *CI) {
1073 BasicBlock *DefBB = CI->getParent();
1074
1075 /// InsertedCasts - Only insert a cast in each block once.
1076 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1077
1078 bool MadeChange = false;
1079 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1080 UI != E; ) {
1081 Use &TheUse = UI.getUse();
1082 Instruction *User = cast<Instruction>(*UI);
1083
1084 // Figure out which BB this cast is used in. For PHI's this is the
1085 // appropriate predecessor block.
1086 BasicBlock *UserBB = User->getParent();
1087 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1088 UserBB = PN->getIncomingBlock(TheUse);
1089 }
1090
1091 // Preincrement use iterator so we don't invalidate it.
1092 ++UI;
1093
1094 // The first insertion point of a block containing an EH pad is after the
1095 // pad. If the pad is the user, we cannot sink the cast past the pad.
1096 if (User->isEHPad())
1097 continue;
1098
1099 // If the block selected to receive the cast is an EH pad that does not
1100 // allow non-PHI instructions before the terminator, we can't sink the
1101 // cast.
1102 if (UserBB->getTerminator()->isEHPad())
1103 continue;
1104
1105 // If this user is in the same block as the cast, don't change the cast.
1106 if (UserBB == DefBB) continue;
1107
1108 // If we have already inserted a cast into this block, use it.
1109 CastInst *&InsertedCast = InsertedCasts[UserBB];
1110
1111 if (!InsertedCast) {
1112 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1113 assert(InsertPt != UserBB->end());
1114 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1115 CI->getType(), "", &*InsertPt);
1116 InsertedCast->setDebugLoc(CI->getDebugLoc());
1117 }
1118
1119 // Replace a use of the cast with a use of the new cast.
1120 TheUse = InsertedCast;
1121 MadeChange = true;
1122 ++NumCastUses;
1123 }
1124
1125 // If we removed all uses, nuke the cast.
1126 if (CI->use_empty()) {
1127 salvageDebugInfo(*CI);
1128 CI->eraseFromParent();
1129 MadeChange = true;
1130 }
1131
1132 return MadeChange;
1133}
1134
1135/// If the specified cast instruction is a noop copy (e.g. it's casting from
1136/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1137/// reduce the number of virtual registers that must be created and coalesced.
1138///
1139/// Return true if any changes are made.
1140static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1141 const DataLayout &DL) {
1142 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1143 // than sinking only nop casts, but is helpful on some platforms.
1144 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1145 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1146 ASC->getDestAddressSpace()))
1147 return false;
1148 }
1149
1150 // If this is a noop copy,
1151 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1152 EVT DstVT = TLI.getValueType(DL, CI->getType());
1153
1154 // This is an fp<->int conversion?
1155 if (SrcVT.isInteger() != DstVT.isInteger())
1156 return false;
1157
1158 // If this is an extension, it will be a zero or sign extension, which
1159 // isn't a noop.
1160 if (SrcVT.bitsLT(DstVT)) return false;
1161
1162 // If these values will be promoted, find out what they will be promoted
1163 // to. This helps us consider truncates on PPC as noop copies when they
1164 // are.
1165 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1166 TargetLowering::TypePromoteInteger)
1167 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1168 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1169 TargetLowering::TypePromoteInteger)
1170 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1171
1172 // If, after promotion, these are the same types, this is a noop copy.
1173 if (SrcVT != DstVT)
1174 return false;
1175
1176 return SinkCast(CI);
1177}
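// Example of the promotion reasoning above (illustrative; assumes a target
// where i16 is promoted to i32): for
//   %t = trunc i32 %x to i16
// SrcVT is i32 and DstVT is promoted from i16 to i32, so the trunc is a noop
// copy and is handed to SinkCast.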
1178
1179bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1180 CmpInst *Cmp,
1181 Intrinsic::ID IID) {
1182 if (BO->getParent() != Cmp->getParent()) {
1183 // We used to use a dominator tree here to allow multi-block optimization.
1184 // But that was problematic because:
1185 // 1. It could cause a perf regression by hoisting the math op into the
1186 // critical path.
1187 // 2. It could cause a perf regression by creating a value that was live
1188 // across multiple blocks and increasing register pressure.
1189 // 3. Use of a dominator tree could cause large compile-time regression.
1190 // This is because we recompute the DT on every change in the main CGP
1191 // run-loop. The recomputing is probably unnecessary in many cases, so if
1192 // that was fixed, using a DT here would be ok.
1193 return false;
1194 }
1195
1196 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1197 Value *Arg0 = BO->getOperand(0);
1198 Value *Arg1 = BO->getOperand(1);
1199 if (BO->getOpcode() == Instruction::Add &&
1200 IID == Intrinsic::usub_with_overflow) {
1201 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1202 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1203 }
1204
1205 // Insert at the first instruction of the pair.
1206 Instruction *InsertPt = nullptr;
1207 for (Instruction &Iter : *Cmp->getParent()) {
1208 if (&Iter == BO || &Iter == Cmp) {
1209 InsertPt = &Iter;
1210 break;
1211 }
1212 }
1213 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1214
1215 IRBuilder<> Builder(InsertPt);
1216 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1217 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1218 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1219 BO->replaceAllUsesWith(Math);
1220 Cmp->replaceAllUsesWith(OV);
1221 BO->eraseFromParent();
1222 Cmp->eraseFromParent();
1223 return true;
1224}
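// Illustrative sketch (assumed IR, added for exposition): with
// IID == Intrinsic::uadd_with_overflow the same-block pair
//   %add = add i32 %a, %b
//   %cmp = icmp ult i32 %add, %a
// becomes
//   %mathov = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math   = extractvalue { i32, i1 } %mathov, 0
//   %ov     = extractvalue { i32, i1 } %mathov, 1
// with all uses of %add rewritten to %math and all uses of %cmp to %ov.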
1225
1226/// Match special-case patterns that check for unsigned add overflow.
1227static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1228 BinaryOperator *&Add) {
1229 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1230 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1231 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1232
1233 // We are not expecting non-canonical/degenerate code. Just bail out.
1234 if (isa<Constant>(A))
1235 return false;
1236
1237 ICmpInst::Predicate Pred = Cmp->getPredicate();
1238 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1239 B = ConstantInt::get(B->getType(), 1);
1240 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1241 B = ConstantInt::get(B->getType(), -1);
1242 else
1243 return false;
1244
1245 // Check the users of the variable operand of the compare looking for an add
1246 // with the adjusted constant.
1247 for (User *U : A->users()) {
1248 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1249 Add = cast<BinaryOperator>(U);
1250 return true;
1251 }
1252 }
1253 return false;
1254}
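// Concrete instance of the first pattern above (illustrative, assumed IR):
//   %add = add i32 %a, 1
//   %cmp = icmp eq i32 %a, -1     ; overflows exactly when %a is the max value
// The helper adjusts the compare constant to 1 and returns the matching add,
// so the caller can form @llvm.uadd.with.overflow(%a, 1).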
1255
1256/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1257/// intrinsic. Return true if any changes were made.
1258bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1259 bool &ModifiedDT) {
1260 Value *A, *B;
1261 BinaryOperator *Add;
1262 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
1263 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1264 return false;
1265
1266 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1267 TLI->getValueType(*DL, Add->getType())))
1268 return false;
1269
1270 // We don't want to move around uses of condition values this late, so we
1271 // check if it is legal to create the call to the intrinsic in the basic
1272 // block containing the icmp.
1273 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1274 return false;
1275
1276 if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
1277 return false;
1278
1279 // Reset callers - do not crash by iterating over a dead instruction.
1280 ModifiedDT = true;
1281 return true;
1282}
1283
1284bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1285 bool &ModifiedDT) {
1286 // We are not expecting non-canonical/degenerate code. Just bail out.
1287 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1288 if (isa<Constant>(A) && isa<Constant>(B))
1289 return false;
1290
1291 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1292 ICmpInst::Predicate Pred = Cmp->getPredicate();
1293 if (Pred == ICmpInst::ICMP_UGT) {
1294 std::swap(A, B);
1295 Pred = ICmpInst::ICMP_ULT;
1296 }
1297 // Convert special-case: (A == 0) is the same as (A u< 1).
1298 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1299 B = ConstantInt::get(B->getType(), 1);
1300 Pred = ICmpInst::ICMP_ULT;
1301 }
1302 // Convert special-case: (A != 0) is the same as (0 u< A).
1303 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1304 std::swap(A, B);
1305 Pred = ICmpInst::ICMP_ULT;
1306 }
1307 if (Pred != ICmpInst::ICMP_ULT)
1308 return false;
1309
1310 // Walk the users of a variable operand of a compare looking for a subtract or
1311 // add with that same operand. Also match the 2nd operand of the compare to
1312 // the add/sub, but that may be a negated constant operand of an add.
1313 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1314 BinaryOperator *Sub = nullptr;
1315 for (User *U : CmpVariableOperand->users()) {
1316 // A - B, A u< B --> usubo(A, B)
1317 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1318 Sub = cast<BinaryOperator>(U);
1319 break;
1320 }
1321
1322 // A + (-C), A u< C (canonicalized form of (sub A, C))
1323 const APInt *CmpC, *AddC;
1324 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1325 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1326 Sub = cast<BinaryOperator>(U);
1327 break;
1328 }
1329 }
1330 if (!Sub)
1331 return false;
1332
1333 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1334 TLI->getValueType(*DL, Sub->getType())))
1335 return false;
1336
1337 if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
1338 return false;
1339
1340 // Reset callers - do not crash by iterating over a dead instruction.
1341 ModifiedDT = true;
1342 return true;
1343}
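// Illustrative sketch (assumed IR): both of these same-block pairs are
// recognized and handed to replaceMathCmpWithIntrinsic above:
//   %sub = sub i32 %a, %b                %add = add i32 %a, -42
//   %cmp = icmp ult i32 %a, %b           %cmp = icmp ult i32 %a, 42
// and each becomes a single @llvm.usub.with.overflow.i32 call.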
1344
1345/// Sink the given CmpInst into user blocks to reduce the number of virtual
1346/// registers that must be created and coalesced. This is a clear win except on
1347/// targets with multiple condition code registers (PowerPC), where it might
1348/// lose; some adjustment may be wanted there.
1349///
1350/// Return true if any changes are made.
1351static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1352 if (TLI.hasMultipleConditionRegisters())
1353 return false;
1354
1355 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1356 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1357 return false;
1358
1359 // Only insert a cmp in each block once.
1360 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1361
1362 bool MadeChange = false;
1363 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1364 UI != E; ) {
1365 Use &TheUse = UI.getUse();
1366 Instruction *User = cast<Instruction>(*UI);
1367
1368 // Preincrement use iterator so we don't invalidate it.
1369 ++UI;
1370
1371 // Don't bother for PHI nodes.
1372 if (isa<PHINode>(User))
1373 continue;
1374
1375 // Figure out which BB this cmp is used in.
1376 BasicBlock *UserBB = User->getParent();
1377 BasicBlock *DefBB = Cmp->getParent();
1378
1379 // If this user is in the same block as the cmp, don't change the cmp.
1380 if (UserBB == DefBB) continue;
1381
1382 // If we have already inserted a cmp into this block, use it.
1383 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1384
1385 if (!InsertedCmp) {
1386 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1387 assert(InsertPt != UserBB->end());
1388 InsertedCmp =
1389 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1390 Cmp->getOperand(0), Cmp->getOperand(1), "",
1391 &*InsertPt);
1392 // Propagate the debug info.
1393 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1394 }
1395
1396 // Replace a use of the cmp with a use of the new cmp.
1397 TheUse = InsertedCmp;
1398 MadeChange = true;
1399 ++NumCmpUses;
1400 }
1401
1402 // If we removed all uses, nuke the cmp.
1403 if (Cmp->use_empty()) {
1404 Cmp->eraseFromParent();
1405 MadeChange = true;
1406 }
1407
1408 return MadeChange;
1409}
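// Illustrative sketch (assumed IR): a compare used only in other blocks is
// re-created once per user block, so the i1 result no longer has to live in
// a cross-block virtual register:
//   bb0: %c = icmp eq i32 %x, 0             bb1: %c1 = icmp eq i32 %x, 0
//   bb1: br i1 %c, label %t, label %f  ==>        br i1 %c1, label %t, label %f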
1410
1411bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
1412 if (sinkCmpExpression(Cmp, *TLI))
1413 return true;
1414
1415 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1416 return true;
1417
1418 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1419 return true;
1420
1421 return false;
1422}
1423
1424/// Duplicate and sink the given 'and' instruction into user blocks where it is
1425/// used in a compare to allow isel to generate better code for targets where
1426/// this operation can be combined.
1427///
1428/// Return true if any changes are made.
1429static bool sinkAndCmp0Expression(Instruction *AndI,
1430 const TargetLowering &TLI,
1431 SetOfInstrs &InsertedInsts) {
1432 // Double-check that we're not trying to optimize an instruction that was
1433 // already optimized by some other part of this pass.
1434 assert(!InsertedInsts.count(AndI) &&
1435 "Attempting to optimize already optimized and instruction");
1436 (void) InsertedInsts;
1437
1438 // Nothing to do for single use in same basic block.
1439 if (AndI->hasOneUse() &&
1440 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1441 return false;
1442
1443 // Try to avoid cases where sinking/duplicating is likely to increase register
1444 // pressure.
1445 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1446 !isa<ConstantInt>(AndI->getOperand(1)) &&
1447 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1448 return false;
1449
1450 for (auto *U : AndI->users()) {
1451 Instruction *User = cast<Instruction>(U);
1452
1453 // Only sink 'and' feeding icmp with 0.
1454 if (!isa<ICmpInst>(User))
1455 return false;
1456
1457 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1458 if (!CmpC || !CmpC->isZero())
1459 return false;
1460 }
1461
1462 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1463 return false;
1464
1465 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1466 LLVM_DEBUG(AndI->getParent()->dump());
1467
1468 // Push the 'and' into the same block as the icmp 0. There should only be
1469 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1470 // others, so we don't need to keep track of which BBs we insert into.
1471 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1472 UI != E; ) {
1473 Use &TheUse = UI.getUse();
1474 Instruction *User = cast<Instruction>(*UI);
1475
1476 // Preincrement use iterator so we don't invalidate it.
1477 ++UI;
1478
1479 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1480
1481 // Keep the 'and' in the same place if the use is already in the same block.
1482 Instruction *InsertPt =
1483 User->getParent() == AndI->getParent() ? AndI : User;
1484 Instruction *InsertedAnd =
1485 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1486 AndI->getOperand(1), "", InsertPt);
1487 // Propagate the debug info.
1488 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1489
1490 // Replace a use of the 'and' with a use of the new 'and'.
1491 TheUse = InsertedAnd;
1492 ++NumAndUses;
1493 LLVM_DEBUG(User->getParent()->dump());
1494 }
1495
1496 // We removed all uses, nuke the and.
1497 AndI->eraseFromParent();
1498 return true;
1499}
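// Illustrative sketch (assumed IR): an 'and' whose only users are icmp-eq-0
// instructions in other blocks is duplicated next to each such icmp, so isel
// can fold each (and X, C) == 0 into a single test per block:
//   bb0: %m = and i64 %x, 255             bb1: %m1 = and i64 %x, 255
//   bb1: %c = icmp eq i64 %m, 0     ==>         %c  = icmp eq i64 %m1, 0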
1500
1501/// Check if the candidates could be combined with a shift instruction, which
1502/// includes:
1503/// 1. Truncate instruction
1504/// 2. And instruction and the imm is a mask of the low bits:
1505/// imm & (imm+1) == 0
1506static bool isExtractBitsCandidateUse(Instruction *User) {
1507 if (!isa<TruncInst>(User)) {
1508 if (User->getOpcode() != Instruction::And ||
1509 !isa<ConstantInt>(User->getOperand(1)))
1510 return false;
1511
1512 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1513
1514 if ((Cimm & (Cimm + 1)).getBoolValue())
1515 return false;
1516 }
1517 return true;
1518}
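// Worked example of the mask test above: for imm = 0x00ff,
// imm & (imm + 1) == 0x00ff & 0x0100 == 0, so imm is a mask of the low bits
// and the use qualifies; for imm = 0x00f0, 0x00f0 & 0x00f1 == 0x00f0 != 0,
// so it is rejected.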
1519
1520/// Sink both shift and truncate instruction to the use of truncate's BB.
1521static bool
1522SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1523 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1524 const TargetLowering &TLI, const DataLayout &DL) {
1525 BasicBlock *UserBB = User->getParent();
1526 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1527 auto *TruncI = cast<TruncInst>(User);
1528 bool MadeChange = false;
1529
1530 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1531 TruncE = TruncI->user_end();
1532 TruncUI != TruncE;) {
1533
1534 Use &TruncTheUse = TruncUI.getUse();
1535 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1536 // Preincrement use iterator so we don't invalidate it.
1537
1538 ++TruncUI;
1539
1540 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1541 if (!ISDOpcode)
1542 continue;
1543
1544 // If the use is actually a legal node, there will not be an
1545 // implicit truncate.
1546 // FIXME: always querying the result type is just an
1547 // approximation; some nodes' legality is determined by the
1548 // operand or other means. There's no good way to find out though.
1549 if (TLI.isOperationLegalOrCustom(
1550 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1551 continue;
1552
1553 // Don't bother for PHI nodes.
1554 if (isa<PHINode>(TruncUser))
1555 continue;
1556
1557 BasicBlock *TruncUserBB = TruncUser->getParent();
1558
1559 if (UserBB == TruncUserBB)
1560 continue;
1561
1562 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1563 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1564
1565 if (!InsertedShift && !InsertedTrunc) {
1566 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1567 assert(InsertPt != TruncUserBB->end());
1568 // Sink the shift
1569 if (ShiftI->getOpcode() == Instruction::AShr)
1570 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1571 "", &*InsertPt);
1572 else
1573 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1574 "", &*InsertPt);
1575 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1576
1577 // Sink the trunc
1578 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1579 TruncInsertPt++;
1580 assert(TruncInsertPt != TruncUserBB->end());
1581
1582 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1583 TruncI->getType(), "", &*TruncInsertPt);
1584 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1585
1586 MadeChange = true;
1587
1588 TruncTheUse = InsertedTrunc;
1589 }
1590 }
1591 return MadeChange;
1592}
1593
1594/// Sink the shift *right* instruction into user blocks if the uses could
1595/// potentially be combined with this shift instruction to generate a
1596/// BitExtract instruction. It will only be applied if the architecture
1597/// supports BitExtract instructions. Here is an example:
1598/// BB1:
1599/// %x.extract.shift = lshr i64 %arg1, 32
1600/// BB2:
1601/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1602/// ==>
1603///
1604/// BB2:
1605/// %x.extract.shift.1 = lshr i64 %arg1, 32
1606/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1607///
1608/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1609/// instruction.
1610/// Return true if any changes are made.
1611static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1612 const TargetLowering &TLI,
1613 const DataLayout &DL) {
1614 BasicBlock *DefBB = ShiftI->getParent();
1615
1616 /// Only insert instructions in each block once.
1617 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1618
1619 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1620
1621 bool MadeChange = false;
1622 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1623 UI != E;) {
1624 Use &TheUse = UI.getUse();
1625 Instruction *User = cast<Instruction>(*UI);
1626 // Preincrement use iterator so we don't invalidate it.
1627 ++UI;
1628
1629 // Don't bother for PHI nodes.
1630 if (isa<PHINode>(User))
1631 continue;
1632
1633 if (!isExtractBitsCandidateUse(User))
1634 continue;
1635
1636 BasicBlock *UserBB = User->getParent();
1637
1638 if (UserBB == DefBB) {
1639 // If the shift and truncate instructions are in the same BB, the use of
1640 // the truncate (TruncUse) may still introduce another truncate if it is
1641 // not legal. In this case, we would like to sink both the shift and the
1642 // truncate instruction to the BB of TruncUse.
1643 // for example:
1644 // BB1:
1645 // i64 shift.result = lshr i64 opnd, imm
1646 // trunc.result = trunc shift.result to i16
1647 //
1648 // BB2:
1649 // ----> We will have an implicit truncate here if the architecture does
1650 // not have i16 compare.
1651 // cmp i16 trunc.result, opnd2
1652 //
1653 if (isa<TruncInst>(User) && shiftIsLegal
1654 // If the type of the truncate is legal, no truncate will be
1655 // introduced in other basic blocks.
1656 &&
1657 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1658 MadeChange =
1659 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1660
1661 continue;
1662 }
1663 // If we have already inserted a shift into this block, use it.
1664 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1665
1666 if (!InsertedShift) {
1667 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1668 assert(InsertPt != UserBB->end());
1669
1670 if (ShiftI->getOpcode() == Instruction::AShr)
1671 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1672 "", &*InsertPt);
1673 else
1674 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1675 "", &*InsertPt);
1676 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1677
1678 MadeChange = true;
1679 }
1680
1681 // Replace a use of the shift with a use of the new shift.
1682 TheUse = InsertedShift;
1683 }
1684
1685 // If we removed all uses, or there are none, nuke the shift.
1686 if (ShiftI->use_empty()) {
1687 salvageDebugInfo(*ShiftI);
1688 ShiftI->eraseFromParent();
1689 MadeChange = true;
1690 }
1691
1692 return MadeChange;
1693}
1694
1695/// If counting leading or trailing zeros is an expensive operation and a zero
1696/// input is defined, add a check for zero to avoid calling the intrinsic.
1697///
1698/// We want to transform:
1699/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1700///
1701/// into:
1702/// entry:
1703/// %cmpz = icmp eq i64 %A, 0
1704/// br i1 %cmpz, label %cond.end, label %cond.false
1705/// cond.false:
1706/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1707/// br label %cond.end
1708/// cond.end:
1709/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1710///
1711/// If the transform is performed, return true and set ModifiedDT to true.
1712static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1713 const TargetLowering *TLI,
1714 const DataLayout *DL,
1715 bool &ModifiedDT) {
1716 if (!TLI || !DL)
1717 return false;
1718
1719 // If a zero input is undefined, it doesn't make sense to despeculate that.
1720 if (match(CountZeros->getOperand(1), m_One()))
1721 return false;
1722
1723 // If it's cheap to speculate, there's nothing to do.
1724 auto IntrinsicID = CountZeros->getIntrinsicID();
1725 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1726 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1727 return false;
1728
1729 // Only handle legal scalar cases. Anything else requires too much work.
1730 Type *Ty = CountZeros->getType();
1731 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1732 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1733 return false;
1734
1735 // The intrinsic will be sunk behind a compare against zero and branch.
1736 BasicBlock *StartBlock = CountZeros->getParent();
1737 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1738
1739 // Create another block after the count zero intrinsic. A PHI will be added
1740 // in this block to select the result of the intrinsic or the bit-width
1741 // constant if the input to the intrinsic is zero.
1742 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1743 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1744
1745 // Set up a builder to create a compare, conditional branch, and PHI.
1746 IRBuilder<> Builder(CountZeros->getContext());
1747 Builder.SetInsertPoint(StartBlock->getTerminator());
1748 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1749
1750 // Replace the unconditional branch that was created by the first split with
1751 // a compare against zero and a conditional branch.
1752 Value *Zero = Constant::getNullValue(Ty);
1753 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1754 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1755 StartBlock->getTerminator()->eraseFromParent();
1756
1757 // Create a PHI in the end block to select either the output of the intrinsic
1758 // or the bit width of the operand.
1759 Builder.SetInsertPoint(&EndBlock->front());
1760 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1761 CountZeros->replaceAllUsesWith(PN);
1762 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1763 PN->addIncoming(BitWidth, StartBlock);
1764 PN->addIncoming(CountZeros, CallBlock);
1765
1766 // We are explicitly handling the zero case, so we can set the intrinsic's
1767 // undefined zero argument to 'true'. This will also prevent reprocessing the
1768 // intrinsic; we only despeculate when a zero input is defined.
1769 CountZeros->setArgOperand(1, Builder.getTrue());
1770 ModifiedDT = true;
1771 return true;
1772}
1773
1774bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1775 BasicBlock *BB = CI->getParent();
1776
1777 // Lower inline assembly if we can.
1778 // If we found an inline asm expression, and if the target knows how to
1779 // lower it to normal LLVM code, do so now.
1780 if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1781 if (TLI->ExpandInlineAsm(CI)) {
1782 // Avoid invalidating the iterator.
1783 CurInstIterator = BB->begin();
1784 // Avoid processing instructions out of order, which could cause
1785 // reuse before a value is defined.
1786 SunkAddrs.clear();
1787 return true;
1788 }
1789 // Sink address computing for memory operands into the block.
1790 if (optimizeInlineAsmInst(CI))
1791 return true;
1792 }
1793
1794 // Align the pointer arguments to this call if the target thinks it's a good
1795 // idea
1796 unsigned MinSize, PrefAlign;
1797 if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1798 for (auto &Arg : CI->arg_operands()) {
1799 // We want to align both objects whose address is used directly and
1800 // objects whose address is used in casts and GEPs, though it only makes
1801 // sense for GEPs if the offset is a multiple of the desired alignment and
1802 // if size - offset meets the size threshold.
1803 if (!Arg->getType()->isPointerTy())
1804 continue;
1805 APInt Offset(DL->getIndexSizeInBits(
1806 cast<PointerType>(Arg->getType())->getAddressSpace()),
1807 0);
1808 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1809 uint64_t Offset2 = Offset.getLimitedValue();
1810 if ((Offset2 & (PrefAlign-1)) != 0)
1811 continue;
1812 AllocaInst *AI;
1813 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1814 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1815 AI->setAlignment(MaybeAlign(PrefAlign));
1816 // Global variables can only be aligned if they are defined in this
1817 // object (i.e. they are uniquely initialized in this object), and
1818 // over-aligning global variables that have an explicit section is
1819 // forbidden.
1820 GlobalVariable *GV;
1821 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1822 GV->getPointerAlignment(*DL) < PrefAlign &&
1823 DL->getTypeAllocSize(GV->getValueType()) >=
1824 MinSize + Offset2)
1825 GV->setAlignment(PrefAlign);
1826 }
1827 // If this is a memcpy (or similar) then we may be able to improve the
1828 // alignment
1829 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1830 unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1831 if (DestAlign > MI->getDestAlignment())
1832 MI->setDestAlignment(DestAlign);
1833 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1834 unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1835 if (SrcAlign > MTI->getSourceAlignment())
1836 MTI->setSourceAlignment(SrcAlign);
1837 }
1838 }
1839 }
1840
1841 // If we have a cold call site, try to sink addressing computation into the
1842 // cold block. This interacts with our handling for loads and stores to
1843 // ensure that we can fold all uses of a potential addressing computation
1844 // into their uses. TODO: generalize this to work over profiling data
1845 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1846 for (auto &Arg : CI->arg_operands()) {
1847 if (!Arg->getType()->isPointerTy())
1848 continue;
1849 unsigned AS = Arg->getType()->getPointerAddressSpace();
1850 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1851 }
1852
1853 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1854 if (II) {
1855 switch (II->getIntrinsicID()) {
1856 default: break;
1857 case Intrinsic::experimental_widenable_condition: {
1858 // Give up on future widening opportunities so that we can fold away dead
1859 // paths and merge blocks before going into block-local instruction
1860 // selection.
1861 if (II->use_empty()) {
1862 II->eraseFromParent();
1863 return true;
1864 }
1865 Constant *RetVal = ConstantInt::getTrue(II->getContext());
1866 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1867 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1868 });
1869 return true;
1870 }
1871 case Intrinsic::objectsize:
1872 llvm_unreachable("llvm.objectsize.* should have been lowered already");
1873 case Intrinsic::is_constant:
1874 llvm_unreachable("llvm.is.constant.* should have been lowered already");
1875 case Intrinsic::aarch64_stlxr:
1876 case Intrinsic::aarch64_stxr: {
1877 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1878 if (!ExtVal || !ExtVal->hasOneUse() ||
1879 ExtVal->getParent() == CI->getParent())
1880 return false;
1881 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1882 ExtVal->moveBefore(CI);
1883 // Mark this instruction as "inserted by CGP", so that other
1884 // optimizations don't touch it.
1885 InsertedInsts.insert(ExtVal);
1886 return true;
1887 }
1888
1889 case Intrinsic::launder_invariant_group:
1890 case Intrinsic::strip_invariant_group: {
1891 Value *ArgVal = II->getArgOperand(0);
1892 auto it = LargeOffsetGEPMap.find(II);
1893 if (it != LargeOffsetGEPMap.end()) {
1894 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
1895 // Make sure not to have to deal with iterator invalidation
1896 // after possibly adding ArgVal to LargeOffsetGEPMap.
1897 auto GEPs = std::move(it->second);
1898 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
1899 LargeOffsetGEPMap.erase(II);
1900 }
1901
1902 II->replaceAllUsesWith(ArgVal);
1903 II->eraseFromParent();
1904 return true;
1905 }
1906 case Intrinsic::cttz:
1907 case Intrinsic::ctlz:
1908 // If counting zeros is expensive, try to avoid it.
1909 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
1910 }
1911
1912 if (TLI) {
1913 SmallVector<Value*, 2> PtrOps;
1914 Type *AccessTy;
1915 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
1916 while (!PtrOps.empty()) {
1917 Value *PtrVal = PtrOps.pop_back_val();
1918 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
1919 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
1920 return true;
1921 }
1922 }
1923 }
1924
1925 // From here on out we're working with named functions.
1926 if (!CI->getCalledFunction()) return false;
1927
1928 // Lower all default uses of _chk calls. This is very similar
1929 // to what InstCombineCalls does, but here we are only lowering calls
1930 // to fortified library functions (e.g. __memcpy_chk) that have the default
1931 // "don't know" as the objectsize. Anything else should be left alone.
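  // For example (illustrative, assumed IR): a call such as
  //   %r = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 %n, i64 -1)
  // carries the "don't know" object size (-1) and is expected to be rewritten
  // here into a plain memcpy of %n bytes.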
1932 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1933 if (Value *V = Simplifier.optimizeCall(CI)) {
1934 CI->replaceAllUsesWith(V);
1935 CI->eraseFromParent();
1936 return true;
1937 }
1938
1939 return false;
1940}
1941
1942/// Look for opportunities to duplicate return instructions to the predecessor
1943/// to enable tail call optimizations. The case it is currently looking for is:
1944/// @code
1945/// bb0:
1946/// %tmp0 = tail call i32 @f0()
1947/// br label %return
1948/// bb1:
1949/// %tmp1 = tail call i32 @f1()
1950/// br label %return
1951/// bb2:
1952/// %tmp2 = tail call i32 @f2()
1953/// br label %return
1954/// return:
1955/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1956/// ret i32 %retval
1957/// @endcode
1958///
1959/// =>
1960///
1961/// @code
1962/// bb0:
1963/// %tmp0 = tail call i32 @f0()
1964/// ret i32 %tmp0
1965/// bb1:
1966/// %tmp1 = tail call i32 @f1()
1967/// ret i32 %tmp1
1968/// bb2:
1969/// %tmp2 = tail call i32 @f2()
1970/// ret i32 %tmp2
1971/// @endcode
1972bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
1973 if (!TLI)
1974 return false;
1975
1976 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
1977 if (!RetI)
1978 return false;
1979
1980 PHINode *PN = nullptr;
1981 BitCastInst *BCI = nullptr;
1982 Value *V = RetI->getReturnValue();
1983 if (V) {
1984 BCI = dyn_cast<BitCastInst>(V);
1985 if (BCI)
1986 V = BCI->getOperand(0);
1987
1988 PN = dyn_cast<PHINode>(V);
1989 if (!PN)
1990 return false;
1991 }
1992
1993 if (PN && PN->getParent() != BB)
1994 return false;
1995
1996 // Make sure there are no instructions between the PHI and return, or that the
1997 // return is the first instruction in the block.
1998 if (PN) {
1999 BasicBlock::iterator BI = BB->begin();
2000 // Skip over debug and the bitcast.
2001 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
2002 if (&*BI != RetI)
2003 return false;
2004 } else {
2005 BasicBlock::iterator BI = BB->begin();
2006 while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2007 if (&*BI != RetI)
2008 return false;
2009 }
2010
2011 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2012 /// call.
2013 const Function *F = BB->getParent();
2014 SmallVector<BasicBlock*, 4> TailCallBBs;
2015 if (PN) {
2016 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2017 // Look through bitcasts.
2018 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2019 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2020 BasicBlock *PredBB = PN->getIncomingBlock(I);
2021 // Make sure the phi value is indeed produced by the tail call.
2022 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2023 TLI->mayBeEmittedAsTailCall(CI) &&
2024 attributesPermitTailCall(F, CI, RetI, *TLI))
2025 TailCallBBs.push_back(PredBB);
2026 }
2027 } else {
2028 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2029 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2030 if (!VisitedBBs.insert(*PI).second)
2031 continue;
2032
2033 BasicBlock::InstListType &InstList = (*PI)->getInstList();
2034 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2035 BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2036 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2037 if (RI == RE)
2038 continue;
2039
2040 CallInst *CI = dyn_cast<CallInst>(&*RI);
2041 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2042 attributesPermitTailCall(F, CI, RetI, *TLI))
2043 TailCallBBs.push_back(*PI);
2044 }
2045 }
2046
2047 bool Changed = false;
2048 for (auto const &TailCallBB : TailCallBBs) {
2049 // Make sure the call instruction is followed by an unconditional branch to
2050 // the return block.
2051 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2052 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2053 continue;
2054
2055 // Duplicate the return into TailCallBB.
2056 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2057 ModifiedDT = Changed = true;
2058 ++NumRetsDup;
2059 }
2060
2061 // If we eliminated all predecessors of the block, delete the block now.
2062 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2063 BB->eraseFromParent();
2064
2065 return Changed;
2066}
2067
2068//===----------------------------------------------------------------------===//
2069// Memory Optimization
2070//===----------------------------------------------------------------------===//
2071
2072namespace {
2073
2074/// This is an extended version of TargetLowering::AddrMode
2075/// which holds actual Value*'s for register values.
2076struct ExtAddrMode : public TargetLowering::AddrMode {
2077 Value *BaseReg = nullptr;
2078 Value *ScaledReg = nullptr;
2079 Value *OriginalValue = nullptr;
2080 bool InBounds = true;
2081
2082 enum FieldName {
2083 NoField = 0x00,
2084 BaseRegField = 0x01,
2085 BaseGVField = 0x02,
2086 BaseOffsField = 0x04,
2087 ScaledRegField = 0x08,
2088 ScaleField = 0x10,
2089 MultipleFields = 0xff
2090 };
2091
2092
2093 ExtAddrMode() = default;
2094
2095 void print(raw_ostream &OS) const;
2096 void dump() const;
2097
2098 FieldName compare(const ExtAddrMode &other) {
2099 // First check that the types are the same on each field, as differing types
2100 // are something we can't cope with later on.
2101 if (BaseReg && other.BaseReg &&
2102 BaseReg->getType() != other.BaseReg->getType())
2103 return MultipleFields;
2104 if (BaseGV && other.BaseGV &&
2105 BaseGV->getType() != other.BaseGV->getType())
2106 return MultipleFields;
2107 if (ScaledReg && other.ScaledReg &&
2108 ScaledReg->getType() != other.ScaledReg->getType())
2109 return MultipleFields;
2110
2111 // Conservatively reject 'inbounds' mismatches.
2112 if (InBounds != other.InBounds)
2113 return MultipleFields;
2114
2115 // Check each field to see if it differs.
2116 unsigned Result = NoField;
2117 if (BaseReg != other.BaseReg)
2118 Result |= BaseRegField;
2119 if (BaseGV != other.BaseGV)
2120 Result |= BaseGVField;
2121 if (BaseOffs != other.BaseOffs)
2122 Result |= BaseOffsField;
2123 if (ScaledReg != other.ScaledReg)
2124 Result |= ScaledRegField;
2125 // Don't count 0 as being a different scale, because that actually means
2126 // unscaled (which will already be counted by having no ScaledReg).
2127 if (Scale && other.Scale && Scale != other.Scale)
2128 Result |= ScaleField;
2129
2130 if (countPopulation(Result) > 1)
2131 return MultipleFields;
2132 else
2133 return static_cast<FieldName>(Result);
2134 }
2135
2136 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2137 // with no offset.
2138 bool isTrivial() {
2139 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2140 // trivial if at most one of these terms is nonzero, except that BaseGV and
2141 // BaseReg both being zero actually means a null pointer value, which we
2142 // consider to be 'non-zero' here.
2143 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2144 }
2145
2146 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2147 switch (Field) {
2148 default:
2149 return nullptr;
2150 case BaseRegField:
2151 return BaseReg;
2152 case BaseGVField:
2153 return BaseGV;
2154 case ScaledRegField:
2155 return ScaledReg;
2156 case BaseOffsField:
2157 return ConstantInt::get(IntPtrTy, BaseOffs);
2158 }
2159 }
2160
2161 void SetCombinedField(FieldName Field, Value *V,
2162 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2163 switch (Field) {
2164 default:
2165 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2166 break;
2167 case ExtAddrMode::BaseRegField:
2168 BaseReg = V;
2169 break;
2170 case ExtAddrMode::BaseGVField:
2171 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2172 // in the BaseReg field.
2173 assert(BaseReg == nullptr);
2174 BaseReg = V;
2175 BaseGV = nullptr;
2176 break;
2177 case ExtAddrMode::ScaledRegField:
2178 ScaledReg = V;
2179 // If we have a mix of scaled and unscaled addrmodes then we want scale
2180 // to be the scale and not zero.
2181 if (!Scale)
2182 for (const ExtAddrMode &AM : AddrModes)
2183 if (AM.Scale) {
2184 Scale = AM.Scale;
2185 break;
2186 }
2187 break;
2188 case ExtAddrMode::BaseOffsField:
2189 // The offset is no longer a constant, so it goes in ScaledReg with a
2190 // scale of 1.
2191 assert(ScaledReg == nullptr);
2192 ScaledReg = V;
2193 Scale = 1;
2194 BaseOffs = 0;
2195 break;
2196 }
2197 }
2198};
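// Illustrative decomposition (added for exposition; the values are assumed):
// for an address like
//   %addr = getelementptr inbounds i32, i32* %p, i64 %i
// a typical ExtAddrMode would be BaseReg = %p, ScaledReg = %i, Scale = 4,
// BaseGV = nullptr, BaseOffs = 0, printed by ExtAddrMode::print below as
// "[inbounds Base:%p + 4*%i]".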
2199
2200} // end anonymous namespace
2201
2202#ifndef NDEBUG
2203static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2204 AM.print(OS);
2205 return OS;
2206}
2207#endif
2208
2209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2210void ExtAddrMode::print(raw_ostream &OS) const {
2211 bool NeedPlus = false;
2212 OS << "[";
2213 if (InBounds)
2214 OS << "inbounds ";
2215 if (BaseGV) {
2216 OS << (NeedPlus ? " + " : "")
2217 << "GV:";
2218 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2219 NeedPlus = true;
2220 }
2221
2222 if (BaseOffs) {
2223 OS << (NeedPlus ? " + " : "")
2224 << BaseOffs;
2225 NeedPlus = true;
2226 }
2227
2228 if (BaseReg) {
2229 OS << (NeedPlus ? " + " : "")
2230 << "Base:";
2231 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2232 NeedPlus = true;
2233 }
2234 if (Scale) {
2235 OS << (NeedPlus ? " + " : "")
2236 << Scale << "*";
2237 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2238 }
2239
2240 OS << ']';
2241}
2242
2243 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2244 print(dbgs());
2245 dbgs() << '\n';
2246}
2247#endif
2248
2249namespace {
2250
2251/// This class provides transaction based operation on the IR.
2252/// Every change made through this class is recorded in the internal state and
2253/// can be undone (rollback) until commit is called.
2254class TypePromotionTransaction {
2255 /// This represents the common interface of the individual transaction.
2256 /// Each class implements the logic for doing one specific modification on
2257 /// the IR via the TypePromotionTransaction.
2258 class TypePromotionAction {
2259 protected:
2260 /// The Instruction modified.
2261 Instruction *Inst;
2262
2263 public:
2264 /// Constructor of the action.
2265 /// The constructor performs the related action on the IR.
2266 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2267
2268 virtual ~TypePromotionAction() = default;
2269
2270 /// Undo the modification done by this action.
2271 /// When this method is called, the IR must be in the same state as it was
2272 /// before this action was applied.
2273 /// \pre Undoing the action works if and only if the IR is in the exact same
2274 /// state as it was directly after this action was applied.
2275 virtual void undo() = 0;
2276
2277 /// Advocate every change made by this action.
2278 /// When the results on the IR of the action are to be kept, it is important
2279 /// to call this function, otherwise hidden information may be kept forever.
2280 virtual void commit() {
2281 // Nothing to be done, this action is not doing anything.
2282 }
2283 };
2284
2285 /// Utility to remember the position of an instruction.
2286 class InsertionHandler {
2287 /// Position of an instruction.
2288 /// Either an instruction:
2289 /// - Is the first in a basic block: BB is used.
2290 /// - Has a previous instruction: PrevInst is used.
2291 union {
2292 Instruction *PrevInst;
2293 BasicBlock *BB;
2294 } Point;
2295
2296 /// Remember whether or not the instruction had a previous instruction.
2297 bool HasPrevInstruction;
2298
2299 public:
2300 /// Record the position of \p Inst.
2301 InsertionHandler(Instruction *Inst) {
2302 BasicBlock::iterator It = Inst->getIterator();
2303 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2304 if (HasPrevInstruction)
2305 Point.PrevInst = &*--It;
2306 else
2307 Point.BB = Inst->getParent();
2308 }
2309
2310 /// Insert \p Inst at the recorded position.
2311 void insert(Instruction *Inst) {
2312 if (HasPrevInstruction) {
2313 if (Inst->getParent())
2314 Inst->removeFromParent();
2315 Inst->insertAfter(Point.PrevInst);
2316 } else {
2317 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2318 if (Inst->getParent())
2319 Inst->moveBefore(Position);
2320 else
2321 Inst->insertBefore(Position);
2322 }
2323 }
2324 };
2325
2326 /// Move an instruction before another.
2327 class InstructionMoveBefore : public TypePromotionAction {
2328 /// Original position of the instruction.
2329 InsertionHandler Position;
2330
2331 public:
2332 /// Move \p Inst before \p Before.
2333 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2334 : TypePromotionAction(Inst), Position(Inst) {
2335 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2336 << "\n");
2337 Inst->moveBefore(Before);
2338 }
2339
2340 /// Move the instruction back to its original position.
2341 void undo() override {
2342 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2343 Position.insert(Inst);
2344 }
2345 };
2346
2347 /// Set the operand of an instruction with a new value.
2348 class OperandSetter : public TypePromotionAction {
2349 /// Original operand of the instruction.
2350 Value *Origin;
2351
2352 /// Index of the modified instruction.
2353 unsigned Idx;
2354
2355 public:
2356 /// Set \p Idx operand of \p Inst with \p NewVal.
2357 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2358 : TypePromotionAction(Inst), Idx(Idx) {
2359 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2360 << "for:" << *Inst << "\n"
2361 << "with:" << *NewVal << "\n");
2362 Origin = Inst->getOperand(Idx);
2363 Inst->setOperand(Idx, NewVal);
2364 }
2365
2366 /// Restore the original value of the instruction.
2367 void undo() override {
2368 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2369 << "for: " << *Inst << "\n"
2370 << "with: " << *Origin << "\n");
2371 Inst->setOperand(Idx, Origin);
2372 }
2373 };
2374
2375 /// Hide the operands of an instruction.
2376 /// Do as if this instruction was not using any of its operands.
2377 class OperandsHider : public TypePromotionAction {
2378 /// The list of original operands.
2379 SmallVector<Value *, 4> OriginalValues;
2380
2381 public:
2382 /// Remove \p Inst from the uses of the operands of \p Inst.
2383 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2384 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2385 unsigned NumOpnds = Inst->getNumOperands();
2386 OriginalValues.reserve(NumOpnds);
2387 for (unsigned It = 0; It < NumOpnds; ++It) {
2388 // Save the current operand.
2389 Value *Val = Inst->getOperand(It);
2390 OriginalValues.push_back(Val);
2391 // Set a dummy one.
2392 // We could use OperandSetter here, but that would imply an overhead
2393 // that we are not willing to pay.
2394 Inst->setOperand(It, UndefValue::get(Val->getType()));
2395 }
2396 }
2397
2398 /// Restore the original list of uses.
2399 void undo() override {
2400 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2401 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2402 Inst->setOperand(It, OriginalValues[It]);
2403 }
2404 };
2405
2406 /// Build a truncate instruction.
2407 class TruncBuilder : public TypePromotionAction {
2408 Value *Val;
2409
2410 public:
2411 /// Build a truncate instruction of \p Opnd producing a \p Ty
2412 /// result.
2413 /// trunc Opnd to Ty.
2414 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2415 IRBuilder<> Builder(Opnd);
2416 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2417 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2418 }
2419
2420 /// Get the built value.
2421 Value *getBuiltValue() { return Val; }
2422
2423 /// Remove the built instruction.
2424 void undo() override {
2425 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2426 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2427 IVal->eraseFromParent();
2428 }
2429 };
2430
2431 /// Build a sign extension instruction.
2432 class SExtBuilder : public TypePromotionAction {
2433 Value *Val;
2434
2435 public:
2436 /// Build a sign extension instruction of \p Opnd producing a \p Ty
2437 /// result.
2438 /// sext Opnd to Ty.
2439 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2440 : TypePromotionAction(InsertPt) {
2441 IRBuilder<> Builder(InsertPt);
2442 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2443 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2444 }
2445
2446 /// Get the built value.
2447 Value *getBuiltValue() { return Val; }
2448
2449 /// Remove the built instruction.
2450 void undo() override {
2451 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2452 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2453 IVal->eraseFromParent();
2454 }
2455 };
2456
2457 /// Build a zero extension instruction.
2458 class ZExtBuilder : public TypePromotionAction {
2459 Value *Val;
2460
2461 public:
2462 /// Build a zero extension instruction of \p Opnd producing a \p Ty
2463 /// result.
2464 /// zext Opnd to Ty.
2465 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2466 : TypePromotionAction(InsertPt) {
2467 IRBuilder<> Builder(InsertPt);
2468 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2469 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2470 }
2471
2472 /// Get the built value.
2473 Value *getBuiltValue() { return Val; }
2474
2475 /// Remove the built instruction.
2476 void undo() override {
2477 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2478 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2479 IVal->eraseFromParent();
2480 }
2481 };
2482
2483 /// Mutate an instruction to another type.
2484 class TypeMutator : public TypePromotionAction {
2485 /// Record the original type.
2486 Type *OrigTy;
2487
2488 public:
2489 /// Mutate the type of \p Inst into \p NewTy.
2490 TypeMutator(Instruction *Inst, Type *NewTy)
2491 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2492 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2493 << "\n");
2494 Inst->mutateType(NewTy);
2495 }
2496
2497 /// Mutate the instruction back to its original type.
2498 void undo() override {
2499 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2500 << "\n");
2501 Inst->mutateType(OrigTy);
2502 }
2503 };
2504
2505 /// Replace the uses of an instruction by another instruction.
2506 class UsesReplacer : public TypePromotionAction {
2507 /// Helper structure to keep track of the replaced uses.
2508 struct InstructionAndIdx {
2509 /// The instruction using the instruction.
2510 Instruction *Inst;
2511
2512 /// The index where this instruction is used for Inst.
2513 unsigned Idx;
2514
2515 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2516 : Inst(Inst), Idx(Idx) {}
2517 };
2518
2519 /// Keep track of the original uses (pair Instruction, Index).
2520 SmallVector<InstructionAndIdx, 4> OriginalUses;
2521 /// Keep track of the debug users.
2522 SmallVector<DbgValueInst *, 1> DbgValues;
2523
2524 using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2525
2526 public:
2527 /// Replace all the use of \p Inst by \p New.
2528 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2529 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2530 << "\n");
2531 // Record the original uses.
2532 for (Use &U : Inst->uses()) {
2533 Instruction *UserI = cast<Instruction>(U.getUser());
2534 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2535 }
2536 // Record the debug uses separately. They are not in the instruction's
2537 // use list, but they are replaced by RAUW.
2538 findDbgValues(DbgValues, Inst);
2539
2540 // Now, we can replace the uses.
2541 Inst->replaceAllUsesWith(New);
2542 }
2543
2544 /// Reassign the original uses of Inst to Inst.
2545 void undo() override {
2546 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2547 for (use_iterator UseIt = OriginalUses.begin(),
2548 EndIt = OriginalUses.end();
2549 UseIt != EndIt; ++UseIt) {
2550 UseIt->Inst->setOperand(UseIt->Idx, Inst);
2551 }
2552 // RAUW has replaced all original uses with references to the new value,
2553 // including the debug uses. Since we are undoing the replacements,
2554 // the original debug uses must also be reinstated to maintain the
2555 // correctness and utility of debug value instructions.
2556 for (auto *DVI: DbgValues) {
2557 LLVMContext &Ctx = Inst->getType()->getContext();
2558 auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2559 DVI->setOperand(0, MV);
2560 }
2561 }
2562 };
2563
2564 /// Remove an instruction from the IR.
2565 class InstructionRemover : public TypePromotionAction {
2566 /// Original position of the instruction.
2567 InsertionHandler Inserter;
2568
2569     /// Helper structure to hide all the links to the instruction. In other
2570     /// words, this helps pretend that the instruction was removed.
2571 OperandsHider Hider;
2572
2573 /// Keep track of the uses replaced, if any.
2574 UsesReplacer *Replacer = nullptr;
2575
2576 /// Keep track of instructions removed.
2577 SetOfInstrs &RemovedInsts;
2578
2579 public:
2580     /// Remove all references to \p Inst and optionally replace all its
2581     /// uses with New.
2582     /// \p RemovedInsts Keeps track of the instructions removed by this Action.
2583     /// \pre If !Inst->use_empty(), then New != nullptr
2584 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2585 Value *New = nullptr)
2586 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2587 RemovedInsts(RemovedInsts) {
2588 if (New)
2589 Replacer = new UsesReplacer(Inst, New);
2590     LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2591 RemovedInsts.insert(Inst);
2592 /// The instructions removed here will be freed after completing
2593 /// optimizeBlock() for all blocks as we need to keep track of the
2594 /// removed instructions during promotion.
2595 Inst->removeFromParent();
2596 }
2597
2598 ~InstructionRemover() override { delete Replacer; }
2599
2600     /// Resurrect the instruction and reassign it to the proper uses if
2601     /// a new value was provided when building this action.
2602 void undo() override {
2603     LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2604 Inserter.insert(Inst);
2605 if (Replacer)
2606 Replacer->undo();
2607 Hider.undo();
2608 RemovedInsts.erase(Inst);
2609 }
2610 };
2611
2612public:
2613 /// Restoration point.
2614 /// The restoration point is a pointer to an action instead of an iterator
2615 /// because the iterator may be invalidated but not the pointer.
2616 using ConstRestorationPt = const TypePromotionAction *;
2617
2618 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2619 : RemovedInsts(RemovedInsts) {}
2620
2621   /// Commit every change made in that transaction.
2622 void commit();
2623
2624 /// Undo all the changes made after the given point.
2625 void rollback(ConstRestorationPt Point);
2626
2627 /// Get the current restoration point.
2628 ConstRestorationPt getRestorationPoint() const;
2629
2630 /// \name API for IR modification with state keeping to support rollback.
2631 /// @{
2632 /// Same as Instruction::setOperand.
2633 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2634
2635 /// Same as Instruction::eraseFromParent.
2636 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2637
2638 /// Same as Value::replaceAllUsesWith.
2639 void replaceAllUsesWith(Instruction *Inst, Value *New);
2640
2641 /// Same as Value::mutateType.
2642 void mutateType(Instruction *Inst, Type *NewTy);
2643
2644 /// Same as IRBuilder::createTrunc.
2645 Value *createTrunc(Instruction *Opnd, Type *Ty);
2646
2647 /// Same as IRBuilder::createSExt.
2648 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2649
2650 /// Same as IRBuilder::createZExt.
2651 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2652
2653 /// Same as Instruction::moveBefore.
2654 void moveBefore(Instruction *Inst, Instruction *Before);
2655 /// @}
2656
2657private:
2658 /// The ordered list of actions made so far.
2659 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2660
2661 using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2662
2663 SetOfInstrs &RemovedInsts;
2664};
2665
2666} // end anonymous namespace
2667
2668void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2669 Value *NewVal) {
2670 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
2671 Inst, Idx, NewVal));
2672}
2673
2674void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2675 Value *NewVal) {
2676 Actions.push_back(
2677 std::make_unique<TypePromotionTransaction::InstructionRemover>(
2678 Inst, RemovedInsts, NewVal));
2679}
2680
2681void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2682 Value *New) {
2683 Actions.push_back(
2684 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2685}
2686
2687void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2688 Actions.push_back(
2689 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2690}
2691
2692Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2693 Type *Ty) {
2694 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2695 Value *Val = Ptr->getBuiltValue();
2696 Actions.push_back(std::move(Ptr));
2697 return Val;
2698}
2699
2700Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2701 Value *Opnd, Type *Ty) {
2702 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2703 Value *Val = Ptr->getBuiltValue();
2704 Actions.push_back(std::move(Ptr));
2705 return Val;
2706}
2707
2708Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2709 Value *Opnd, Type *Ty) {
2710 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2711 Value *Val = Ptr->getBuiltValue();
2712 Actions.push_back(std::move(Ptr));
2713 return Val;
2714}
2715
2716void TypePromotionTransaction::moveBefore(Instruction *Inst,
2717 Instruction *Before) {
2718 Actions.push_back(
2719 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2720 Inst, Before));
2721}
2722
2723TypePromotionTransaction::ConstRestorationPt
2724TypePromotionTransaction::getRestorationPoint() const {
2725 return !Actions.empty() ? Actions.back().get() : nullptr;
2726}
2727
2728void TypePromotionTransaction::commit() {
2729 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2730 ++It)
2731 (*It)->commit();
2732 Actions.clear();
2733}
2734
2735void TypePromotionTransaction::rollback(
2736 TypePromotionTransaction::ConstRestorationPt Point) {
2737 while (!Actions.empty() && Point != Actions.back().get()) {
2738 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2739 Curr->undo();
2740 }
2741}
2742
2743namespace {
2744
2745/// A helper class for matching addressing modes.
2746///
2747/// This encapsulates the logic for matching the target-legal addressing modes.
2748class AddressingModeMatcher {
2749 SmallVectorImpl<Instruction*> &AddrModeInsts;
2750 const TargetLowering &TLI;
2751 const TargetRegisterInfo &TRI;
2752 const DataLayout &DL;
2753
2754 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2755 /// the memory instruction that we're computing this address for.
2756 Type *AccessTy;
2757 unsigned AddrSpace;
2758 Instruction *MemoryInst;
2759
2760 /// This is the addressing mode that we're building up. This is
2761 /// part of the return value of this addressing mode matching stuff.
2762 ExtAddrMode &AddrMode;
2763
2764 /// The instructions inserted by other CodeGenPrepare optimizations.
2765 const SetOfInstrs &InsertedInsts;
2766
2767 /// A map from the instructions to their type before promotion.
2768 InstrToOrigTy &PromotedInsts;
2769
2770 /// The ongoing transaction where every action should be registered.
2771 TypePromotionTransaction &TPT;
2772
2773   // A GEP whose offset is too large to be folded into the addressing mode.
2774 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2775
2776 /// This is set to true when we should not do profitability checks.
2777 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2778 bool IgnoreProfitability;
2779
2780 AddressingModeMatcher(
2781 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2782 const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2783 ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2784 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2785 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
2786 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2787 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2788 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2789 PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
2790 IgnoreProfitability = false;
2791 }
2792
2793public:
2794   /// Find the maximal addressing mode that a load/store of V can fold,
2795   /// given an access type of AccessTy. This returns a list of involved
2796   /// instructions in AddrModeInsts.
2797   /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2798   /// optimizations.
2799   /// \p PromotedInsts maps the instructions to their type before promotion.
2800   /// \p TPT The ongoing transaction where every action should be registered.
2801 static ExtAddrMode
2802 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2803 SmallVectorImpl<Instruction *> &AddrModeInsts,
2804 const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2805 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2806 TypePromotionTransaction &TPT,
2807 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
2808 ExtAddrMode Result;
2809
2810 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2811 MemoryInst, Result, InsertedInsts,
2812 PromotedInsts, TPT, LargeOffsetGEP)
2813 .matchAddr(V, 0);
2814     (void)Success; assert(Success && "Couldn't select *anything*?");
2815 return Result;
2816 }
2817
2818private:
2819 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2820 bool matchAddr(Value *Addr, unsigned Depth);
2821 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2822 bool *MovedAway = nullptr);
2823 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2824 ExtAddrMode &AMBefore,
2825 ExtAddrMode &AMAfter);
2826 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2827 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2828 Value *PromotedOperand) const;
2829};
2830
2831class PhiNodeSet;
2832
2833/// An iterator for PhiNodeSet.
2834class PhiNodeSetIterator {
2835 PhiNodeSet * const Set;
2836 size_t CurrentIndex = 0;
2837
2838public:
2839 /// The constructor. Start should point to either a valid element, or be equal
2840 /// to the size of the underlying SmallVector of the PhiNodeSet.
2841 PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2842 PHINode * operator*() const;
2843 PhiNodeSetIterator& operator++();
2844 bool operator==(const PhiNodeSetIterator &RHS) const;
2845 bool operator!=(const PhiNodeSetIterator &RHS) const;
2846};
2847
2848/// Keeps a set of PHINodes.
2849///
2850/// This is a minimal set implementation for a specific use case:
2851/// It is very fast when there are very few elements, but also provides good
2852/// performance when there are many. It is similar to SmallPtrSet, but also
2853/// provides iteration in insertion order, which is deterministic and stable
2854/// across runs. It is also similar to SmallSetVector, but allows removing
2855/// elements in O(1) time. This is achieved by not actually removing the element
2856/// from the underlying vector, which comes at the cost of using more memory, but
2857/// that is fine, since PhiNodeSets are used as short-lived objects.
2858class PhiNodeSet {
2859 friend class PhiNodeSetIterator;
2860
2861 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
2862 using iterator = PhiNodeSetIterator;
2863
2864 /// Keeps the elements in the order of their insertion in the underlying
2865 /// vector. To achieve constant time removal, it never deletes any element.
2866 SmallVector<PHINode *, 32> NodeList;
2867
2868 /// Keeps the elements in the underlying set implementation. This (and not the
2869 /// NodeList defined above) is the source of truth on whether an element
2870 /// is actually in the collection.
2871 MapType NodeMap;
2872
2873   /// Points to the first valid (not deleted) element when the set is not empty
2874   /// and the value is not zero. Equals the size of the underlying vector
2875   /// when the set is empty. When the value is 0, as in the beginning, the
2876   /// first element may or may not be valid.
2877 size_t FirstValidElement = 0;
2878
2879public:
2880 /// Inserts a new element to the collection.
2881 /// \returns true if the element is actually added, i.e. was not in the
2882 /// collection before the operation.
2883 bool insert(PHINode *Ptr) {
2884 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
2885 NodeList.push_back(Ptr);
2886 return true;
2887 }
2888 return false;
2889 }
2890
2891 /// Removes the element from the collection.
2892 /// \returns whether the element is actually removed, i.e. was in the
2893 /// collection before the operation.
2894 bool erase(PHINode *Ptr) {
2895 auto it = NodeMap.find(Ptr);
2896 if (it != NodeMap.end()) {
2897 NodeMap.erase(Ptr);
2898 SkipRemovedElements(FirstValidElement);
2899 return true;
2900 }
2901 return false;
2902 }
2903
2904 /// Removes all elements and clears the collection.
2905 void clear() {
2906 NodeMap.clear();
2907 NodeList.clear();
2908 FirstValidElement = 0;
2909 }
2910
2911 /// \returns an iterator that will iterate the elements in the order of
2912 /// insertion.
2913 iterator begin() {
2914 if (FirstValidElement == 0)
2915 SkipRemovedElements(FirstValidElement);
2916 return PhiNodeSetIterator(this, FirstValidElement);
2917 }
2918
2919 /// \returns an iterator that points to the end of the collection.
2920 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
2921
2922 /// Returns the number of elements in the collection.
2923 size_t size() const {
2924 return NodeMap.size();
2925 }
2926
2927   /// \returns 1 if the given element is in the collection, and 0 otherwise.
2928 size_t count(PHINode *Ptr) const {
2929 return NodeMap.count(Ptr);
2930 }
2931
2932private:
2933 /// Updates the CurrentIndex so that it will point to a valid element.
2934 ///
2935 /// If the element of NodeList at CurrentIndex is valid, it does not
2936 /// change it. If there are no more valid elements, it updates CurrentIndex
2937 /// to point to the end of the NodeList.
2938 void SkipRemovedElements(size_t &CurrentIndex) {
2939 while (CurrentIndex < NodeList.size()) {
2940 auto it = NodeMap.find(NodeList[CurrentIndex]);
2941 // If the element has been deleted and added again later, NodeMap will
2942 // point to a different index, so CurrentIndex will still be invalid.
2943 if (it != NodeMap.end() && it->second == CurrentIndex)
2944 break;
2945 ++CurrentIndex;
2946 }
2947 }
2948};
2949
2950PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
2951 : Set(Set), CurrentIndex(Start) {}
2952
2953PHINode * PhiNodeSetIterator::operator*() const {
2954   assert(CurrentIndex < Set->NodeList.size() &&
2955          "PhiNodeSet access out of range");
2956 return Set->NodeList[CurrentIndex];
2957}
2958
2959PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
2960   assert(CurrentIndex < Set->NodeList.size() &&
2961          "PhiNodeSet access out of range");
2962 ++CurrentIndex;
2963 Set->SkipRemovedElements(CurrentIndex);
2964 return *this;
2965}
2966
2967bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
2968 return CurrentIndex == RHS.CurrentIndex;
2969}
2970
2971bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
2972 return !((*this) == RHS);
2973}
2974
2975/// Keeps track of the simplification of Phi nodes.
2976/// Accepts the set of all phi nodes and erases a phi node from this set
2977/// if it is simplified.
2978class SimplificationTracker {
2979 DenseMap<Value *, Value *> Storage;
2980 const SimplifyQuery &SQ;
2981 // Tracks newly created Phi nodes. The elements are iterated by insertion
2982 // order.
2983 PhiNodeSet AllPhiNodes;
2984 // Tracks newly created Select nodes.
2985 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
2986
2987public:
2988 SimplificationTracker(const SimplifyQuery &sq)
2989 : SQ(sq) {}
2990
2991 Value *Get(Value *V) {
2992 do {
2993 auto SV = Storage.find(V);
2994 if (SV == Storage.end())
2995 return V;
2996 V = SV->second;
2997 } while (true);
2998 }
2999
3000 Value *Simplify(Value *Val) {
3001 SmallVector<Value *, 32> WorkList;
3002 SmallPtrSet<Value *, 32> Visited;
3003 WorkList.push_back(Val);
3004 while (!WorkList.empty()) {
3005 auto P = WorkList.pop_back_val();
3006 if (!Visited.insert(P).second)
3007 continue;
3008 if (auto *PI = dyn_cast<Instruction>(P))
3009 if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
3010 for (auto *U : PI->users())
3011 WorkList.push_back(cast<Value>(U));
3012 Put(PI, V);
3013 PI->replaceAllUsesWith(V);
3014 if (auto *PHI = dyn_cast<PHINode>(PI))
3015 AllPhiNodes.erase(PHI);
3016 if (auto *Select = dyn_cast<SelectInst>(PI))
3017 AllSelectNodes.erase(Select);
3018 PI->eraseFromParent();
3019 }
3020 }
3021 return Get(Val);
3022 }
3023
3024 void Put(Value *From, Value *To) {
3025 Storage.insert({ From, To });
3026 }
3027
3028 void ReplacePhi(PHINode *From, PHINode *To) {
3029 Value* OldReplacement = Get(From);
3030 while (OldReplacement != From) {
[34] Assuming 'OldReplacement' is not equal to 'From'
[35] Loop condition is true. Entering loop body
[38] Assuming 'OldReplacement' is not equal to 'From'
[39] Loop condition is true. Entering loop body
[42] Assuming 'OldReplacement' is equal to 'From'
[43] Loop condition is false. Execution continues on line 3035
3031 From = To;
[40] Null pointer value stored to 'From'
3032 To = dyn_cast<PHINode>(OldReplacement);
[36] Assuming 'OldReplacement' is not a 'PHINode'
[37] Null pointer value stored to 'To'
[41] Assuming 'OldReplacement' is a 'PHINode'
3033 OldReplacement = Get(From);
3034 }
3035     assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
[43.1] 'To' is non-null
[44] Assuming the condition is true
[45] '?' condition is true
3036 Put(From, To);
3037 From->replaceAllUsesWith(To);
[46] Called C++ object pointer is null
3038 AllPhiNodes.erase(From);
3039 From->eraseFromParent();
3040 }
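Taken together, the path notes above describe the scenario the analyzer is warning about: on the first loop iteration Get(From) is assumed to return a value that is not a PHINode, so dyn_cast<PHINode> stores null into To (notes [36]-[37]); on the next iteration that null is copied into From (note [40]) while To becomes non-null again (note [41]), so the assert on line 3035 still passes; line 3037 then calls replaceAllUsesWith on the null From (note [46]). Whether Get() can actually return a non-PHINode for the values ReplacePhi is called with is an invariant of the callers that the analyzer cannot see from this function alone.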
3041
3042 PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
3043
3044 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3045
3046 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3047
3048 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3049
3050 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3051
3052 void destroyNewNodes(Type *CommonType) {
3053 // For safe erasing, replace the uses with dummy value first.
3054 auto Dummy = UndefValue::get(CommonType);
3055 for (auto I : AllPhiNodes) {
3056 I->replaceAllUsesWith(Dummy);
3057 I->eraseFromParent();
3058 }
3059 AllPhiNodes.clear();
3060 for (auto I : AllSelectNodes) {
3061 I->replaceAllUsesWith(Dummy);
3062 I->eraseFromParent();
3063 }
3064 AllSelectNodes.clear();
3065 }
3066};
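Get and Put above behave like a small union-find without path compression: Put records a one-step replacement and Get chases the chain to its current end. A sketch, assuming Values A, B, C and a SimplifyQuery SQ are available in the surrounding scope:

  SimplificationTracker ST(SQ);
  ST.Put(A, B);             // A was simplified to B
  ST.Put(B, C);             // later, B itself was simplified to C
  Value *Cur = ST.Get(A);   // follows A -> B -> C and returns C
  Value *Same = ST.Get(C);  // no entry for C, so C is returned unchanged
  assert(Cur == C && Same == C);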
3067
3068/// A helper class for combining addressing modes.
3069class AddressingModeCombiner {
3070 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3071 typedef std::pair<PHINode *, PHINode *> PHIPair;
3072
3073private:
3074 /// The addressing modes we've collected.
3075 SmallVector<ExtAddrMode, 16> AddrModes;
3076
3077 /// The field in which the AddrModes differ, when we have more than one.
3078 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3079
3080 /// Are the AddrModes that we have all just equal to their original values?
3081 bool AllAddrModesTrivial = true;
3082
3083 /// Common Type for all different fields in addressing modes.
3084 Type *CommonType;
3085
3086 /// SimplifyQuery for simplifyInstruction utility.
3087 const SimplifyQuery &SQ;
3088
3089 /// Original Address.
3090 Value *Original;
3091
3092public:
3093 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3094 : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
3095
3096 /// Get the combined AddrMode
3097 const ExtAddrMode &getAddrMode() const {
3098 return AddrModes[0];
3099 }
3100
3101 /// Add a new AddrMode if it's compatible with the AddrModes we already
3102 /// have.
3103 /// \return True iff we succeeded in doing so.
3104 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3105     // Take note of whether we have any non-trivial AddrModes, as we need to
3106     // detect when all AddrModes are trivial, since then we would introduce a phi
3107     // or select which just duplicates what's already there.
3108 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3109
3110 // If this is the first addrmode then everything is fine.
3111 if (AddrModes.empty()) {
3112 AddrModes.emplace_back(NewAddrMode);
3113 return true;
3114 }
3115
3116 // Figure out how different this is from the other address modes, which we
3117 // can do just by comparing against the first one given that we only care
3118 // about the cumulative difference.
3119 ExtAddrMode::FieldName ThisDifferentField =
3120 AddrModes[0].compare(NewAddrMode);
3121 if (DifferentField == ExtAddrMode::NoField)
3122 DifferentField = ThisDifferentField;
3123 else if (DifferentField != ThisDifferentField)
3124 DifferentField = ExtAddrMode::MultipleFields;
3125
3126 // If NewAddrMode differs in more than one dimension we cannot handle it.
3127 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3128
3129 // If Scale Field is different then we reject.
3130 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3131
3132     // We must also reject the case where the base offset differs and the
3133     // scale register is not null: we cannot handle it, because the merge of
3134     // the different offsets would have to be used as the ScaleReg.
3135 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3136 !NewAddrMode.ScaledReg);
3137
3138     // We must also reject the case where the GV differs and a BaseReg is
3139     // installed, because we want to use the base register as the merge of the GV values.
3140 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3141 !NewAddrMode.HasBaseReg);
3142
3143     // Even if NewAddrMode is the same, we still need to collect it, because the
3144     // original value is different; later we will need all original values
3145     // as anchors when finding the common Phi node.
3146 if (CanHandle)
3147 AddrModes.emplace_back(NewAddrMode);
3148 else
3149 AddrModes.clear();
3150
3151 return CanHandle;
3152 }
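As an illustration of these rules (hypothetical values, not taken from the pass): if the first collected mode is [BaseReg=%p1, BaseOffs=40] and the new one is [BaseReg=%p2, BaseOffs=40], they differ only in BaseRegField, so the new mode is kept and can later be merged through a phi of %p1 and %p2. If a further mode such as [BaseReg=%p2, BaseOffs=48] arrives, more than one field differs from AddrModes[0], DifferentField becomes MultipleFields, AddrModes is cleared and addNewAddrMode returns false.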
3153
3154 /// Combine the addressing modes we've collected into a single
3155 /// addressing mode.
3156 /// \return True iff we successfully combined them or we only had one so
3157 /// didn't need to combine them anyway.
3158 bool combineAddrModes() {
3159 // If we have no AddrModes then they can't be combined.
3160 if (AddrModes.size() == 0)
[12] Assuming the condition is false
[13] Taking false branch
3161 return false;
3162
3163 // A single AddrMode can trivially be combined.
3164 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
[14] Assuming the condition is false
[15] Assuming field 'DifferentField' is not equal to NoField
[16] Taking false branch
3165 return true;
3166
3167 // If the AddrModes we collected are all just equal to the value they are
3168 // derived from then combining them wouldn't do anything useful.
3169 if (AllAddrModesTrivial)
[17] Assuming field 'AllAddrModesTrivial' is false
[18] Taking false branch
3170 return false;
3171
3172 if (!addrModeCombiningAllowed())
[19] Assuming the condition is false
[20] Taking false branch
3173 return false;
3174
3175     // Build a map from <original value, basic block where we saw it> to the
3176     // value of the base register.
3177     // Bail out if there is no common type.
3178 FoldAddrToValueMapping Map;
3179 if (!initializeMap(Map))
[21] Taking false branch
3180 return false;
3181
3182 Value *CommonValue = findCommon(Map);
[22] Calling 'AddressingModeCombiner::findCommon'
3183 if (CommonValue)
3184 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3185 return CommonValue != nullptr;
3186 }
3187
3188private:
3189   /// Initialize Map with anchor values. For each address seen,
3190   /// we record the value of the differing field in that address.
3191   /// At the same time we find a common type for the differing fields, which we
3192   /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3193   /// Return false if no common type is found.
3194 bool initializeMap(FoldAddrToValueMapping &Map) {
3195     // Keep track of keys where the value is null. We will need to replace them
3196     // with a constant null when we know the common type.
3197 SmallVector<Value *, 2> NullValue;
3198 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3199 for (auto &AM : AddrModes) {
3200 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3201 if (DV) {
3202 auto *Type = DV->getType();
3203 if (CommonType && CommonType != Type)
3204 return false;
3205 CommonType = Type;
3206 Map[AM.OriginalValue] = DV;
3207 } else {
3208 NullValue.push_back(AM.OriginalValue);
3209 }
3210 }
3211     assert(CommonType && "At least one non-null value must be!");
3212 for (auto *V : NullValue)
3213 Map[V] = Constant::getNullValue(CommonType);
3214 return true;
3215 }
3216
3217   /// We have a mapping from a value A to another value B, where B was a field in
3218   /// the addressing mode represented by A. We also have an original value C
3219   /// representing the address we start with. Traversing from C through phis and
3220   /// selects, we ended up with the A's in the map. This utility function tries to
3221   /// find a value V which is a field in the addressing mode of C such that,
3222   /// traversing through phi nodes and selects, we end up at the corresponding B
3223   /// values in the map. The utility creates new Phis/Selects if needed.
3224 // The simple example looks as follows:
3225 // BB1:
3226 // p1 = b1 + 40
3227 // br cond BB2, BB3
3228 // BB2:
3229 // p2 = b2 + 40
3230 // br BB3
3231 // BB3:
3232 // p = phi [p1, BB1], [p2, BB2]
3233 // v = load p
3234 // Map is
3235 // p1 -> b1
3236 // p2 -> b2
3237 // Request is
3238 // p -> ?
3239 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3240 Value *findCommon(FoldAddrToValueMapping &Map) {
3241     // Tracks the simplification of newly created phi nodes. The reason we use
3242     // this mapping is that we will add newly created Phi nodes to AddrToBase.
3243     // Simplification of Phi nodes is recursive, so some Phi node may
3244     // be simplified after we added it to AddrToBase. In reality this
3245     // simplification is possible only if the original phis/selects were not
3246     // simplified yet.
3247     // Using this mapping we can find the current value in AddrToBase.
3248 SimplificationTracker ST(SQ);
3249
3250 // First step, DFS to create PHI nodes for all intermediate blocks.
3251 // Also fill traverse order for the second step.
3252 SmallVector<Value *, 32> TraverseOrder;
3253 InsertPlaceholders(Map, TraverseOrder, ST);
3254
3255 // Second Step, fill new nodes by merged values and simplify if possible.
3256 FillPlaceholders(Map, TraverseOrder, ST);
3257
3258 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
[23] Assuming the condition is false
3259 ST.destroyNewNodes(CommonType);
3260 return nullptr;
3261 }
3262
3263     // Now we'd like to match new Phi nodes to existing ones.
3264 unsigned PhiNotMatchedCount = 0;
3265 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
[24] Calling 'AddressingModeCombiner::MatchPhiSet'
3266 ST.destroyNewNodes(CommonType);
3267 return nullptr;
3268 }
3269
3270 auto *Result = ST.Get(Map.find(Original)->second);
3271 if (Result) {
3272 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3273 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3274 }
3275 return Result;
3276 }
3277
3278 /// Try to match PHI node to Candidate.
3279 /// Matcher tracks the matched Phi nodes.
3280 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3281 SmallSetVector<PHIPair, 8> &Matcher,
3282 PhiNodeSet &PhiNodesToMatch) {
3283 SmallVector<PHIPair, 8> WorkList;
3284 Matcher.insert({ PHI, Candidate });
3285 SmallSet<PHINode *, 8> MatchedPHIs;
3286 MatchedPHIs.insert(PHI);
3287 WorkList.push_back({ PHI, Candidate });
3288 SmallSet<PHIPair, 8> Visited;
3289 while (!WorkList.empty()) {
3290 auto Item = WorkList.pop_back_val();
3291 if (!Visited.insert(Item).second)
3292 continue;
3293       // We iterate over all incoming values of the Phi to compare them.
3294       // If the values are different, both of them are Phis, the first one is a
3295       // Phi we added (subject to match), and both of them are in the same basic
3296       // block, then we can match our pair if the values match. So we state that
3297       // these values match and add them to the work list to verify that.
3298 for (auto B : Item.first->blocks()) {
3299 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3300 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3301 if (FirstValue == SecondValue)
3302 continue;
3303
3304 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3305 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3306
3307         // If one of them is not a Phi, or
3308         // the first one is not a Phi node from the set we'd like to match, or
3309         // the Phi nodes are from different basic blocks, then
3310         // we will not be able to match.
3311 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3312 FirstPhi->getParent() != SecondPhi->getParent())
3313 return false;
3314
3315 // If we already matched them then continue.
3316 if (Matcher.count({ FirstPhi, SecondPhi }))
3317 continue;
3318         // So the values are different and do not match. So we need them to
3319 // match. (But we register no more than one match per PHI node, so that
3320 // we won't later try to replace them twice.)
3321 if (MatchedPHIs.insert(FirstPhi).second)
3322 Matcher.insert({ FirstPhi, SecondPhi });
3323         // But we must check it.
3324 WorkList.push_back({ FirstPhi, SecondPhi });
3325 }
3326 }
3327 return true;
3328 }
3329
3330 /// For the given set of PHI nodes (in the SimplificationTracker) try
3331 /// to find their equivalents.
3332 /// Returns false if this matching fails and creation of new Phi is disabled.
3333 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3334 unsigned &PhiNotMatchedCount) {
3335 // Matched and PhiNodesToMatch iterate their elements in a deterministic
3336 // order, so the replacements (ReplacePhi) are also done in a deterministic
3337 // order.
3338 SmallSetVector<PHIPair, 8> Matched;
3339 SmallPtrSet<PHINode *, 8> WillNotMatch;
3340 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3341 while (PhiNodesToMatch.size()) {
[25] Loop condition is true. Entering loop body
3342 PHINode *PHI = *PhiNodesToMatch.begin();
3343
3344       // Add ourselves; if there are no Phi nodes in the basic block, we do not match.
3345 WillNotMatch.clear();
3346 WillNotMatch.insert(PHI);
3347
3348       // Traverse all Phis until we find an equivalent or fail to do so.
3349 bool IsMatched = false;
3350 for (auto &P : PHI->getParent()->phis()) {
3351 if (&P == PHI)
[26] Assuming the condition is false
[27] Taking false branch
3352 continue;
3353 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
[28] Assuming 'IsMatched' is true
[29] Taking true branch
3354 break;
[30] Execution continues on line 3362
3355         // If it does not match, collect all Phi nodes from the matcher;
3356         // if we end up with no match, then all these Phi nodes will not match
3357         // later.
3358 for (auto M : Matched)
3359 WillNotMatch.insert(M.first);
3360 Matched.clear();
3361 }
3362       if (IsMatched) {
[30.1] 'IsMatched' is true
[31] Taking true branch
3363 // Replace all matched values and erase them.
3364 for (auto MV : Matched)
[32] Assuming '__begin4' is not equal to '__end4'
3365 ST.ReplacePhi(MV.first, MV.second);
[33] Calling 'SimplificationTracker::ReplacePhi'
3366 Matched.clear();
3367 continue;
3368 }
3369 // If we are not allowed to create new nodes then bail out.
3370 if (!AllowNewPhiNodes)
3371 return false;
3372 // Just remove all seen values in matcher. They will not match anything.
3373 PhiNotMatchedCount += WillNotMatch.size();
3374 for (auto *P : WillNotMatch)
3375 PhiNodesToMatch.erase(P);
3376 }
3377 return true;
3378 }
3379 /// Fill the placeholders with values from predecessors and simplify them.
3380 void FillPlaceholders(FoldAddrToValueMapping &Map,
3381 SmallVectorImpl<Value *> &TraverseOrder,
3382 SimplificationTracker &ST) {
3383 while (!TraverseOrder.empty()) {
3384 Value *Current = TraverseOrder.pop_back_val();
3385       assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3386 Value *V = Map[Current];
3387
3388 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3389 // CurrentValue also must be Select.
3390 auto *CurrentSelect = cast<SelectInst>(Current);
3391 auto *TrueValue = CurrentSelect->getTrueValue();
3392         assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3393 Select->setTrueValue(ST.Get(Map[TrueValue]));
3394 auto *FalseValue = CurrentSelect->getFalseValue();
3395         assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3396 Select->setFalseValue(ST.Get(Map[FalseValue]));
3397 } else {
3398 // Must be a Phi node then.
3399 auto *PHI = cast<PHINode>(V);
3400 // Fill the Phi node with values from predecessors.
3401 for (auto B : predecessors(PHI->getParent())) {
3402 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
3403           assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3404 PHI->addIncoming(ST.Get(Map[PV]), B);
3405 }
3406 }
3407 Map[Current] = ST.Simplify(V);
3408 }
3409 }
3410
3411   /// Starting from the original value, recursively iterates over the def-use
3412   /// chain up to the known ending values represented in a map. For each traversed
3413   /// phi/select, inserts a placeholder Phi or Select.
3414   /// Reports all newly created Phi/Select nodes by adding them to the set.
3415   /// Also reports the order in which the values have been traversed.
3416 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3417 SmallVectorImpl<Value *> &TraverseOrder,
3418 SimplificationTracker &ST) {
3419 SmallVector<Value *, 32> Worklist;
3420     assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3421            "Address must be a Phi or Select node");
3422 auto *Dummy = UndefValue::get(CommonType);
3423 Worklist.push_back(Original);
3424 while (!Worklist.empty()) {
3425 Value *Current = Worklist.pop_back_val();
3426 // if it is already visited or it is an ending value then skip it.
3427 if (Map.find(Current) != Map.end())
3428 continue;
3429 TraverseOrder.push_back(Current);
3430
3431 // CurrentValue must be a Phi node or select. All others must be covered
3432 // by anchors.
3433 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3434 // Is it OK to get metadata from OrigSelect?!
3435 // Create a Select placeholder with dummy value.
3436 SelectInst *Select = SelectInst::Create(
3437 CurrentSelect->getCondition(), Dummy, Dummy,
3438 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3439 Map[Current] = Select;
3440 ST.insertNewSelect(Select);
3441 // We are interested in True and False values.
3442 Worklist.push_back(CurrentSelect->getTrueValue());
3443 Worklist.push_back(CurrentSelect->getFalseValue());
3444 } else {
3445 // It must be a Phi node then.
3446 PHINode *CurrentPhi = cast<PHINode>(Current);
3447 unsigned PredCount = CurrentPhi->getNumIncomingValues();
3448 PHINode *PHI =
3449 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3450 Map[Current] = PHI;
3451 ST.insertNewPhi(PHI);
3452 for (Value *P : CurrentPhi->incoming_values())
3453 Worklist.push_back(P);
3454 }
3455 }
3456 }
3457
3458 bool addrModeCombiningAllowed() {
3459 if (DisableComplexAddrModes)
3460 return false;
3461 switch (DifferentField) {
3462 default:
3463 return false;
3464 case ExtAddrMode::BaseRegField:
3465 return AddrSinkCombineBaseReg;
3466 case ExtAddrMode::BaseGVField:
3467 return AddrSinkCombineBaseGV;
3468 case ExtAddrMode::BaseOffsField:
3469 return AddrSinkCombineBaseOffs;
3470 case ExtAddrMode::ScaledRegField:
3471 return AddrSinkCombineScaledReg;
3472 }
3473 }
3474};
3475} // end anonymous namespace
3476
3477/// Try adding ScaleReg*Scale to the current addressing mode.
3478/// Return true and update AddrMode if this addr mode is legal for the target,
3479/// false if not.
3480bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3481 unsigned Depth) {
3482 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3483 // mode. Just process that directly.
3484 if (Scale == 1)
3485 return matchAddr(ScaleReg, Depth);
3486
3487 // If the scale is 0, it takes nothing to add this.
3488 if (Scale == 0)
3489 return true;
3490
3491 // If we already have a scale of this value, we can add to it, otherwise, we
3492 // need an available scale field.
3493 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3494 return false;
3495
3496 ExtAddrMode TestAddrMode = AddrMode;
3497
3498 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3499 // [A+B + A*7] -> [B+A*8].
3500 TestAddrMode.Scale += Scale;
3501 TestAddrMode.ScaledReg = ScaleReg;
3502
3503 // If the new address isn't legal, bail out.
3504 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3505 return false;
3506
3507 // It was legal, so commit it.
3508 AddrMode = TestAddrMode;
3509
3510 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3511 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3512 // X*Scale + C*Scale to addr mode.
3513 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3514 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3515 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3516 TestAddrMode.InBounds = false;
3517 TestAddrMode.ScaledReg = AddLHS;
3518 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3519
3520 // If this addressing mode is legal, commit it and remember that we folded
3521 // this instruction.
3522 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3523 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3524 AddrMode = TestAddrMode;
3525 return true;
3526 }
3527 }
3528
3529 // Otherwise, not (x+c)*scale, just return what we have.
3530 return true;
3531}
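A worked instance of the (x + c)*scale fold above, with hypothetical IR names: if ScaleReg is %t where %t = add i64 %x, 4 and Scale is 2, the already-committed mode [Base + %t*2] is tentatively rewritten as [Base + 8 + %x*2], that is, ScaledReg becomes %x and BaseOffs grows by 4*2. The rewritten mode replaces the committed one only if TLI.isLegalAddressingMode still accepts it; otherwise the [Base + %t*2] form is kept.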
3532
3533/// This is a little filter, which returns true if an addressing computation
3534/// involving I might be folded into a load/store accessing it.
3535/// This doesn't need to be perfect, but needs to accept at least
3536/// the set of instructions that MatchOperationAddr can.
3537static bool MightBeFoldableInst(Instruction *I) {
3538 switch (I->getOpcode()) {
3539 case Instruction::BitCast:
3540 case Instruction::AddrSpaceCast:
3541 // Don't touch identity bitcasts.
3542 if (I->getType() == I->getOperand(0)->getType())
3543 return false;
3544 return I->getType()->isIntOrPtrTy();
3545 case Instruction::PtrToInt:
3546 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3547 return true;
3548 case Instruction::IntToPtr:
3549 // We know the input is intptr_t, so this is foldable.
3550 return true;
3551 case Instruction::Add:
3552 return true;
3553 case Instruction::Mul:
3554 case Instruction::Shl:
3555 // Can only handle X*C and X << C.
3556 return isa<ConstantInt>(I->getOperand(1));
3557 case Instruction::GetElementPtr:
3558 return true;
3559 default:
3560 return false;
3561 }
3562}
3563
3564/// Check whether or not \p Val is a legal instruction for \p TLI.
3565/// \note \p Val is assumed to be the product of some type promotion.
3566/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3567/// to be legal, as the non-promoted value would have had the same state.
3568static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3569 const DataLayout &DL, Value *Val) {
3570 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3571 if (!PromotedInst)
3572 return false;
3573 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3574 // If the ISDOpcode is undefined, it was undefined before the promotion.
3575 if (!ISDOpcode)
3576 return true;
3577 // Otherwise, check if the promoted instruction is legal or not.
3578 return TLI.isOperationLegalOrCustom(
3579 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3580}
3581
3582namespace {
3583
3584/// Helper class to perform type promotion.
3585class TypePromotionHelper {
3586 /// Utility function to add a promoted instruction \p ExtOpnd to
3587 /// \p PromotedInsts and record the type of extension we have seen.
3588 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3589 Instruction *ExtOpnd,
3590 bool IsSExt) {
3591 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3592 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3593 if (It != PromotedInsts.end()) {
3594 // If the new extension is same as original, the information in
3595 // PromotedInsts[ExtOpnd] is still correct.
3596 if (It->second.getInt() == ExtTy)
3597 return;
3598
3599 // Now the new extension is different from old extension, we make
3600 // the type information invalid by setting extension type to
3601 // BothExtension.
3602 ExtTy = BothExtension;
3603 }
3604 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3605 }
3606
3607 /// Utility function to query the original type of instruction \p Opnd
3608 /// with a matched extension type. If the extension doesn't match, we
3609 /// cannot use the information we had on the original type.
3610 /// BothExtension doesn't match any extension type.
3611 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3612 Instruction *Opnd,
3613 bool IsSExt) {
3614 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3615 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3616 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3617 return It->second.getPointer();
3618 return nullptr;
3619 }
3620
3621 /// Utility function to check whether or not a sign or zero extension
3622 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3623 /// either using the operands of \p Inst or promoting \p Inst.
3624 /// The type of the extension is defined by \p IsSExt.
3625 /// In other words, check if:
3626 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3627 /// #1 Promotion applies:
3628 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3629 /// #2 Operand reuses:
3630 /// ext opnd1 to ConsideredExtType.
3631 /// \p PromotedInsts maps the instructions to their type before promotion.
3632 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3633 const InstrToOrigTy &PromotedInsts, bool IsSExt);
3634
3635 /// Utility function to determine if \p OpIdx should be promoted when
3636 /// promoting \p Inst.
3637 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3638 return !(isa<SelectInst>(Inst) && OpIdx == 0);
3639 }
3640
3641 /// Utility function to promote the operand of \p Ext when this
3642 /// operand is a promotable trunc or sext or zext.
3643 /// \p PromotedInsts maps the instructions to their type before promotion.
3644 /// \p CreatedInstsCost[out] contains the cost of all instructions
3645 /// created to promote the operand of Ext.
3646 /// Newly added extensions are inserted in \p Exts.
3647 /// Newly added truncates are inserted in \p Truncs.
3648 /// Should never be called directly.
3649 /// \return The promoted value which is used instead of Ext.
3650 static Value *promoteOperandForTruncAndAnyExt(
3651 Instruction *Ext, TypePromotionTransaction &TPT,
3652 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3653 SmallVectorImpl<Instruction *> *Exts,
3654 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3655
3656 /// Utility function to promote the operand of \p Ext when this
3657 /// operand is promotable and is not a supported trunc or sext.
3658 /// \p PromotedInsts maps the instructions to their type before promotion.
3659 /// \p CreatedInstsCost[out] contains the cost of all the instructions
3660 /// created to promote the operand of Ext.
3661 /// Newly added extensions are inserted in \p Exts.
3662 /// Newly added truncates are inserted in \p Truncs.
3663 /// Should never be called directly.
3664 /// \return The promoted value which is used instead of Ext.
3665 static Value *promoteOperandForOther(Instruction *Ext,
3666 TypePromotionTransaction &TPT,
3667 InstrToOrigTy &PromotedInsts,
3668 unsigned &CreatedInstsCost,
3669 SmallVectorImpl<Instruction *> *Exts,
3670 SmallVectorImpl<Instruction *> *Truncs,
3671 const TargetLowering &TLI, bool IsSExt);
3672
3673 /// \see promoteOperandForOther.
3674 static Value *signExtendOperandForOther(
3675 Instruction *Ext, TypePromotionTransaction &TPT,
3676 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3677 SmallVectorImpl<Instruction *> *Exts,
3678 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3679 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3680 Exts, Truncs, TLI, true);
3681 }
3682
3683 /// \see promoteOperandForOther.
3684 static Value *zeroExtendOperandForOther(
3685 Instruction *Ext, TypePromotionTransaction &TPT,
3686 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3687 SmallVectorImpl<Instruction *> *Exts,
3688 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3689 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3690 Exts, Truncs, TLI, false);
3691 }
3692
3693public:
3694 /// Type for the utility function that promotes the operand of Ext.
3695 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3696 InstrToOrigTy &PromotedInsts,
3697 unsigned &CreatedInstsCost,
3698 SmallVectorImpl<Instruction *> *Exts,
3699 SmallVectorImpl<Instruction *> *Truncs,
3700 const TargetLowering &TLI);
3701
3702 /// Given a sign/zero extend instruction \p Ext, return the appropriate
3703 /// action to promote the operand of \p Ext instead of using Ext.
3704 /// \return NULL if no promotable action is possible with the current
3705 /// sign extension.
3706 /// \p InsertedInsts keeps track of all the instructions inserted by the
3707 /// other CodeGenPrepare optimizations. This information is important
3708 /// because we do not want to promote these instructions as CodeGenPrepare
3709 /// will reinsert them later. Thus creating an infinite loop: create/remove.
3710 /// \p PromotedInsts maps the instructions to their type before promotion.
3711 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3712 const TargetLowering &TLI,
3713 const InstrToOrigTy &PromotedInsts);
3714};
3715
3716} // end anonymous namespace
3717
3718bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3719 Type *ConsideredExtType,
3720 const InstrToOrigTy &PromotedInsts,
3721 bool IsSExt) {
3722 // The promotion helper does not know how to deal with vector types yet.
3723 // To be able to fix that, we would need to fix the places where we
3724 // statically extend, e.g., constants and such.
3725 if (Inst->getType()->isVectorTy())
3726 return false;
3727
3728 // We can always get through zext.
3729 if (isa<ZExtInst>(Inst))
3730 return true;
3731
3732 // sext(sext) is ok too.
3733 if (IsSExt && isa<SExtInst>(Inst))
3734 return true;
3735
3736 // We can get through binary operator, if it is legal. In other words, the
3737 // binary operator must have a nuw or nsw flag.
3738 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3739 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3740 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3741 (IsSExt && BinOp->hasNoSignedWrap())))
3742 return true;
3743
3744 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3745 if ((Inst->getOpcode() == Instruction::And ||
3746 Inst->getOpcode() == Instruction::Or))
3747 return true;
3748
3749 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3750 if (Inst->getOpcode() == Instruction::Xor) {
3751 const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3752 // Make sure it is not a NOT.
3753 if (Cst && !Cst->getValue().isAllOnesValue())
3754 return true;
3755 }
3756
3757 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3758 // It may change a poisoned value into a regular value, like
3759 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3760 // poisoned value regular value
3761 // It should be OK since undef covers valid value.
3762 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3763 return true;
3764
3765 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3766 // It may change a poisoned value into a regular value, like
3767 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3768 // poisoned value regular value
3769 // It should be OK since undef covers valid value.
3770 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3771 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
3772 if (ExtInst->hasOneUse()) {
3773 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
3774 if (AndInst && AndInst->getOpcode() == Instruction::And) {
3775 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3776 if (Cst &&
3777 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3778 return true;
3779 }
3780 }
3781 }
3782
3783 // Check if we can do the following simplification.
3784 // ext(trunc(opnd)) --> ext(opnd)
3785 if (!isa<TruncInst>(Inst))
3786 return false;
3787
3788 Value *OpndVal = Inst->getOperand(0);
3789 // Check if we can use this operand in the extension.
3790 // If the type is larger than the result type of the extension, we cannot.
3791 if (!OpndVal->getType()->isIntegerTy() ||
3792 OpndVal->getType()->getIntegerBitWidth() >
3793 ConsideredExtType->getIntegerBitWidth())
3794 return false;
3795
3796 // If the operand of the truncate is not an instruction, we will not have
3797 // any information on the dropped bits.
3798 // (Actually we could for constant but it is not worth the extra logic).
3799 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3800 if (!Opnd)
3801 return false;
3802
3803 // Check if the source of the type is narrow enough.
3804 // I.e., check that trunc just drops extended bits of the same kind of
3805 // the extension.
3806 // #1 get the type of the operand and check the kind of the extended bits.
3807 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3808 if (OpndType)
3809 ;
3810 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3811 OpndType = Opnd->getOperand(0)->getType();
3812 else
3813 return false;
3814
3815 // #2 check that the truncate just drops extended bits.
3816 return Inst->getType()->getIntegerBitWidth() >=
3817 OpndType->getIntegerBitWidth();
3818}
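A concrete reading of the final trunc check, using hypothetical IR in the same spirit as the comments above: for

  %w = sext i16 %v to i32    (Opnd: the original 16 bits are sign-extended)
  %t = trunc i32 %w to i24   (Inst: drops only bits that the sext produced)

and a candidate sext of %t to i64, OpndType is i16 and Inst's type is i24, so 24 >= 16 holds and canGetThrough returns true: the trunc only drops bits that the inner sext produced, so the outer sext may look through it.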
3819
3820TypePromotionHelper::Action TypePromotionHelper::getAction(
3821 Instruction *Ext, const SetOfInstrs &InsertedInsts,
3822 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3823   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3824          "Unexpected instruction type");
3825 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3826 Type *ExtTy = Ext->getType();
3827 bool IsSExt = isa<SExtInst>(Ext);
3828   // If the operand of the extension is not an instruction, we cannot
3829   // get through.
3830   // If it is, check whether we can get through.
3831 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3832 return nullptr;
3833
3834 // Do not promote if the operand has been added by codegenprepare.
3835 // Otherwise, it means we are undoing an optimization that is likely to be
3836 // redone, thus causing potential infinite loop.
3837 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3838 return nullptr;
3839
3840 // SExt or Trunc instructions.
3841 // Return the related handler.
3842 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3843 isa<ZExtInst>(ExtOpnd))
3844 return promoteOperandForTruncAndAnyExt;
3845
3846 // Regular instruction.
3847 // Abort early if we will have to insert non-free instructions.
3848 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3849 return nullptr;
3850 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3851}
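// [Editor's note] A brief illustration (hypothetical IR values) of which handler
// getAction selects, under the behavior shown above:
//
//   %a = zext i8 %x to i16        ; operand is itself an ext/trunc:
//   %b = sext i16 %a to i32       ;   -> promoteOperandForTruncAndAnyExt
//
//   %c = add nsw i16 %y, 1        ; operand is a regular instruction:
//   %d = sext i16 %c to i32       ;   -> signExtendOperandForOther
//
// The add needs the nsw flag for canGetThrough to accept it, and %c must have a
// single use (or truncation must be free) to reach the last return. A zext of a
// regular instruction picks zeroExtendOperandForOther instead, and a null
// Action is returned whenever canGetThrough rejects the operand.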
3852
3853Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3854 Instruction *SExt, TypePromotionTransaction &TPT,
3855 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3856 SmallVectorImpl<Instruction *> *Exts,
3857 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3858 // By construction, the operand of SExt is an instruction. Otherwise we cannot
3859 // get through it and this method should not be called.
3860 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3861 Value *ExtVal = SExt;
3862 bool HasMergedNonFreeExt = false;
3863 if (isa<ZExtInst>(SExtOpnd)) {
3864 // Replace s|zext(zext(opnd))
3865 // => zext(opnd).
3866 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3867 Value *ZExt =
3868 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3869 TPT.replaceAllUsesWith(SExt, ZExt);
3870 TPT.eraseInstruction(SExt);
3871 ExtVal = ZExt;
3872 } else {
3873 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3874 // => z|sext(opnd).
3875 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3876 }
3877 CreatedInstsCost = 0;
3878
3879 // Remove dead code.
3880 if (SExtOpnd->use_empty())
3881 TPT.eraseInstruction(SExtOpnd);
3882
3883 // Check if the extension is still needed.
3884 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3885 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3886 if (ExtInst) {
3887 if (Exts)
3888 Exts->push_back(ExtInst);
3889 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3890 }
3891 return ExtVal;
3892 }
3893
3894 // At this point we have: ext ty opnd to ty.
3895 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3896 Value *NextVal = ExtInst->getOperand(0);
3897 TPT.eraseInstruction(ExtInst, NextVal);
3898 return NextVal;
3899}
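// [Editor's note] A before/after sketch (hypothetical values) of what the
// function above does for the s|zext(zext(opnd)) case:
//
//   before:  %z = zext i8 %x to i16
//            %s = sext i16 %z to i32
//   after:   %z2 = zext i8 %x to i32   ; single zext created via TPT.createZExt
//
// For the z|sext(trunc(opnd)) and sext(sext(opnd)) cases it simply rewires the
// outer extension to use the inner instruction's operand, dropping the inner
// instruction if it becomes dead (and the outer extension too, if the types now
// match).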
3900
3901Value *TypePromotionHelper::promoteOperandForOther(
3902 Instruction *Ext, TypePromotionTransaction &TPT,
3903 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3904 SmallVectorImpl<Instruction *> *Exts,
3905 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3906 bool IsSExt) {
3907 // By construction, the operand of Ext is an instruction. Otherwise we cannot
3908 // get through it and this method should not be called.
3909 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3910 CreatedInstsCost = 0;
3911 if (!ExtOpnd->hasOneUse()) {
3912 // ExtOpnd will be promoted.
3913 // All its uses, except Ext, will need to use a truncated value of the
3914 // promoted version.
3915 // Create the truncate now.
3916 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3917 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3918 // Insert it just after the definition.
3919 ITrunc->moveAfter(ExtOpnd);
3920 if (Truncs)
3921 Truncs->push_back(ITrunc);
3922 }
3923
3924 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3925 // Restore the operand of Ext (which has been replaced by the previous call
3926 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3927 TPT.setOperand(Ext, 0, ExtOpnd);
3928 }
3929
3930 // Get through the Instruction:
3931 // 1. Update its type.
3932 // 2. Replace the uses of Ext by Inst.
3933 // 3. Extend each operand that needs to be extended.
3934
3935 // Remember the original type of the instruction before promotion.
3936 // This is useful to know that the high bits are sign extended bits.
3937 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
3938 // Step #1.
3939 TPT.mutateType(ExtOpnd, Ext->getType());
3940 // Step #2.
3941 TPT.replaceAllUsesWith(Ext, ExtOpnd);
3942 // Step #3.
3943 Instruction *ExtForOpnd = Ext;
3944
3945   LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
3946 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3947 ++OpIdx) {
3948     LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3949 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3950 !shouldExtOperand(ExtOpnd, OpIdx)) {
3951       LLVM_DEBUG(dbgs() << "No need to propagate\n");
3952 continue;
3953 }
3954 // Check if we can statically extend the operand.
3955 Value *Opnd = ExtOpnd->getOperand(OpIdx);
3956 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3957       LLVM_DEBUG(dbgs() << "Statically extend\n");
3958 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3959 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3960 : Cst->getValue().zext(BitWidth);
3961 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3962 continue;
3963 }
3964 // UndefValue are typed, so we have to statically sign extend them.
3965 if (isa<UndefValue>(Opnd)) {
3966       LLVM_DEBUG(dbgs() << "Statically extend\n");
3967 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3968 continue;
3969 }
3970
3971 // Otherwise we have to explicitly sign extend the operand.
3972 // Check if Ext was reused to extend an operand.
3973 if (!ExtForOpnd) {
3974 // If yes, create a new one.
3975       LLVM_DEBUG(dbgs() << "More operands to ext\n");
3976 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3977 : TPT.createZExt(Ext, Opnd, Ext->getType());
3978 if (!isa<Instruction>(ValForExtOpnd)) {
3979 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3980 continue;
3981 }
3982 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
3983 }
3984 if (Exts)
3985 Exts->push_back(ExtForOpnd);
3986 TPT.setOperand(ExtForOpnd, 0, Opnd);
3987
3988 // Move the sign extension before the insertion point.
3989 TPT.moveBefore(ExtForOpnd, ExtOpnd);
3990 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
3991 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
3992 // If more sext are required, new instructions will have to be created.
3993 ExtForOpnd = nullptr;
3994 }
3995 if (ExtForOpnd == Ext) {
3996     LLVM_DEBUG(dbgs() << "Extension is useless now\n");
3997 TPT.eraseInstruction(Ext);
3998 }
3999 return ExtOpnd;
4000}
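// [Editor's note] A before/after sketch (hypothetical values) of the promotion
// performed above for a non-free operand, e.g. sext(add nsw ...):
//
//   before:  %op  = add nsw i32 %a, 1
//            %idx = sext i32 %op to i64
//   after:   %pa  = sext i32 %a to i64    ; operand extended (step #3)
//            %op  = add nsw i64 %pa, 1    ; type mutated in place (step #1),
//                                         ; uses of %idx now use %op (step #2)
//
// If %op had other users, a trunc of the promoted value back to i32 would also
// be inserted right after it so those users keep seeing the original type.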
4001
4002/// Check whether or not promoting an instruction to a wider type is profitable.
4003/// \p NewCost gives the cost of extension instructions created by the
4004/// promotion.
4005/// \p OldCost gives the cost of extension instructions before the promotion
4006/// plus the number of instructions that have been
4007 /// matched in the addressing mode thanks to the promotion.
4008/// \p PromotedOperand is the value that has been promoted.
4009/// \return True if the promotion is profitable, false otherwise.
4010bool AddressingModeMatcher::isPromotionProfitable(
4011 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4012   LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4013                     << '\n');
4014 // The cost of the new extensions is greater than the cost of the
4015 // old extension plus what we folded.
4016 // This is not profitable.
4017 if (NewCost > OldCost)
4018 return false;
4019 if (NewCost < OldCost)
4020 return true;
4021 // The promotion is neutral but it may help folding the sign extension in
4022 // loads for instance.
4023 // Check that we did not create an illegal instruction.
4024 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4025}
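// [Editor's note] A small worked example of the comparison above, with made-up
// numbers: if promoting an extension created one new non-free extension
// (NewCost == 1) while the original extension cost 1 and the promotion let the
// matcher fold two extra instructions into the addressing mode
// (OldCost == 1 + 2 == 3), then NewCost < OldCost and the promotion is kept.
// When NewCost == OldCost the decision falls through to the legality check on
// the promoted instruction.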
4026
4027/// Given an instruction or constant expr, see if we can fold the operation
4028/// into the addressing mode. If so, update the addressing mode and return
4029/// true, otherwise return false without modifying AddrMode.
4030 /// If \p MovedAway is not NULL, it contains information about whether or
4031 /// not AddrInst has to be folded into the addressing mode on success.
4032 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
4033 /// because it has been moved away.
4034 /// Thus AddrInst must not be added to the matched instructions.
4035/// This state can happen when AddrInst is a sext, since it may be moved away.
4036/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4037/// not be referenced anymore.
4038bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4039 unsigned Depth,
4040 bool *MovedAway) {
4041 // Avoid exponential behavior on extremely deep expression trees.
4042 if (Depth >= 5) return false;
4043
4044 // By default, all matched instructions stay in place.
4045 if (MovedAway)
4046 *MovedAway = false;
4047
4048 switch (Opcode) {
4049 case Instruction::PtrToInt:
4050 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4051 return matchAddr(AddrInst->getOperand(0), Depth);
4052 case Instruction::IntToPtr: {
4053 auto AS = AddrInst->getType()->getPointerAddressSpace();
4054 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4055 // This inttoptr is a no-op if the integer type is pointer sized.
4056 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4057 return matchAddr(AddrInst->getOperand(0), Depth);
4058 return false;
4059 }
4060 case Instruction::BitCast:
4061 // BitCast is always a noop, and we can handle it as long as it is
4062 // int->int or pointer->pointer (we don't want int<->fp or something).
4063 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4064 // Don't touch identity bitcasts. These were probably put here by LSR,
4065 // and we don't want to mess around with them. Assume it knows what it
4066 // is doing.
4067 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4068 return matchAddr(AddrInst->getOperand(0), Depth);
4069 return false;
4070 case Instruction::AddrSpaceCast: {
4071 unsigned SrcAS
4072 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4073 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4074 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
4075 return matchAddr(AddrInst->getOperand(0), Depth);
4076 return false;
4077 }
4078 case Instruction::Add: {
4079 // Check to see if we can merge in the RHS then the LHS. If so, we win.
4080 ExtAddrMode BackupAddrMode = AddrMode;
4081 unsigned OldSize = AddrModeInsts.size();
4082 // Start a transaction at this point.
4083 // The LHS may match but not the RHS.
4084 // Therefore, we need a higher level restoration point to undo partially
4085 // matched operation.
4086 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4087 TPT.getRestorationPoint();
4088
4089 AddrMode.InBounds = false;
4090 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
4091 matchAddr(AddrInst->getOperand(0), Depth+1))
4092 return true;
4093
4094 // Restore the old addr mode info.
4095 AddrMode = BackupAddrMode;
4096 AddrModeInsts.resize(OldSize);
4097 TPT.rollback(LastKnownGood);
4098
4099 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
4100 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
4101 matchAddr(AddrInst->getOperand(1), Depth+1))
4102 return true;
4103
4104 // Otherwise we definitely can't merge the ADD in.
4105 AddrMode = BackupAddrMode;
4106 AddrModeInsts.resize(OldSize);
4107 TPT.rollback(LastKnownGood);
4108 break;
4109 }
4110 //case Instruction::Or:
4111 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4112 //break;
4113 case Instruction::Mul:
4114 case Instruction::Shl: {
4115 // Can only handle X*C and X << C.
4116 AddrMode.InBounds = false;
4117 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4118 if (!RHS || RHS->getBitWidth() > 64)
4119 return false;
4120 int64_t Scale = RHS->getSExtValue();
4121 if (Opcode == Instruction::Shl)
4122 Scale = 1LL << Scale;
4123
4124 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4125 }
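  // [Editor's note] For the Mul/Shl case above, a shift by a constant is turned
  // into a scale before matching, e.g. (hypothetical values):
  //
  //   %idx = shl i64 %i, 3     ; RHS == 3, so Scale = 1 << 3 = 8
  //
  // so matchScaledValue is asked whether the target can encode "%i * 8" in the
  // addressing mode (an [r + r*8] form on x86, for instance).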
4126 case Instruction::GetElementPtr: {
4127 // Scan the GEP. We check if it contains constant offsets and at most
4128 // one variable offset.
4129 int VariableOperand = -1;
4130 unsigned VariableScale = 0;
4131
4132 int64_t ConstantOffset = 0;
4133 gep_type_iterator GTI = gep_type_begin(AddrInst);
4134 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4135 if (StructType *STy = GTI.getStructTypeOrNull()) {
4136 const StructLayout *SL = DL.getStructLayout(STy);
4137 unsigned Idx =
4138 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4139 ConstantOffset += SL->getElementOffset(Idx);
4140 } else {
4141 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
4142 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4143 const APInt &CVal = CI->getValue();
4144 if (CVal.getMinSignedBits() <= 64) {
4145 ConstantOffset += CVal.getSExtValue() * TypeSize;
4146 continue;
4147 }
4148 }
4149 if (TypeSize) { // Scales of zero don't do anything.
4150 // We only allow one variable index at the moment.
4151 if (VariableOperand != -1)
4152 return false;
4153
4154 // Remember the variable index.
4155 VariableOperand = i;
4156 VariableScale = TypeSize;
4157 }
4158 }
4159 }
4160
4161 // A common case is for the GEP to only do a constant offset. In this case,
4162 // just add it to the disp field and check validity.
4163 if (VariableOperand == -1) {
4164 AddrMode.BaseOffs += ConstantOffset;
4165 if (ConstantOffset == 0 ||
4166 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4167 // Check to see if we can fold the base pointer in too.
4168 if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4169 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4170 AddrMode.InBounds = false;
4171 return true;
4172 }
4173 } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4174 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4175 ConstantOffset > 0) {
4176 // Record GEPs with non-zero offsets as candidates for splitting in the
4177 // event that the offset cannot fit into the r+i addressing mode.
4178 // Simple and common case that only one GEP is used in calculating the
4179 // address for the memory access.
4180 Value *Base = AddrInst->getOperand(0);
4181 auto *BaseI = dyn_cast<Instruction>(Base);
4182 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4183 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4184 (BaseI && !isa<CastInst>(BaseI) &&
4185 !isa<GetElementPtrInst>(BaseI))) {
4186 // Make sure the parent block allows inserting non-PHI instructions
4187 // before the terminator.
4188 BasicBlock *Parent =
4189 BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4190 if (!Parent->getTerminator()->isEHPad())
4191 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4192 }
4193 }
4194 AddrMode.BaseOffs -= ConstantOffset;
4195 return false;
4196 }
4197
4198 // Save the valid addressing mode in case we can't match.
4199 ExtAddrMode BackupAddrMode = AddrMode;
4200 unsigned OldSize = AddrModeInsts.size();
4201
4202 // See if the scale and offset amount is valid for this target.
4203 AddrMode.BaseOffs += ConstantOffset;
4204 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4205 AddrMode.InBounds = false;
4206
4207 // Match the base operand of the GEP.
4208 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4209 // If it couldn't be matched, just stuff the value in a register.
4210 if (AddrMode.HasBaseReg) {
4211 AddrMode = BackupAddrMode;
4212 AddrModeInsts.resize(OldSize);
4213 return false;
4214 }
4215 AddrMode.HasBaseReg = true;
4216 AddrMode.BaseReg = AddrInst->getOperand(0);
4217 }
4218
4219 // Match the remaining variable portion of the GEP.
4220 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4221 Depth)) {
4222 // If it couldn't be matched, try stuffing the base into a register
4223 // instead of matching it, and retrying the match of the scale.
4224 AddrMode = BackupAddrMode;
4225 AddrModeInsts.resize(OldSize);
4226 if (AddrMode.HasBaseReg)
4227 return false;
4228 AddrMode.HasBaseReg = true;
4229 AddrMode.BaseReg = AddrInst->getOperand(0);
4230 AddrMode.BaseOffs += ConstantOffset;
4231 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4232 VariableScale, Depth)) {
4233 // If even that didn't work, bail.
4234 AddrMode = BackupAddrMode;
4235 AddrModeInsts.resize(OldSize);
4236 return false;
4237 }
4238 }
4239
4240 return true;
4241 }
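  // [Editor's note] A sketch of how the GEP case above decomposes an address
  // (hypothetical types and values):
  //
  //   %p = getelementptr inbounds { i32, [16 x i32] },
  //            { i32, [16 x i32] }* %base, i64 0, i32 1, i64 %i
  //
  // Walking the indices with gep_type_iterator, the struct index contributes a
  // constant offset of 4 bytes (the layout offset of field 1), while %i becomes
  // the single allowed variable operand with VariableScale == 4 (the i32
  // element size). The matcher then tries BaseOffs += 4 plus a scaled index of
  // %i * 4.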
4242 case Instruction::SExt:
4243 case Instruction::ZExt: {
4244 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4245 if (!Ext)
4246 return false;
4247
4248 // Try to move this ext out of the way of the addressing mode.
4249 // Ask for a method for doing so.
4250 TypePromotionHelper::Action TPH =
4251 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4252 if (!TPH)
4253 return false;
4254
4255 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4256 TPT.getRestorationPoint();
4257 unsigned CreatedInstsCost = 0;
4258 unsigned ExtCost = !TLI.isExtFree(Ext);
4259 Value *PromotedOperand =
4260 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4261 // SExt has been moved away.
4262 // Thus either it will be rematched later in the recursive calls or it is
4263 // gone. Anyway, we must not fold it into the addressing mode at this point.
4264 // E.g.,
4265 // op = add opnd, 1
4266 // idx = ext op
4267 // addr = gep base, idx
4268 // is now:
4269 // promotedOpnd = ext opnd <- no match here
4270 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4271 // addr = gep base, op <- match
4272 if (MovedAway)
4273 *MovedAway = true;
4274
4275     assert(PromotedOperand &&
4276            "TypePromotionHelper should have filtered out those cases");
4277
4278 ExtAddrMode BackupAddrMode = AddrMode;
4279 unsigned OldSize = AddrModeInsts.size();
4280
4281 if (!matchAddr(PromotedOperand, Depth) ||
4282 // The total of the new cost is equal to the cost of the created
4283 // instructions.
4284 // The total of the old cost is equal to the cost of the extension plus
4285 // what we have saved in the addressing mode.
4286 !isPromotionProfitable(CreatedInstsCost,
4287 ExtCost + (AddrModeInsts.size() - OldSize),
4288 PromotedOperand)) {
4289 AddrMode = BackupAddrMode;
4290 AddrModeInsts.resize(OldSize);
4291       LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4292 TPT.rollback(LastKnownGood);
4293 return false;
4294 }
4295 return true;
4296 }
4297 }
4298 return false;
4299}
4300
4301/// If we can, try to add the value of 'Addr' into the current addressing mode.
4302/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4303/// unmodified. This assumes that Addr is either a pointer type or intptr_t
4304/// for the target.
4305///
4306bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4307 // Start a transaction at this point that we will rollback if the matching
4308 // fails.
4309 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4310 TPT.getRestorationPoint();
4311 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4312 // Fold in immediates if legal for the target.
4313 AddrMode.BaseOffs += CI->getSExtValue();
4314 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4315 return true;
4316 AddrMode.BaseOffs -= CI->getSExtValue();
4317 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4318 // If this is a global variable, try to fold it into the addressing mode.
4319 if (!AddrMode.BaseGV) {
4320 AddrMode.BaseGV = GV;
4321 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4322 return true;
4323 AddrMode.BaseGV = nullptr;
4324 }
4325 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4326 ExtAddrMode BackupAddrMode = AddrMode;
4327 unsigned OldSize = AddrModeInsts.size();
4328
4329 // Check to see if it is possible to fold this operation.
4330 bool MovedAway = false;
4331 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4332 // This instruction may have been moved away. If so, there is nothing
4333 // to check here.
4334 if (MovedAway)
4335 return true;
4336 // Okay, it's possible to fold this. Check to see if it is actually
4337 // *profitable* to do so. We use a simple cost model to avoid increasing
4338 // register pressure too much.
4339 if (I->hasOneUse() ||
4340 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4341 AddrModeInsts.push_back(I);
4342 return true;
4343 }
4344
4345 // It isn't profitable to do this, roll back.
4346 //cerr << "NOT FOLDING: " << *I;
4347 AddrMode = BackupAddrMode;
4348 AddrModeInsts.resize(OldSize);
4349 TPT.rollback(LastKnownGood);
4350 }
4351 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4352 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4353 return true;
4354 TPT.rollback(LastKnownGood);
4355 } else if (isa<ConstantPointerNull>(Addr)) {
4356 // Null pointer gets folded without affecting the addressing mode.
4357 return true;
4358 }
4359
4360 // Worst case, the target should support [reg] addressing modes. :)
4361 if (!AddrMode.HasBaseReg) {
4362 AddrMode.HasBaseReg = true;
4363 AddrMode.BaseReg = Addr;
4364 // Still check for legality in case the target supports [imm] but not [i+r].
4365 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4366 return true;
4367 AddrMode.HasBaseReg = false;
4368 AddrMode.BaseReg = nullptr;
4369 }
4370
4371 // If the base register is already taken, see if we can do [r+r].
4372 if (AddrMode.Scale == 0) {
4373 AddrMode.Scale = 1;
4374 AddrMode.ScaledReg = Addr;
4375 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4376 return true;
4377 AddrMode.Scale = 0;
4378 AddrMode.ScaledReg = nullptr;
4379 }
4380 // Couldn't match.
4381 TPT.rollback(LastKnownGood);
4382 return false;
4383}
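// [Editor's note] The value kinds handled above map onto target addressing
// forms roughly as follows, for an unmatched value %a (hypothetical):
//
//   [imm]         a constant folded into BaseOffs
//   [gv + ...]    a global folded into BaseGV
//   [reg]         %a becomes BaseReg
//   [reg + reg]   %a becomes ScaledReg with Scale == 1
//
// each step being accepted only if TLI.isLegalAddressingMode reports the
// resulting ExtAddrMode as supported for the access type.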
4384
4385/// Check to see if all uses of OpVal by the specified inline asm call are due
4386/// to memory operands. If so, return true, otherwise return false.
4387static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4388 const TargetLowering &TLI,
4389 const TargetRegisterInfo &TRI) {
4390 const Function *F = CI->getFunction();
4391 TargetLowering::AsmOperandInfoVector TargetConstraints =
4392 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4393 ImmutableCallSite(CI));
4394
4395 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4396 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4397
4398 // Compute the constraint code and ConstraintType to use.
4399 TLI.ComputeConstraintToUse(OpInfo, SDValue());
4400
4401 // If this asm operand is our Value*, and if it isn't an indirect memory
4402 // operand, we can't fold it!
4403 if (OpInfo.CallOperandVal == OpVal &&
4404 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4405 !OpInfo.isIndirect))
4406 return false;
4407 }
4408
4409 return true;
4410}
4411
4412// Max number of memory uses to look at before aborting the search to conserve
4413// compile time.
4414static constexpr int MaxMemoryUsesToScan = 20;
4415
4416/// Recursively walk all the uses of I until we find a memory use.
4417/// If we find an obviously non-foldable instruction, return true.
4418/// Add the ultimately found memory instructions to MemoryUses.
4419static bool FindAllMemoryUses(
4420 Instruction *I,
4421 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4422 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4423 const TargetRegisterInfo &TRI, int SeenInsts = 0) {
4424 // If we already considered this instruction, we're done.
4425 if (!ConsideredInsts.insert(I).second)
4426 return false;
4427
4428 // If this is an obviously unfoldable instruction, bail out.
4429 if (!MightBeFoldableInst(I))
4430 return true;
4431
4432 const bool OptSize = I->getFunction()->hasOptSize();
4433
4434 // Loop over all the uses, recursively processing them.
4435 for (Use &U : I->uses()) {
4436 // Conservatively return true if we're seeing a large number or a deep chain
4437 // of users. This avoids excessive compilation times in pathological cases.
4438 if (SeenInsts++ >= MaxMemoryUsesToScan)
4439 return true;
4440
4441 Instruction *UserI = cast<Instruction>(U.getUser());
4442 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4443 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4444 continue;
4445 }
4446
4447 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4448 unsigned opNo = U.getOperandNo();
4449 if (opNo != StoreInst::getPointerOperandIndex())
4450 return true; // Storing addr, not into addr.
4451 MemoryUses.push_back(std::make_pair(SI, opNo));
4452 continue;
4453 }
4454
4455 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4456 unsigned opNo = U.getOperandNo();
4457 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4458 return true; // Storing addr, not into addr.
4459 MemoryUses.push_back(std::make_pair(RMW, opNo));
4460 continue;
4461 }
4462
4463 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4464 unsigned opNo = U.getOperandNo();
4465 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4466 return true; // Storing addr, not into addr.
4467 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4468 continue;
4469 }
4470
4471 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4472 // If this is a cold call, we can sink the addressing calculation into
4473 // the cold path. See optimizeCallInst
4474 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
4475 continue;
4476
4477 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4478 if (!IA) return true;
4479
4480 // If this is a memory operand, we're cool, otherwise bail out.
4481 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4482 return true;
4483 continue;
4484 }
4485
4486 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
4487 SeenInsts))
4488 return true;
4489 }
4490
4491 return false;
4492}
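// [Editor's note] To make the "storing addr, not into addr" distinction above
// concrete (hypothetical values): for "store i64 %addr.int, i64* %slot" a use
// of the address as the *stored value* is not foldable and the walk returns
// true, whereas for "store i32 0, i32* %addr" the use is the pointer operand,
// so the store is recorded in MemoryUses together with its operand number.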
4493
4494/// Return true if Val is already known to be live at the use site that we're
4495/// folding it into. If so, there is no cost to include it in the addressing
4496/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4497/// instruction already.
4498bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4499 Value *KnownLive2) {
4500 // If Val is either of the known-live values, we know it is live!
4501 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4502 return true;
4503
4504 // All values other than instructions and arguments (e.g. constants) are live.
4505 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4506
4507 // If Val is a constant sized alloca in the entry block, it is live; this is
4508 // true because it is just a reference to the stack/frame pointer, which is
4509 // live for the whole function.
4510 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4511 if (AI->isStaticAlloca())
4512 return true;
4513
4514 // Check to see if this value is already used in the memory instruction's
4515 // block. If so, it's already live into the block at the very least, so we
4516 // can reasonably fold it.
4517 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4518}
4519
4520/// It is possible for the addressing mode of the machine to fold the specified
4521/// instruction into a load or store that ultimately uses it.
4522/// However, the specified instruction has multiple uses.
4523/// Given this, it may actually increase register pressure to fold it
4524/// into the load. For example, consider this code:
4525///
4526/// X = ...
4527/// Y = X+1
4528/// use(Y) -> nonload/store
4529/// Z = Y+1
4530/// load Z
4531///
4532/// In this case, Y has multiple uses, and can be folded into the load of Z
4533/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4534/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4535/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4536/// number of computations either.
4537///
4538/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4539/// X was live across 'load Z' for other reasons, we actually *would* want to
4540/// fold the addressing mode in the Z case. This would make Y die earlier.
4541bool AddressingModeMatcher::
4542isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4543 ExtAddrMode &AMAfter) {
4544 if (IgnoreProfitability) return true;
4545
4546 // AMBefore is the addressing mode before this instruction was folded into it,
4547 // and AMAfter is the addressing mode after the instruction was folded. Get
4548 // the set of registers referenced by AMAfter and subtract out those
4549 // referenced by AMBefore: this is the set of values which folding in this
4550 // address extends the lifetime of.
4551 //
4552 // Note that there are only two potential values being referenced here,
4553 // BaseReg and ScaleReg (global addresses are always available, as are any
4554 // folded immediates).
4555 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4556
4557 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4558 // lifetime wasn't extended by adding this instruction.
4559 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4560 BaseReg = nullptr;
4561 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4562 ScaledReg = nullptr;
4563
4564 // If folding this instruction (and its subexprs) didn't extend any live
4565 // ranges, we're ok with it.
4566 if (!BaseReg && !ScaledReg)
4567 return true;
4568
4569 // If all uses of this instruction can have the address mode sunk into them,
4570 // we can remove the addressing mode and effectively trade one live register
4571 // for another (at worst.) In this context, folding an addressing mode into
4572 // the use is just a particularly nice way of sinking it.
4573 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4574 SmallPtrSet<Instruction*, 16> ConsideredInsts;
4575 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4576 return false; // Has a non-memory, non-foldable use!
4577
4578 // Now that we know that all uses of this instruction are part of a chain of
4579 // computation involving only operations that could theoretically be folded
4580 // into a memory use, loop over each of these memory operation uses and see
4581 // if they could *actually* fold the instruction. The assumption is that
4582 // addressing modes are cheap and that duplicating the computation involved
4583 // many times is worthwhile, even on a fastpath. For sinking candidates
4584 // (i.e. cold call sites), this serves as a way to prevent excessive code
4585 // growth since most architectures have some reasonably small and fast way to
4586 // compute an effective address. (e.g., LEA on x86)
4587 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4588 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4589 Instruction *User = MemoryUses[i].first;
4590 unsigned OpNo = MemoryUses[i].second;
4591
4592 // Get the access type of this use. If the use isn't a pointer, we don't
4593 // know what it accesses.
4594 Value *Address = User->getOperand(OpNo);
4595 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4596 if (!AddrTy)
4597 return false;
4598 Type *AddressAccessTy = AddrTy->getElementType();
4599 unsigned AS = AddrTy->getAddressSpace();
4600
4601 // Do a match against the root of this address, ignoring profitability. This
4602 // will tell us if the addressing mode for the memory operation will
4603 // *actually* cover the shared instruction.
4604 ExtAddrMode Result;
4605 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4606 0);
4607 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4608 TPT.getRestorationPoint();
4609 AddressingModeMatcher Matcher(
4610 MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4611 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4612 Matcher.IgnoreProfitability = true;
4613 bool Success = Matcher.matchAddr(Address, 0);
4614     (void)Success; assert(Success && "Couldn't select *anything*?");
4615
4616 // The match was to check the profitability, the changes made are not
4617 // part of the original matcher. Therefore, they should be dropped
4618 // otherwise the original matcher will not present the right state.
4619 TPT.rollback(LastKnownGood);
4620
4621 // If the match didn't cover I, then it won't be shared by it.
4622 if (!is_contained(MatchedAddrModeInsts, I))
4623 return false;
4624
4625 MatchedAddrModeInsts.clear();
4626 }
4627
4628 return true;
4629}
4630
4631/// Return true if the specified values are defined in a
4632/// different basic block than BB.
4633static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4634 if (Instruction *I = dyn_cast<Instruction>(V))
4635 return I->getParent() != BB;
4636 return false;
4637}
4638
4639 /// Sink addressing mode computation immediately before MemoryInst if doing so
4640/// can be done without increasing register pressure. The need for the
4641/// register pressure constraint means this can end up being an all or nothing
4642/// decision for all uses of the same addressing computation.
4643///
4644/// Load and Store Instructions often have addressing modes that can do
4645/// significant amounts of computation. As such, instruction selection will try
4646/// to get the load or store to do as much computation as possible for the
4647/// program. The problem is that isel can only see within a single block. As
4648/// such, we sink as much legal addressing mode work into the block as possible.
4649///
4650/// This method is used to optimize both load/store and inline asms with memory
4651/// operands. It's also used to sink addressing computations feeding into cold
4652/// call sites into their (cold) basic block.
4653///
4654/// The motivation for handling sinking into cold blocks is that doing so can
4655/// both enable other address mode sinking (by satisfying the register pressure
4656/// constraint above), and reduce register pressure globally (by removing the
4657/// addressing mode computation from the fast path entirely.).
4658bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4659 Type *AccessTy, unsigned AddrSpace) {
4660 Value *Repl = Addr;
4661
4662 // Try to collapse single-value PHI nodes. This is necessary to undo
4663 // unprofitable PRE transformations.
4664 SmallVector<Value*, 8> worklist;
4665 SmallPtrSet<Value*, 16> Visited;
4666 worklist.push_back(Addr);
4667
4668 // Use a worklist to iteratively look through PHI and select nodes, and
4669 // ensure that the addressing mode obtained from the non-PHI/select roots of
4670 // the graph are compatible.
4671 bool PhiOrSelectSeen = false;
4672 SmallVector<Instruction*, 16> AddrModeInsts;
4673 const SimplifyQuery SQ(*DL, TLInfo);
4674 AddressingModeCombiner AddrModes(SQ, Addr);
4675 TypePromotionTransaction TPT(RemovedInsts);
4676 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4677 TPT.getRestorationPoint();
4678 while (!worklist.empty()) {
  [1] Loop condition is true. Entering loop body
4679 Value *V = worklist.back();
4680 worklist.pop_back();
4681
4682 // We allow traversing cyclic Phi nodes.
4683 // In case of success after this loop we ensure that traversing through
4684 // Phi nodes ends up with all cases to compute address of the form
4685 // BaseGV + Base + Scale * Index + Offset
4686 // where Scale and Offset are constants and BaseGV, Base and Index
4687 // are exactly the same Values in all cases.
4688 // It means that BaseGV, Scale and Offset dominate our memory instruction
4689 // and have the same value as they had in address computation represented
4690 // as Phi. So we can safely sink address computation to memory instruction.
4691 if (!Visited.insert(V).second)
  [2] Assuming field 'second' is true
  [3] Taking false branch
4692 continue;
4693
4694 // For a PHI node, push all of its incoming values.
4695     if (PHINode *P = dyn_cast<PHINode>(V)) {
  [4] Assuming 'V' is not a 'PHINode'
  [4.1] 'P' is null
  [5] Taking false branch
4696 for (Value *IncValue : P->incoming_values())
4697 worklist.push_back(IncValue);
4698 PhiOrSelectSeen = true;
4699 continue;
4700 }
4701 // Similar for select.
4702     if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
  [6] Assuming 'V' is not a 'SelectInst'
  [6.1] 'SI' is null
  [7] Taking false branch
4703 worklist.push_back(SI->getFalseValue());
4704 worklist.push_back(SI->getTrueValue());
4705 PhiOrSelectSeen = true;
4706 continue;
4707 }
4708
4709 // For non-PHIs, determine the addressing mode being computed. Note that
4710 // the result may differ depending on what other uses our candidate
4711 // addressing instructions might have.
4712 AddrModeInsts.clear();
4713 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4714 0);
4715 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4716 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4717 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4718
4719 GetElementPtrInst *GEP = LargeOffsetGEP.first;
4720     if (GEP && !NewGEPBases.count(GEP)) {
  [7.1] 'GEP' is null
  [8] Taking false branch
4721 // If splitting the underlying data structure can reduce the offset of a
4722 // GEP, collect the GEP. Skip the GEPs that are the new bases of
4723 // previously split data structures.
4724 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4725 if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4726 LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4727 }
4728
4729 NewAddrMode.OriginalValue = V;
4730 if (!AddrModes.addNewAddrMode(NewAddrMode))
  [9] Taking true branch
4731       break;
  [10] Execution continues on line 4737
4732 }
4733
4734 // Try to combine the AddrModes we've collected. If we couldn't collect any,
4735 // or we have multiple but either couldn't combine them or combining them
4736 // wouldn't do anything useful, bail out now.
4737 if (!AddrModes.combineAddrModes()) {
  [11] Calling 'AddressingModeCombiner::combineAddrModes'
4738 TPT.rollback(LastKnownGood);
4739 return false;
4740 }
4741 TPT.commit();
4742
4743 // Get the combined AddrMode (or the only AddrMode, if we only had one).
4744 ExtAddrMode AddrMode = AddrModes.getAddrMode();
4745
4746 // If all the instructions matched are already in this BB, don't do anything.
4747 // If we saw a Phi node then it is definitely not local, and if we saw a select
4748 // then we want to push the address calculation past it even if it's already
4749 // in this BB.
4750 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4751 return IsNonLocalValue(V, MemoryInst->getParent());
4752 })) {
4753     LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4754                       << "\n");
4755 return false;
4756 }
4757
4758 // Insert this computation right after this user. Since our caller is
4759 // scanning from the top of the BB to the bottom, reuses of the expr are
4760 // guaranteed to happen later.
4761 IRBuilder<> Builder(MemoryInst);
4762
4763 // Now that we have determined the addressing expression we want to use, we
4764 // know that we have to sink it into this block. Check to see if we have already
4765 // done this for some other load/store instr in this block. If so, reuse
4766 // the computation. Before attempting reuse, check if the address is valid
4767 // as it may have been erased.
4768
4769 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4770
4771 Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4772 if (SunkAddr) {
4773     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4774                       << " for " << *MemoryInst << "\n");
4775 if (SunkAddr->getType() != Addr->getType())
4776 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4777 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
4778 TM && SubtargetInfo->addrSinkUsingGEPs())) {
4779 // By default, we use the GEP-based method when AA is used later. This
4780 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4781     LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4782                       << " for " << *MemoryInst << "\n");
4783 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4784 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4785
4786 // First, find the pointer.
4787 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4788 ResultPtr = AddrMode.BaseReg;
4789 AddrMode.BaseReg = nullptr;
4790 }
4791
4792 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4793 // We can't add more than one pointer together, nor can we scale a
4794 // pointer (both of which seem meaningless).
4795 if (ResultPtr || AddrMode.Scale != 1)
4796 return false;
4797
4798 ResultPtr = AddrMode.ScaledReg;
4799 AddrMode.Scale = 0;
4800 }
4801
4802 // It is only safe to sign extend the BaseReg if we know that the math
4803 // required to create it did not overflow before we extend it. Since
4804 // the original IR value was tossed in favor of a constant back when
4805 // the AddrMode was created we need to bail out gracefully if widths
4806 // do not match instead of extending it.
4807 //
4808 // (See below for code to add the scale.)
4809 if (AddrMode.Scale) {
4810 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4811 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4812 cast<IntegerType>(ScaledRegTy)->getBitWidth())
4813 return false;
4814 }
4815
4816 if (AddrMode.BaseGV) {
4817 if (ResultPtr)
4818 return false;
4819
4820 ResultPtr = AddrMode.BaseGV;
4821 }
4822
4823 // If the real base value actually came from an inttoptr, then the matcher
4824 // will look through it and provide only the integer value. In that case,
4825 // use it here.
4826 if (!DL->isNonIntegralPointerType(Addr->getType())) {
4827 if (!ResultPtr && AddrMode.BaseReg) {
4828 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4829 "sunkaddr");
4830 AddrMode.BaseReg = nullptr;
4831 } else if (!ResultPtr && AddrMode.Scale == 1) {
4832 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4833 "sunkaddr");
4834 AddrMode.Scale = 0;
4835 }
4836 }
4837
4838 if (!ResultPtr &&
4839 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4840 SunkAddr = Constant::getNullValue(Addr->getType());
4841 } else if (!ResultPtr) {
4842 return false;
4843 } else {
4844 Type *I8PtrTy =
4845 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4846 Type *I8Ty = Builder.getInt8Ty();
4847
4848 // Start with the base register. Do this first so that subsequent address
4849 // matching finds it last, which will prevent it from trying to match it
4850 // as the scaled value in case it happens to be a mul. That would be
4851 // problematic if we've sunk a different mul for the scale, because then
4852 // we'd end up sinking both muls.
4853 if (AddrMode.BaseReg) {
4854 Value *V = AddrMode.BaseReg;
4855 if (V->getType() != IntPtrTy)
4856 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4857
4858 ResultIndex = V;
4859 }
4860
4861 // Add the scale value.
4862 if (AddrMode.Scale) {
4863 Value *V = AddrMode.ScaledReg;
4864 if (V->getType() == IntPtrTy) {
4865 // done.
4866 } else {
4867           assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4868                      cast<IntegerType>(V->getType())->getBitWidth() &&
4869                  "We can't transform if ScaledReg is too narrow");
4870 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4871 }
4872
4873 if (AddrMode.Scale != 1)
4874 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4875 "sunkaddr");
4876 if (ResultIndex)
4877 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4878 else
4879 ResultIndex = V;
4880 }
4881
4882 // Add in the Base Offset if present.
4883 if (AddrMode.BaseOffs) {
4884 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4885 if (ResultIndex) {
4886 // We need to add this separately from the scale above to help with
4887 // SDAG consecutive load/store merging.
4888 if (ResultPtr->getType() != I8PtrTy)
4889 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4890 ResultPtr =
4891 AddrMode.InBounds
4892 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4893 "sunkaddr")
4894 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4895 }
4896
4897 ResultIndex = V;
4898 }
4899
4900 if (!ResultIndex) {
4901 SunkAddr = ResultPtr;
4902 } else {
4903 if (ResultPtr->getType() != I8PtrTy)
4904 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4905 SunkAddr =
4906 AddrMode.InBounds
4907 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4908 "sunkaddr")
4909 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4910 }
4911
4912 if (SunkAddr->getType() != Addr->getType())
4913 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4914 }
4915 } else {
4916 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
4917 // non-integral pointers, so in that case bail out now.
4918 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
4919 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
4920 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
4921 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
4922 if (DL->isNonIntegralPointerType(Addr->getType()) ||
4923 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
4924 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
4925 (AddrMode.BaseGV &&
4926 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
4927 return false;
4928
4929     LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4930                       << " for " << *MemoryInst << "\n");
4931 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4932 Value *Result = nullptr;
4933
4934 // Start with the base register. Do this first so that subsequent address
4935 // matching finds it last, which will prevent it from trying to match it
4936 // as the scaled value in case it happens to be a mul. That would be
4937 // problematic if we've sunk a different mul for the scale, because then
4938 // we'd end up sinking both muls.
4939 if (AddrMode.BaseReg) {
4940 Value *V = AddrMode.BaseReg;
4941 if (V->getType()->isPointerTy())
4942 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4943 if (V->getType() != IntPtrTy)
4944 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4945 Result = V;
4946 }
4947
4948 // Add the scale value.
4949 if (AddrMode.Scale) {
4950 Value *V = AddrMode.ScaledReg;
4951 if (V->getType() == IntPtrTy) {
4952 // done.
4953 } else if (V->getType()->isPointerTy()) {
4954 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4955 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4956 cast<IntegerType>(V->getType())->getBitWidth()) {
4957 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4958 } else {
4959 // It is only safe to sign extend the BaseReg if we know that the math
4960 // required to create it did not overflow before we extend it. Since
4961 // the original IR value was tossed in favor of a constant back when
4962 // the AddrMode was created we need to bail out gracefully if widths
4963 // do not match instead of extending it.
4964 Instruction *I = dyn_cast_or_null<Instruction>(Result);
4965 if (I && (Result != AddrMode.BaseReg))
4966 I->eraseFromParent();
4967 return false;
4968 }
4969 if (AddrMode.Scale != 1)
4970 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4971 "sunkaddr");
4972 if (Result)
4973 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4974 else
4975 Result = V;
4976 }
4977
4978 // Add in the BaseGV if present.
4979 if (AddrMode.BaseGV) {
4980 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4981 if (Result)
4982 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4983 else
4984 Result = V;
4985 }
4986
4987 // Add in the Base Offset if present.
4988 if (AddrMode.BaseOffs) {
4989 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4990 if (Result)
4991 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4992 else
4993 Result = V;
4994 }
4995
4996 if (!Result)
4997 SunkAddr = Constant::getNullValue(Addr->getType());
4998 else
4999 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5000 }
5001
5002 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5003 // Store the newly computed address into the cache. In the case we reused a
5004 // value, this should be idempotent.
5005 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5006
5007 // If we have no uses, recursively delete the value and all dead instructions
5008 // using it.
5009 if (Repl->use_empty()) {
5010 // This can cause recursive deletion, which can invalidate our iterator.
5011 // Use a WeakTrackingVH to hold onto it in case this happens.
5012 Value *CurValue = &*CurInstIterator;
5013 WeakTrackingVH IterHandle(CurValue);
5014 BasicBlock *BB = CurInstIterator->getParent();
5015
5016 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
5017
5018 if (IterHandle != CurValue) {
5019 // If the iterator instruction was recursively deleted, start over at the
5020 // start of the block.
5021 CurInstIterator = BB->begin();
5022 SunkAddrs.clear();
5023 }
5024 }
5025 ++NumMemoryInsts;
5026 return true;
5027}
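// [Editor's note] A sketch of the IR this function emits on the GEP-based path
// for an addressing mode of roughly base + index*scale + offset (hypothetical
// values; %base8 stands for the base pointer already cast to i8*, and the exact
// sequence depends on which AddrMode fields are actually set):
//
//   %sunkaddr  = mul i64 %index, 4
//   %sunkaddr1 = getelementptr i8, i8* %base8, i64 %sunkaddr
//   %sunkaddr2 = getelementptr i8, i8* %sunkaddr1, i64 16
//   %addr      = bitcast i8* %sunkaddr2 to i32*
//
// all inserted immediately before MemoryInst, so instruction selection can fold
// the computation into the load/store addressing mode within this block.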
5028
5029/// If there are any memory operands, use OptimizeMemoryInst to sink their
5030/// address computing into the block when possible / profitable.
5031bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5032 bool MadeChange = false;
5033
5034 const TargetRegisterInfo *TRI =
5035 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5036 TargetLowering::AsmOperandInfoVector TargetConstraints =
5037 TLI->ParseConstraints(*DL, TRI, CS);
5038 unsigned ArgNo = 0;
5039 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5040 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
5041
5042 // Compute the constraint code and ConstraintType to use.
5043 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5044
5045 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5046 OpInfo.isIndirect) {
5047 Value *OpVal = CS->getArgOperand(ArgNo++);
5048 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5049 } else if (OpInfo.Type == InlineAsm::isInput)
5050 ArgNo++;
5051 }
5052
5053 return MadeChange;
5054}
5055
5056/// Check if all the uses of \p Val are equivalent (or free) zero or
5057/// sign extensions.
5058static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5059   assert(!Val->use_empty() && "Input must have at least one use");
5060 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5061 bool IsSExt = isa<SExtInst>(FirstUser);
5062 Type *ExtTy = FirstUser->getType();
5063 for (const User *U : Val->users()) {
5064 const Instruction *UI = cast<Instruction>(U);
5065 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5066 return false;
5067 Type *CurTy = UI->getType();
5068 // Same input and output types: Same instruction after CSE.
5069 if (CurTy == ExtTy)
5070 continue;
5071
5072 // If IsSExt is true, we are in this situation:
5073 // a = Val
5074 // b = sext ty1 a to ty2
5075 // c = sext ty1 a to ty3
5076 // Assuming ty2 is shorter than ty3, this could be turned into:
5077 // a = Val
5078 // b = sext ty1 a to ty2
5079 // c = sext ty2 b to ty3
5080 // However, the last sext is not free.
5081 if (IsSExt)
5082 return false;
5083
5084 // This is a ZExt, maybe this is free to extend from one type to another.
5085 // In that case, we would not account for a different use.
5086 Type *NarrowTy;
5087 Type *LargeTy;
5088 if (ExtTy->getScalarType()->getIntegerBitWidth() >
5089 CurTy->getScalarType()->getIntegerBitWidth()) {
5090 NarrowTy = CurTy;
5091 LargeTy = ExtTy;
5092 } else {
5093 NarrowTy = ExtTy;
5094 LargeTy = CurTy;
5095 }
5096
5097 if (!TLI.isZExtFree(NarrowTy, LargeTy))
5098 return false;
5099 }
5100 // All uses are the same or can be derived from one another for free.
5101 return true;
5102}
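// [Editor's note] An illustration of the zext case above (hypothetical values,
// assuming the target reports zext i32 -> i64 as free):
//
//   %z1 = zext i16 %v to i32
//   %z2 = zext i16 %v to i64
//
// counts as "same ext use" because %z2 can be rebuilt as a free zext of %z1,
// whereas the analogous pair of sexts is rejected since the second sext would
// not be free.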
5103
5104/// Try to speculatively promote extensions in \p Exts and continue
5105/// promoting through newly promoted operands recursively as far as doing so is
5106/// profitable. Save extensions profitably moved up in \p ProfitablyMovedExts.
5107/// When some promotion happened, \p TPT contains the proper state to revert
5108/// them.
5109///
5110/// \return true if some promotion happened, false otherwise.
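///
/// For example (illustrative IR, not taken from the original source):
/// \code
///   %a   = add nsw i32 %b, 1
///   %ext = sext i32 %a to i64
/// \endcode
/// may be speculatively rewritten to
/// \code
///   %extb = sext i32 %b to i64
///   %a    = add nsw i64 %extb, 1
/// \endcode
/// so that, if %b is loaded from memory, the extension now sits directly on
/// the load and can later be folded into an ext(load).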
5111bool CodeGenPrepare::tryToPromoteExts(
5112 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
5113 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
5114 unsigned CreatedInstsCost) {
5115 bool Promoted = false;
5116
5117 // Iterate over all the extensions to try to promote them.
5118 for (auto I : Exts) {
5119 // Early check if we directly have ext(load).
5120 if (isa<LoadInst>(I->getOperand(0))) {
5121 ProfitablyMovedExts.push_back(I);
5122 continue;
5123 }
5124
5125 // Check whether or not we want to do any promotion. The reason we have
5126 // this check inside the for loop is to catch the case where an extension
5127 // is directly fed by a load, because in that case the extension can be
5128 // moved up without any promotion on its operands.
5129 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
5130 return false;
5131
5132 // Get the action to perform the promotion.
5133 TypePromotionHelper::Action TPH =
5134 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
5135 // Check if we can promote.
5136 if (!TPH) {
5137 // Save the current extension as we cannot move up through its operand.
5138 ProfitablyMovedExts.push_back(I);
5139 continue;
5140 }
5141
5142 // Save the current state.
5143 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5144 TPT.getRestorationPoint();
5145 SmallVector<Instruction *, 4> NewExts;
5146 unsigned NewCreatedInstsCost = 0;
5147 unsigned ExtCost = !TLI->isExtFree(I);
5148 // Promote.
5149 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
5150 &NewExts, nullptr, *TLI);
5151 assert(PromotedVal &&
5152        "TypePromotionHelper should have filtered out those cases");
5153
5154 // Only one extension can be merged into a load. Therefore, if this step
5155 // created more than one new extension, we heuristically cut this search
5156 // path, because it means we degrade the code quality.
5157 // With exactly two, the transformation is neutral, because we will merge
5158 // one extension but leave one. However, we optimistically keep going,
5159 // because the remaining extension may be removed too.
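    // Worked example (hypothetical numbers, not from the original source):
    // with CreatedInstsCost == 0, a promotion that creates two instructions
    // (NewCreatedInstsCost == 2), and a non-free extension (ExtCost == 1),
    //   TotalCreatedInstsCost = max(0, (0 + 2) - 1) = 1,
    // which does not exceed the threshold of 1 checked below, so the search
    // continues along this path.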
5160 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5161 // FIXME: It would be possible to propagate a negative value instead of
5162 // conservatively ceiling it to 0.
5163 TotalCreatedInstsCost =
5164 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5165 if (!StressExtLdPromotion &&
5166 (TotalCreatedInstsCost > 1 ||
5167 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5168 // This promotion is not profitable, rollback to the previous state, and
5169 // save the current extension in ProfitablyMovedExts as the latest
5170 // speculative promotion turned out to be unprofitable.
5171 TPT.rollback(LastKnownGood);
5172 ProfitablyMovedExts.push_back(I);
5173 continue;
5174 }
5175 // Continue promoting NewExts as far as doing so is profitable.
5176 SmallVector<Instruction *, 2> NewlyMovedExts;
5177 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5178 bool NewPromoted = false;
5179 for (auto ExtInst : NewlyMovedExts) {
5180 Instruction *MovedExt = cast<Instruction>(ExtInst);
5181 Value *ExtOperand = MovedExt->getOperand(0);
5182 // If we have reached a load, we need this extra profitability check
5183 // as it could potentially be merged into an ext(load).
5184 if (isa<LoadInst>(ExtOperand) &&
5185 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5186 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5187 continue;
5188
5189 ProfitablyMovedExts.push_back(MovedExt);
5190 NewPromoted = true;
5191 }
5192
5193 // If none of the speculative promotions for NewExts is profitable, roll
5194 // back and save the current extension (I) as the last profitable extension.
5195 if (!NewPromoted) {
5196 TPT.rollback(LastKnownGood);
5197 ProfitablyMovedExts.push_back(I);
5198 continue;
5199 }
5200 // The promotion is profitable.
5201 Promoted = true;
5202 }
5203 return Promoted;
5204}
5205
5206/// Merge redundant sexts when one dominates the other.
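///
/// For example (illustrative IR, not taken from the original source):
/// \code
///   %s1 = sext i32 %v to i64   ; dominates %s2
///   ...
///   %s2 = sext i32 %v to i64   ; redundant
/// \endcode
/// All uses of %s2 are rewritten to %s1 and %s2 is removed.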
5207bool CodeGenPrepare::mergeSExts(Function &F) {
5208 bool Changed = false;
5209 for (auto &Entry : ValToSExtendedUses) {
5210 SExts &Insts = Entry.second;
5211 SExts CurPts;
5212 for (Instruction *Inst : Insts) {
5213 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5214 Inst->getOperand(0) != Entry.first)
5215 continue;
5216 bool inserted = false;
5217 for (auto &Pt : CurPts) {
5218 if (getDT(F).dominates(Inst, Pt)) {
5219 Pt->replaceAllUsesWith(Inst);
5220 RemovedInsts.insert(Pt);
5221 Pt->removeFromParent();
5222 Pt = Inst;
5223 inserted = true;
5224 Changed = true;
5225 break;
5226 }
5227 if (!getDT(F).dominates(Pt, Inst))
5228 // Give up if we need to merge in a common dominator as the
5229 // experiments show it is not profitable.
5230 continue;
5231 Inst->replaceAllUsesWith(Pt);
5232 RemovedInsts.insert(Inst);
5233 Inst->removeFromParent();
5234 inserted = true;
5235 Changed = true;
5236 break;
5237 }
5238 if (!inserted)
5239 CurPts.push_back(Inst);
5240 }
5241 }
5242 return Changed;
5243}
5244
5245// Split large data structures so that the GEPs accessing them can have
5246// smaller offsets, allowing them to be sunk to the same blocks as their users.
5247// For example, a large struct starting from %base is split into two parts
5248// where the second part starts from %new_base.
5249//
5250// Before:
5251// BB0:
5252// %base =
5253//
5254// BB1:
5255// %gep0 = gep %base, off0
5256// %gep1 = gep %base, off1
5257// %gep2 = gep %base, off2
5258//
5259// BB2:
5260// %load1 = load %gep0
5261// %load2 = load %gep1
5262// %load3 = load %gep2
5263//
5264// After:
5265// BB0:
5266// %base =
5267// %new_base = gep %base, off0
5268//
5269// BB1:
5270// %new_gep0 = %new_base
5271// %new_gep1 = gep %new_base, off1 - off0
5272// %new_gep2 = gep %new_base, off2 - off0
5273//
5274// BB2:
5275// %load1 = load i32, i32* %new_gep0
5276// %load2 = load i32, i32* %new_gep1
5277// %load3 = load i32, i32* %new_gep2
5278//
5279// %new_gep1 and %new_gep2 can now be sunk to BB2 after the splitting because
5280// their offsets are small enough to fit into the addressing mode.
5281bool CodeGenPrepare::splitLargeGEPOffsets() {
5282 bool Changed = false;
5283 for (auto &Entry : LargeOffsetGEPMap) {
5284 Value *OldBase = Entry.first;
5285 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5286 &LargeOffsetGEPs = Entry.second;
5287 auto compareGEPOffset =
5288 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5289 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5290 if (LHS.first == RHS.first)
5291 return false;
5292 if (LHS.second != RHS.second)
5293 return LHS.second < RHS.second;
5294 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5295 };
5296 // Sort all the GEPs of the same data structure by their offsets.
5297 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5298 LargeOffsetGEPs.erase(
5299 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5300 LargeOffsetGEPs.end());
5301 // Skip if all the GEPs have the same offsets.
5302 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5303 continue;
5304 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5305 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5306 Value *NewBaseGEP = nullptr;
5307
5308 auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5309 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5310 GetElementPtrInst *GEP = LargeOffsetGEP->first;
5311 int64_t Offset = LargeOffsetGEP->second;
5312 if (Offset != BaseOffset) {
5313 TargetLowering::AddrMode AddrMode;
5314 AddrMode.BaseOffs = Offset - BaseOffset;
5315 // The result type of the GEP might not be the type of the memory
5316 // access.
5317 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5318 GEP->getResultElementType(),
5319 GEP->getAddressSpace())) {
5320 // We need to create a new base if the offset to the current base is
5321 // too large to fit into the addressing mode. So, a very large struct
5322 // may be split into several parts.
5323 BaseGEP = GEP;
5324 BaseOffset = Offset;
5325 NewBaseGEP = nullptr;
5326 }
5327 }
5328
5329 // Generate a new GEP to replace the current one.
5330 LLVMContext &Ctx = GEP->getContext();
5331 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5332 Type *I8PtrTy =
5333 Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5334 Type *I8Ty = Type::getInt8Ty(Ctx);
5335
5336 if (!NewBaseGEP) {
5337 // Create a new base if we don't have one yet. Find the insertion
5338 // pointer for the new base first.
5339 BasicBlock::iterator NewBaseInsertPt;
5340 BasicBlock *NewBaseInsertBB;
5341 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5342 // If the base of the struct is an instruction, the new base will be
5343 // inserted close to it.
5344 NewBaseInsertBB = BaseI->getParent();
5345 if (isa<PHINode>(BaseI))
5346 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5347 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5348 NewBaseInsertBB =
5349 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5350 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5351 } else
5352 NewBaseInsertPt = std::next(BaseI->getIterator());
5353 } else {
5354 // If the current base is an argument or global value, the new base
5355 // will be inserted to the entry block.
5356 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5357 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5358 }
5359 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5360 // Create a new base.
5361 Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5362 NewBaseGEP = OldBase;
5363 if (NewBaseGEP->getType() != I8PtrTy)
5364 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5365 NewBaseGEP =
5366 NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5367 NewGEPBases.insert(NewBaseGEP);
5368 }
5369
5370 IRBuilder<> Builder(GEP);
5371 Value *NewGEP = NewBaseGEP;
5372 if (Offset == BaseOffset) {
5373 if (GEP->getType() != I8PtrTy)
5374 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5375 } else {
5376 // Calculate the new offset for the new GEP.
5377 Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5378 NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5379
5380 if (GEP->getType() != I8PtrTy)
5381 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5382 }
5383 GEP->replaceAllUsesWith(NewGEP);
5384 LargeOffsetGEPID.erase(GEP);
5385 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5386 GEP->eraseFromParent();
5387 Changed = true;
5388 }
5389 }
5390 return Changed;
5391}
5392
5393/// Return true if an ext(load) can be formed from an extension in
5394/// \p MovedExts.
5395bool CodeGenPrepare::canFormExtLd(
5396 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5397 Instruction *&Inst, bool HasPromoted) {
5398 for (auto *MovedExtInst : MovedExts) {
5399 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5400 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5401 Inst = MovedExtInst;
5402 break;
5403 }
5404 }
5405 if (!LI)
5406 return false;
5407
5408 // If they're already in the same block, there's nothing to do.
5409 // Make the cheap checks first if we did not promote.
5410 // If we promoted, we need to check if it is indeed profitable.
5411 if (!HasPromoted && LI->getParent() == Inst->getParent())
5412 return false;
5413
5414 return TLI->isExtLoad(LI, Inst, *DL);
5415}
5416
5417/// Move a zext or sext fed by a load into the same basic block as the load,
5418/// unless conditions are unfavorable. This allows SelectionDAG to fold the
5419/// extend into the load.
5420///
5421/// E.g.,
5422/// \code
5423/// %ld = load i32* %addr
5424/// %add = add nuw i32 %ld, 4
5425/// %zext = zext i32 %add to i64
5426/// \endcode
5427/// =>
5428/// \code
5429/// %ld = load i32* %addr
5430/// %zext = zext i32 %ld to i64
5431/// %add = add nuw i64 %zext, 4
5432/// \endcode
5433/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5434/// allows us to match zext(load i32*) to i64.
5435///
5436/// Also, try to promote the computations used to obtain a sign-extended
5437/// value used in memory accesses.
5438/// E.g.,
5439/// \code
5440/// a = add nsw i32 b, 3
5441/// d = sext i32 a to i64
5442/// e = getelementptr ..., i64 d
5443/// \endcode
5444/// =>
5445/// \code
5446/// f = sext i32 b to i64
5447/// a = add nsw i64 f, 3
5448/// e = getelementptr ..., i64 a
5449/// \endcode
5450///
5451/// \p Inst[in/out] the extension may be modified during the process if some
5452/// promotions apply.
5453bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5454 // ExtLoad formation and address type promotion infrastructure requires TLI to
5455 // be effective.
5456 if (!TLI)
5457 return false;
5458
5459 bool AllowPromotionWithoutCommonHeader = false;
5460 /// See if it is an interesting sext operation for the address type
5461 /// promotion before trying to promote it, e.g., one with the right
5462 /// type that is used in memory accesses.
5463 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5464 *Inst, AllowPromotionWithoutCommonHeader);
5465 TypePromotionTransaction TPT(RemovedInsts);
5466 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5467 TPT.getRestorationPoint();
5468 SmallVector<Instruction *, 1> Exts;
5469 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5470 Exts.push_back(Inst);
5471
5472 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5473
5474 // Look for a load being extended.
5475 LoadInst *LI = nullptr;
5476 Instruction *ExtFedByLoad;
5477
5478 // Try to promote a chain of computation if it allows us to form an
5479 // extended load.
5480 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5481 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5482 TPT.commit();
5483 // Move the extend into the same block as the load
5484 ExtFedByLoad->moveAfter(LI);
5485 // CGP does not check if the zext would be speculatively executed when moved
5486 // to the same basic block as the load. Preserving its original location
5487 // would pessimize the debugging experience, as well as negatively impact
5488 // the quality of sample pgo. We don't want to use "line 0" as that has a
5489 // size cost in the line-table section and logically the zext can be seen as
5490 // part of the load. Therefore we conservatively reuse the same debug
5491 // location for the load and the zext.
5492 ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5493 ++NumExtsMoved;
5494 Inst = ExtFedByLoad;
5495 return true;
5496 }
5497
5498 // Continue promoting SExts if the target considers this promotion worthwhile.
5499 if (ATPConsiderable &&
5500 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5501 HasPromoted, TPT, SpeculativelyMovedExts))
5502 return true;
5503
5504 TPT.rollback(LastKnownGood);
5505 return false;
5506}
5507
5508// Perform address type promotion if doing so is profitable.
5509// If AllowPromotionWithoutCommonHeader == false, we should find other sext
5510// instructions that sign extended the same initial value. However, if
5511// AllowPromotionWithoutCommonHeader == true, we expect that promoting the
5512// extension is profitable on its own.
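//
// For example (illustrative IR, not taken from the original source), with two
// sext chains sharing the header %x:
//   %s1 = sext i32 %x to i64   ; first chain seen: recorded in SeenChainsForSExt
//   ...
//   %s2 = sext i32 %x to i64   ; second chain: both chains are committed and
//                              ; registered in ValToSExtendedUses, so mergeSExts
//                              ; can later fold one sext into the other.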
5513bool CodeGenPrepare::performAddressTypePromotion(
5514 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5515 bool HasPromoted, TypePromotionTransaction &TPT,
5516 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5517 bool Promoted = false;
5518 SmallPtrSet<Instruction *, 1> UnhandledExts;
5519 bool AllSeenFirst = true;
5520 for (auto I : SpeculativelyMovedExts) {
5521 Value *HeadOfChain = I->getOperand(0);
5522 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5523 SeenChainsForSExt.find(HeadOfChain);
5524 // If there is an unhandled SExt which has the same header, try to promote
5525 // it as well.
5526 if (AlreadySeen != SeenChainsForSExt.end()) {
5527 if (AlreadySeen->second != nullptr)
5528 UnhandledExts.insert(AlreadySeen->second);
5529 AllSeenFirst = false;
5530 }
5531 }
5532
5533 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5534 SpeculativelyMovedExts.size() == 1)) {
5535 TPT.commit();
5536 if (HasPromoted)
5537 Promoted = true;
5538 for (auto I : SpeculativelyMovedExts) {
5539 Value *HeadOfChain = I->getOperand(0);
5540 SeenChainsForSExt[HeadOfChain] = nullptr;
5541 ValToSExtendedUses[HeadOfChain].push_back(I);
5542 }
5543 // Update Inst as promotion happened.
5544 Inst = SpeculativelyMovedExts.pop_back_val();
5545 } else {
5546 // This is the first chain visited from the header; keep the current chain
5547 // as unhandled. Defer promoting it until we encounter another SExt
5548 // chain derived from the same header.
5549 for (auto I : SpeculativelyMovedExts) {
5550 Value *HeadOfChain = I->getOperand(0);
5551 SeenChainsForSExt[HeadOfChain] = Inst;
5552 }
5553 return false;
5554 }
5555
5556 if (!AllSeenFirst && !UnhandledExts.empty())
5557 for (auto VisitedSExt : UnhandledExts) {
5558 if (RemovedInsts.count(VisitedSExt))
5559 continue;
5560 TypePromotionTransaction TPT(RemovedInsts);
5561 SmallVector<Instruction *, 1> Exts;
5562 SmallVector<Instruction *, 2> Chains;
5563 Exts.push_back(VisitedSExt);
5564 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5565 TPT.commit();
5566 if (HasPromoted)
5567 Promoted = true;
5568 for (auto I : Chains) {
5569 Value *HeadOfChain = I->getOperand(0);
5570 // Mark this as handled.
5571 SeenChainsForSExt[HeadOfChain] = nullptr;
5572 ValToSExtendedUses[HeadOfChain].push_back(I);
5573 }
5574 }
5575 return Promoted;
5576}
5577
5578bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5579 BasicBlock *DefBB = I->getParent();
5580
5581 // If the result of a {s|z}ext and its source are both live out, rewrite all
5582 // other uses of the source with the result of the extension.
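  //
  // Illustrative example (not from the original source): with %src defined in
  // this block and I being %ext = zext i32 %src to i64, a use of %src in
  // another block is rewritten to use
  //   %t = trunc i64 %ext to i32
  // inserted in that block, so only %ext has to stay live across blocks. The
  // trunc is expected to be free, which is checked below via isTruncateFree.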
5583 Value *Src = I->getOperand(0);
5584 if (Src->hasOneUse())
5585 return false;
5586
5587 // Only do this xform if truncating is free.
5588 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5589 return false;
5590
5591 // Only safe to perform the optimization if the source is also defined in
5592 // this block.
5593 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5594 return false;
5595
5596 bool DefIsLiveOut = false;
5597 for (User *U : I->users()) {
5598 Instruction *UI = cast<Instruction>(U);
5599
5600 // Figure out which BB this ext is used in.
5601 BasicBlock *UserBB = UI->getParent();
5602 if (UserBB == DefBB) continue;
5603 DefIsLiveOut = true;
5604 break;
5605 }
5606 if (!DefIsLiveOut)
5607 return false;
5608
5609 // Make sure none of the uses are PHI nodes.
5610 for (User *U : Src->users()) {
5611 Instruction *UI = cast<Instruction>(U);
5612 BasicBlock *UserBB = UI->getParent();
5613 if (UserBB == DefBB) continue;
5614 // Be conservative. We don't want this xform to end up introducing
5615 // reloads just before load / store instructions.
5616 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5617 return false;
5618 }
5619
5620 // InsertedTruncs - Insert only one trunc in each block.
5621 DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5622
5623 bool MadeChange = false;
5624 for (Use &U : Src->uses()) {
5625 Instruction *User = cast<Instruction>(U.getUser());
5626
5627 // Figure out which BB this ext is used in.
5628 BasicBlock *UserBB = User->getParent();
5629 if (UserBB == DefBB) continue;
5630
5631 // Both src and def are live in this block. Rewrite the use.
5632 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5633
5634 if (!InsertedTrunc) {
5635 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5636 assert(InsertPt != UserBB->end());
5637 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5638 InsertedInsts.insert(InsertedTrunc);
5639 }
5640
5641 // Replace a use of the {s|z}ext source with a use of the result.
5642 U = InsertedTrunc;
5643 ++NumExtUses;
5644 MadeChange = true;
5645 }
5646
5647 return MadeChange;
5648}
5649
5650// Find loads whose uses only use some of the loaded value's bits. Add an "and"
5651// just after the load if the target can fold this into one extload instruction,
5652// with the hope of eliminating some of the other later "and" instructions using
5653// the loaded value. "and"s that are made trivially redundant by the insertion
5654// of the new "and" are removed by this function, while others (e.g. those whose
5655// path from the load goes through a phi) are left for isel to potentially
5656// remove.
5657//
5658// For example:
5659//
5660// b0:
5661// x = load i32
5662// ...
5663// b1:
5664// y = and x, 0xff
5665// z = use y
5666//
5667// becomes:
5668//
5669// b0:
5670// x = load i32
5671// x' = and x, 0xff
5672// ...
5673// b1:
5674// z = use x'
5675//
5676// whereas:
5677//
5678// b0:
5679// x1 = load i32
5680// ...
5681// b1:
5682// x2 = load i32
5683// ...
5684// b2:
5685// x = phi x1, x2
5686// y = and x, 0xff
5687//
5688// becomes (after a call to optimizeLoadExt for each load):
5689//
5690// b0:
5691// x1 = load i32
5692// x1' = and x1, 0xff
5693// ...
5694// b1:
5695// x2 = load i32
5696// x2' = and x2, 0xff
5697// ...
5698// b2:
5699// x = phi x1', x2'
5700// y = and x, 0xff
5701bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
5702 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
5703 return false;
5704
5705 // Skip loads we've already transformed.
5706 if (Load->hasOneUse() &&
5707 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
5708 return false;
5709
5710 // Look at all uses of Load, looking through phis, to determine how many bits
5711 // of the loaded value are needed.
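  //
  // Worked example (hypothetical, not from the original source): for a load of
  // i32 whose users are "and %x, 255" and "trunc %x to i16", the loop below
  // computes DemandBits = 0xff | 0xffff = 0xffff and WidestAndBits = 0xff.
  // Because WidestAndBits != DemandBits, no "and" would be hoisted in that
  // case.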
5712 SmallVector<Instruction *, 8> WorkList;
5713 SmallPtrSet<Instruction *, 16> Visited;
5714 SmallVector<Instruction *, 8> AndsToMaybeRemove;
5715 for (auto *U : Load->users())
5716 WorkList.push_back(cast<Instruction>(U));
5717
5718 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
5719 unsigned BitWidth = LoadResultVT.getSizeInBits();
5720 APInt DemandBits(BitWidth, 0);
5721 APInt WidestAndBits(BitWidth, 0);
5722
5723 while (!WorkList.empty()) {
5724 Instruction *I = WorkList.back();
5725 WorkList.pop_back();
5726
5727 // Break use-def graph loops.
5728 if (!Visited.insert(I).second)
5729 continue;
5730
5731 // For a PHI node, push all of its users.
5732 if (auto *Phi = dyn_cast<PHINode>(I)) {
5733 for (auto *U : Phi->users())
5734 WorkList.push_back(cast<Instruction>(U));
5735 continue;
5736 }
5737
5738 switch (I->getOpcode()) {
5739 case Instruction::And: {
5740 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
5741 if (!AndC)
5742 return false;
5743 APInt AndBits = AndC->getValue();
5744 DemandBits |= AndBits;
5745 // Keep track of the widest and mask we see.
5746 if (AndBits.ugt(WidestAndBits))
5747 WidestAndBits = AndBits;
5748 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
5749 AndsToMaybeRemove.push_back(I);
5750 break;
5751 }
5752
5753 case Instruction::Shl: {
5754 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
5755 if (!ShlC)
5756 return false;
5757 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
5758 DemandBits.setLowBits(BitWidth - ShiftAmt);
5759 break;
5760 }
5761
5762 case Instruction::Trunc: {
5763 EVT TruncVT = TLI->getValueType(*DL, I->getType());
5764 unsigned TruncBitWidth = TruncVT.getSizeInBits();
5765 DemandBits.setLowBits(TruncBitWidth);
5766 break;
5767 }
5768
5769 default:
5770 return false;
5771 }
5772 }
5773
5774 uint32_t ActiveBits = DemandBits.getActiveBits();
5775 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
5776 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
5777 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
5778 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
5779 // followed by an AND.
5780 // TODO: Look into removing this restriction by fixing backends to either
5781 // return false for isLoadExtLegal for i1 or have them select this pattern to
5782 // a single instruction.
5783 //
5784 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
5785 // mask, since these are the only ands that will be removed by isel.
5786 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
5787 WidestAndBits != DemandBits)
5788 return false;
5789
5790 LLVMContext &Ctx = Load->getType()->getContext();
5791 Type *TruncTy = Type::getIntNTy(Ctx,