Bug Summary

File: llvm/lib/CodeGen/CodeGenPrepare.cpp
Warning: line 3182, column 5
Called C++ object pointer is null
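
The diagnostic means the analyzer found a path on which a C++ member function is called through a pointer that may still be null at that point. The snippet below is only a minimal sketch of that defect class, with illustrative names; it does not reproduce the code at CodeGenPrepare.cpp:3182, which lies outside this excerpt. (In this pass, TLI and several other target-info members are default-initialized to nullptr and only assigned in runOnFunction.)

    // Minimal illustrative sketch of "Called C++ object pointer is null".
    // Hypothetical names; not the code at line 3182.
    struct Lowering {
      bool isSlowDivBypassed() const { return false; }
    };

    struct Pass {
      const Lowering *TLI = nullptr;        // stays null until setup() runs
      void setup(const Lowering &L) { TLI = &L; }
      bool run() const {
        // If no path guarantees setup() was called first, the analyzer
        // reports the call below: the object pointer may be null.
        return TLI->isSlowDivBypassed();
      }
    };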

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CodeGenPrepare.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryBuiltins.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/CodeGen/Analysis.h"
36#include "llvm/CodeGen/ISDOpcodes.h"
37#include "llvm/CodeGen/SelectionDAGNodes.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetPassConfig.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/CodeGen/ValueTypes.h"
42#include "llvm/Config/llvm-config.h"
43#include "llvm/IR/Argument.h"
44#include "llvm/IR/Attributes.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/CallSite.h"
47#include "llvm/IR/Constant.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DerivedTypes.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/GetElementPtrTypeIterator.h"
54#include "llvm/IR/GlobalValue.h"
55#include "llvm/IR/GlobalVariable.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/InlineAsm.h"
58#include "llvm/IR/InstrTypes.h"
59#include "llvm/IR/Instruction.h"
60#include "llvm/IR/Instructions.h"
61#include "llvm/IR/IntrinsicInst.h"
62#include "llvm/IR/Intrinsics.h"
63#include "llvm/IR/IntrinsicsAArch64.h"
64#include "llvm/IR/IntrinsicsX86.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/MDBuilder.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
69#include "llvm/IR/PatternMatch.h"
70#include "llvm/IR/Statepoint.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/User.h"
74#include "llvm/IR/Value.h"
75#include "llvm/IR/ValueHandle.h"
76#include "llvm/IR/ValueMap.h"
77#include "llvm/InitializePasses.h"
78#include "llvm/Pass.h"
79#include "llvm/Support/BlockFrequency.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CommandLine.h"
83#include "llvm/Support/Compiler.h"
84#include "llvm/Support/Debug.h"
85#include "llvm/Support/ErrorHandling.h"
86#include "llvm/Support/MachineValueType.h"
87#include "llvm/Support/MathExtras.h"
88#include "llvm/Support/raw_ostream.h"
89#include "llvm/Target/TargetMachine.h"
90#include "llvm/Target/TargetOptions.h"
91#include "llvm/Transforms/Utils/BasicBlockUtils.h"
92#include "llvm/Transforms/Utils/BypassSlowDivision.h"
93#include "llvm/Transforms/Utils/Local.h"
94#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
95#include "llvm/Transforms/Utils/SizeOpts.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <utility>
103#include <vector>
104
105using namespace llvm;
106using namespace llvm::PatternMatch;
107
108#define DEBUG_TYPE "codegenprepare"
109
110STATISTIC(NumBlocksElim, "Number of blocks eliminated");
111STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
112STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
113STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
114 "sunken Cmps");
115STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
116 "of sunken Casts");
117STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
118 "computations were sunk");
119STATISTIC(NumMemoryInstsPhiCreated,
120 "Number of phis created when address "
121 "computations were sunk to memory instructions");
122STATISTIC(NumMemoryInstsSelectCreated,
123 "Number of select created when address "
124 "computations were sunk to memory instructions");
125STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
126STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
127STATISTIC(NumAndsAdded,
128 "Number of and mask instructions added to form ext loads");
129STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
130STATISTIC(NumRetsDup, "Number of return instructions duplicated");
131STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
132STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
133STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
134
135static cl::opt<bool> DisableBranchOpts(
136 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
137 cl::desc("Disable branch optimizations in CodeGenPrepare"));
138
139static cl::opt<bool>
140 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
141 cl::desc("Disable GC optimizations in CodeGenPrepare"));
142
143static cl::opt<bool> DisableSelectToBranch(
144 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
145 cl::desc("Disable select to branch conversion."));
146
147static cl::opt<bool> AddrSinkUsingGEPs(
148 "addr-sink-using-gep", cl::Hidden, cl::init(true),
149 cl::desc("Address sinking in CGP using GEPs."));
150
151static cl::opt<bool> EnableAndCmpSinking(
152 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
153 cl::desc("Enable sinking and/cmp into branches."));
154
155static cl::opt<bool> DisableStoreExtract(
156 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
157 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
158
159static cl::opt<bool> StressStoreExtract(
160 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
161 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
162
163static cl::opt<bool> DisableExtLdPromotion(
164 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
165 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
166 "CodeGenPrepare"));
167
168static cl::opt<bool> StressExtLdPromotion(
169 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
170 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
171 "optimization in CodeGenPrepare"));
172
173static cl::opt<bool> DisablePreheaderProtect(
174 "disable-preheader-prot", cl::Hidden, cl::init(false),
175 cl::desc("Disable protection against removing loop preheaders"));
176
177static cl::opt<bool> ProfileGuidedSectionPrefix(
178 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
179 cl::desc("Use profile info to add section prefix for hot/cold functions"));
180
181static cl::opt<unsigned> FreqRatioToSkipMerge(
182 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
183 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
184 "(frequency of destination block) is greater than this ratio"));
185
186static cl::opt<bool> ForceSplitStore(
187 "force-split-store", cl::Hidden, cl::init(false),
188 cl::desc("Force store splitting no matter what the target query says."));
189
190static cl::opt<bool>
191EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
192 cl::desc("Enable merging of redundant sexts when one is dominating"
193 " the other."), cl::init(true));
194
195static cl::opt<bool> DisableComplexAddrModes(
196 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
197 cl::desc("Disables combining addressing modes with different parts "
198 "in optimizeMemoryInst."));
199
200static cl::opt<bool>
201AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
202 cl::desc("Allow creation of Phis in Address sinking."));
203
204static cl::opt<bool>
205AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
206 cl::desc("Allow creation of selects in Address sinking."));
207
208static cl::opt<bool> AddrSinkCombineBaseReg(
209 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
210 cl::desc("Allow combining of BaseReg field in Address sinking."));
211
212static cl::opt<bool> AddrSinkCombineBaseGV(
213 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
214 cl::desc("Allow combining of BaseGV field in Address sinking."));
215
216static cl::opt<bool> AddrSinkCombineBaseOffs(
217 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
218 cl::desc("Allow combining of BaseOffs field in Address sinking."));
219
220static cl::opt<bool> AddrSinkCombineScaledReg(
221 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
222 cl::desc("Allow combining of ScaledReg field in Address sinking."));
223
224static cl::opt<bool>
225 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
226 cl::init(true),
227 cl::desc("Enable splitting large offset of GEP."));
228
229static cl::opt<bool> EnableICMP_EQToICMP_ST(
230 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
231 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
232
233namespace {
234
235enum ExtType {
236 ZeroExtension, // Zero extension has been seen.
237 SignExtension, // Sign extension has been seen.
238 BothExtension // This extension type is used if we saw sext after
239 // ZeroExtension had been set, or if we saw zext after
240 // SignExtension had been set. It makes the type
241 // information of a promoted instruction invalid.
242};
243
244using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
245using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
246using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
247using SExts = SmallVector<Instruction *, 16>;
248using ValueToSExts = DenseMap<Value *, SExts>;
249
250class TypePromotionTransaction;
251
252 class CodeGenPrepare : public FunctionPass {
253 const TargetMachine *TM = nullptr;
254 const TargetSubtargetInfo *SubtargetInfo;
255 const TargetLowering *TLI = nullptr;
256 const TargetRegisterInfo *TRI;
257 const TargetTransformInfo *TTI = nullptr;
258 const TargetLibraryInfo *TLInfo;
259 const LoopInfo *LI;
260 std::unique_ptr<BlockFrequencyInfo> BFI;
261 std::unique_ptr<BranchProbabilityInfo> BPI;
262 ProfileSummaryInfo *PSI;
263
264 /// As we scan instructions optimizing them, this is the next instruction
265 /// to optimize. Transforms that can invalidate this should update it.
266 BasicBlock::iterator CurInstIterator;
267
268 /// Keeps track of non-local addresses that have been sunk into a block.
269 /// This allows us to avoid inserting duplicate code for blocks with
270 /// multiple load/stores of the same address. The usage of WeakTrackingVH
271 /// enables SunkAddrs to be treated as a cache whose entries can be
272 /// invalidated if a sunken address computation has been erased.
273 ValueMap<Value*, WeakTrackingVH> SunkAddrs;
274
275 /// Keeps track of all instructions inserted for the current function.
276 SetOfInstrs InsertedInsts;
277
278 /// Keeps track of the types of the related instructions before their
279 /// promotion for the current function.
280 InstrToOrigTy PromotedInsts;
281
282 /// Keep track of instructions removed during promotion.
283 SetOfInstrs RemovedInsts;
284
285 /// Keep track of sext chains based on their initial value.
286 DenseMap<Value *, Instruction *> SeenChainsForSExt;
287
288 /// Keep track of GEPs accessing the same data structures such as structs or
289 /// arrays that are candidates to be split later because of their large
290 /// size.
291 MapVector<
292 AssertingVH<Value>,
293 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
294 LargeOffsetGEPMap;
295
296 /// Keep track of new GEP base after splitting the GEPs having large offset.
297 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
298
299 /// Map serial numbers to Large offset GEPs.
300 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
301
302 /// Keep track of SExt promoted.
303 ValueToSExts ValToSExtendedUses;
304
305 /// True if the function has the OptSize attribute.
306 bool OptSize;
307
308 /// DataLayout for the Function being processed.
309 const DataLayout *DL = nullptr;
310
311 /// Building the dominator tree can be expensive, so we only build it
312 /// lazily and update it when required.
313 std::unique_ptr<DominatorTree> DT;
314
315 public:
316 static char ID; // Pass identification, replacement for typeid
317
318 CodeGenPrepare() : FunctionPass(ID) {
319 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
320 }
321
322 bool runOnFunction(Function &F) override;
323
324 StringRef getPassName() const override { return "CodeGen Prepare"; }
325
326 void getAnalysisUsage(AnalysisUsage &AU) const override {
327 // FIXME: When we can selectively preserve passes, preserve the domtree.
328 AU.addRequired<ProfileSummaryInfoWrapperPass>();
329 AU.addRequired<TargetLibraryInfoWrapperPass>();
330 AU.addRequired<TargetPassConfig>();
331 AU.addRequired<TargetTransformInfoWrapperPass>();
332 AU.addRequired<LoopInfoWrapperPass>();
333 }
334
335 private:
336 template <typename F>
337 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
338 // Substituting can cause recursive simplifications, which can invalidate
339 // our iterator. Use a WeakTrackingVH to hold onto it in case this
340 // happens.
341 Value *CurValue = &*CurInstIterator;
342 WeakTrackingVH IterHandle(CurValue);
343
344 f();
345
346 // If the iterator instruction was recursively deleted, start over at the
347 // start of the block.
348 if (IterHandle != CurValue) {
349 CurInstIterator = BB->begin();
350 SunkAddrs.clear();
351 }
352 }
353
354 // Get the DominatorTree, building if necessary.
355 DominatorTree &getDT(Function &F) {
356 if (!DT)
357 DT = std::make_unique<DominatorTree>(F);
358 return *DT;
359 }
360
361 bool eliminateFallThrough(Function &F);
362 bool eliminateMostlyEmptyBlocks(Function &F);
363 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
364 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
365 void eliminateMostlyEmptyBlock(BasicBlock *BB);
366 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
367 bool isPreheader);
368 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
369 bool optimizeInst(Instruction *I, bool &ModifiedDT);
370 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
371 Type *AccessTy, unsigned AddrSpace);
372 bool optimizeInlineAsmInst(CallInst *CS);
373 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
374 bool optimizeExt(Instruction *&I);
375 bool optimizeExtUses(Instruction *I);
376 bool optimizeLoadExt(LoadInst *Load);
377 bool optimizeShiftInst(BinaryOperator *BO);
378 bool optimizeSelectInst(SelectInst *SI);
379 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
380 bool optimizeSwitchInst(SwitchInst *SI);
381 bool optimizeExtractElementInst(Instruction *Inst);
382 bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
383 bool fixupDbgValue(Instruction *I);
384 bool placeDbgValues(Function &F);
385 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
386 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
387 bool tryToPromoteExts(TypePromotionTransaction &TPT,
388 const SmallVectorImpl<Instruction *> &Exts,
389 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
390 unsigned CreatedInstsCost = 0);
391 bool mergeSExts(Function &F);
392 bool splitLargeGEPOffsets();
393 bool performAddressTypePromotion(
394 Instruction *&Inst,
395 bool AllowPromotionWithoutCommonHeader,
396 bool HasPromoted, TypePromotionTransaction &TPT,
397 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
398 bool splitBranchCondition(Function &F, bool &ModifiedDT);
399 bool simplifyOffsetableRelocate(Instruction &I);
400
401 bool tryToSinkFreeOperands(Instruction *I);
402 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
403 Value *Arg1, CmpInst *Cmp,
404 Intrinsic::ID IID);
405 bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
406 bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
407 bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
408 };
409
410} // end anonymous namespace
411
412char CodeGenPrepare::ID = 0;
413
414INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
415 "Optimize for code generation", false, false)
416INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
417INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
418 "Optimize for code generation", false, false)
419
420FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
421
422bool CodeGenPrepare::runOnFunction(Function &F) {
423 if (skipFunction(F))
424 return false;
425
426 DL = &F.getParent()->getDataLayout();
427
428 bool EverMadeChange = false;
429 // Clear per function information.
430 InsertedInsts.clear();
431 PromotedInsts.clear();
432
433 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
434 SubtargetInfo = TM->getSubtargetImpl(F);
435 TLI = SubtargetInfo->getTargetLowering();
436 TRI = SubtargetInfo->getRegisterInfo();
437 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
438 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
439 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
440 BPI.reset(new BranchProbabilityInfo(F, *LI));
441 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
442 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
443 OptSize = F.hasOptSize();
444 if (ProfileGuidedSectionPrefix) {
445 if (PSI->isFunctionHotInCallGraph(&F, *BFI))
446 F.setSectionPrefix(".hot");
447 else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
448 F.setSectionPrefix(".unlikely");
449 }
450
451 /// This optimization identifies DIV instructions that can be
452 /// profitably bypassed and carried out with a shorter, faster divide.
453 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
454 const DenseMap<unsigned int, unsigned int> &BypassWidths =
455 TLI->getBypassSlowDivWidths();
456 BasicBlock* BB = &*F.begin();
457 while (BB != nullptr) {
458 // bypassSlowDivision may create new BBs, but we don't want to reapply the
459 // optimization to those blocks.
460 BasicBlock* Next = BB->getNextNode();
461 // F.hasOptSize is already checked in the outer if statement.
462 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
463 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
464 BB = Next;
465 }
466 }
467
468 // Eliminate blocks that contain only PHI nodes and an
469 // unconditional branch.
470 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
471
472 bool ModifiedDT = false;
473 if (!DisableBranchOpts)
474 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
475
476 // Split some critical edges where one of the sources is an indirect branch,
477 // to help generate sane code for PHIs involving such edges.
478 EverMadeChange |= SplitIndirectBrCriticalEdges(F);
479
480 bool MadeChange = true;
481 while (MadeChange) {
482 MadeChange = false;
483 DT.reset();
484 for (Function::iterator I = F.begin(); I != F.end(); ) {
485 BasicBlock *BB = &*I++;
486 bool ModifiedDTOnIteration = false;
487 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
488
489 // Restart BB iteration if the dominator tree of the Function was changed
490 if (ModifiedDTOnIteration)
491 break;
492 }
493 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
494 MadeChange |= mergeSExts(F);
495 if (!LargeOffsetGEPMap.empty())
496 MadeChange |= splitLargeGEPOffsets();
497
498 // Really free removed instructions during promotion.
499 for (Instruction *I : RemovedInsts)
500 I->deleteValue();
501
502 EverMadeChange |= MadeChange;
503 SeenChainsForSExt.clear();
504 ValToSExtendedUses.clear();
505 RemovedInsts.clear();
506 LargeOffsetGEPMap.clear();
507 LargeOffsetGEPID.clear();
508 }
509
510 SunkAddrs.clear();
511
512 if (!DisableBranchOpts) {
513 MadeChange = false;
514 // Use a set vector to get deterministic iteration order. The order the
515 // blocks are removed may affect whether or not PHI nodes in successors
516 // are removed.
517 SmallSetVector<BasicBlock*, 8> WorkList;
518 for (BasicBlock &BB : F) {
519 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
520 MadeChange |= ConstantFoldTerminator(&BB, true);
521 if (!MadeChange) continue;
522
523 for (SmallVectorImpl<BasicBlock*>::iterator
524 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
525 if (pred_begin(*II) == pred_end(*II))
526 WorkList.insert(*II);
527 }
528
529 // Delete the dead blocks and any of their dead successors.
530 MadeChange |= !WorkList.empty();
531 while (!WorkList.empty()) {
532 BasicBlock *BB = WorkList.pop_back_val();
533 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
534
535 DeleteDeadBlock(BB);
536
537 for (SmallVectorImpl<BasicBlock*>::iterator
538 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
539 if (pred_begin(*II) == pred_end(*II))
540 WorkList.insert(*II);
541 }
542
543 // Merge pairs of basic blocks with unconditional branches, connected by
544 // a single edge.
545 if (EverMadeChange || MadeChange)
546 MadeChange |= eliminateFallThrough(F);
547
548 EverMadeChange |= MadeChange;
549 }
550
551 if (!DisableGCOpts) {
552 SmallVector<Instruction *, 2> Statepoints;
553 for (BasicBlock &BB : F)
554 for (Instruction &I : BB)
555 if (isStatepoint(I))
556 Statepoints.push_back(&I);
557 for (auto &I : Statepoints)
558 EverMadeChange |= simplifyOffsetableRelocate(*I);
559 }
560
561 // Do this last to clean up use-before-def scenarios introduced by other
562 // preparatory transforms.
563 EverMadeChange |= placeDbgValues(F);
564
565 return EverMadeChange;
566}
567
568/// Merge basic blocks which are connected by a single edge, where one of the
569/// basic blocks has a single successor pointing to the other basic block,
570/// which has a single predecessor.
571bool CodeGenPrepare::eliminateFallThrough(Function &F) {
572 bool Changed = false;
573 // Scan all of the blocks in the function, except for the entry block.
574 // Use a temporary array to avoid iterator being invalidated when
575 // deleting blocks.
576 SmallVector<WeakTrackingVH, 16> Blocks;
577 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
578 Blocks.push_back(&Block);
579
580 for (auto &Block : Blocks) {
581 auto *BB = cast_or_null<BasicBlock>(Block);
582 if (!BB)
583 continue;
584 // If the destination block has a single pred, then this is a trivial
585 // edge, just collapse it.
586 BasicBlock *SinglePred = BB->getSinglePredecessor();
587
588 // Don't merge if BB's address is taken.
589 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
590
591 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
592 if (Term && !Term->isConditional()) {
593 Changed = true;
594 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
595
596 // Merge BB into SinglePred and delete it.
597 MergeBlockIntoPredecessor(BB);
598 }
599 }
600 return Changed;
601}
602
603/// Find a destination block from BB if BB is a mergeable empty block.
604BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
605 // If this block doesn't end with an uncond branch, ignore it.
606 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
607 if (!BI || !BI->isUnconditional())
608 return nullptr;
609
610 // If the instruction before the branch (skipping debug info) isn't a phi
611 // node, then other stuff is happening here.
612 BasicBlock::iterator BBI = BI->getIterator();
613 if (BBI != BB->begin()) {
614 --BBI;
615 while (isa<DbgInfoIntrinsic>(BBI)) {
616 if (BBI == BB->begin())
617 break;
618 --BBI;
619 }
620 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
621 return nullptr;
622 }
623
624 // Do not break infinite loops.
625 BasicBlock *DestBB = BI->getSuccessor(0);
626 if (DestBB == BB)
627 return nullptr;
628
629 if (!canMergeBlocks(BB, DestBB))
630 DestBB = nullptr;
631
632 return DestBB;
633}
634
635/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
636/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
637/// edges in ways that are non-optimal for isel. Start by eliminating these
638/// blocks so we can split them the way we want them.
639bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
640 SmallPtrSet<BasicBlock *, 16> Preheaders;
641 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
642 while (!LoopList.empty()) {
643 Loop *L = LoopList.pop_back_val();
644 LoopList.insert(LoopList.end(), L->begin(), L->end());
645 if (BasicBlock *Preheader = L->getLoopPreheader())
646 Preheaders.insert(Preheader);
647 }
648
649 bool MadeChange = false;
650 // Copy blocks into a temporary array to avoid iterator invalidation issues
651 // as we remove them.
652 // Note that this intentionally skips the entry block.
653 SmallVector<WeakTrackingVH, 16> Blocks;
654 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
655 Blocks.push_back(&Block);
656
657 for (auto &Block : Blocks) {
658 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
659 if (!BB)
660 continue;
661 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
662 if (!DestBB ||
663 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
664 continue;
665
666 eliminateMostlyEmptyBlock(BB);
667 MadeChange = true;
668 }
669 return MadeChange;
670}
671
672bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
673 BasicBlock *DestBB,
674 bool isPreheader) {
675 // Do not delete loop preheaders if doing so would create a critical edge.
676 // Loop preheaders can be good locations to spill registers. If the
677 // preheader is deleted and we create a critical edge, registers may be
678 // spilled in the loop body instead.
679 if (!DisablePreheaderProtect && isPreheader &&
680 !(BB->getSinglePredecessor() &&
681 BB->getSinglePredecessor()->getSingleSuccessor()))
682 return false;
683
684 // Skip merging if the block's successor is also a successor to any callbr
685 // that leads to this block.
686 // FIXME: Is this really needed? Is this a correctness issue?
687 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
688 if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
689 for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
690 if (DestBB == CBI->getSuccessor(i))
691 return false;
692 }
693
694 // Try to skip merging if the unique predecessor of BB is terminated by a
695 // switch or indirect branch instruction, and BB is used as an incoming block
696 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
697 // add COPY instructions in the predecessor of BB instead of BB (if it is not
698 // merged). Note that the critical edge created by merging such blocks won't be
699 // split in MachineSink because the jump table is not analyzable. By keeping
700 // such an empty block (BB), ISel will place COPY instructions in BB, not in the
701 // predecessor of BB.
702 BasicBlock *Pred = BB->getUniquePredecessor();
703 if (!Pred ||
704 !(isa<SwitchInst>(Pred->getTerminator()) ||
705 isa<IndirectBrInst>(Pred->getTerminator())))
706 return true;
707
708 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
709 return true;
710
711 // We use a simple cost heuristic which determines that skipping merging is
712 // profitable if the cost of skipping merging is less than the cost of
713 // merging : Cost(skipping merging) < Cost(merging BB), where the
714 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
715 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
716 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
717 // Freq(Pred) / Freq(BB) > 2.
718 // Note that if there are multiple empty blocks sharing the same incoming
719 // value for the PHIs in the DestBB, we consider them together. In such
720 // case, Cost(merging BB) will be the sum of their frequencies.
721
722 if (!isa<PHINode>(DestBB->begin()))
723 return true;
724
725 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
726
727 // Find all other incoming blocks from which incoming values of all PHIs in
728 // DestBB are the same as the ones from BB.
729 for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
730 ++PI) {
731 BasicBlock *DestBBPred = *PI;
732 if (DestBBPred == BB)
733 continue;
734
735 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
736 return DestPN.getIncomingValueForBlock(BB) ==
737 DestPN.getIncomingValueForBlock(DestBBPred);
738 }))
739 SameIncomingValueBBs.insert(DestBBPred);
740 }
741
742 // See if all of BB's incoming values are the same as the value from Pred. In
743 // this case, there is no reason to skip merging because COPYs are expected to
744 // be placed in Pred already.
745 if (SameIncomingValueBBs.count(Pred))
746 return true;
747
748 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
749 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
750
751 for (auto SameValueBB : SameIncomingValueBBs)
752 if (SameValueBB->getUniquePredecessor() == Pred &&
753 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
754 BBFreq += BFI->getBlockFreq(SameValueBB);
755
756 return PredFreq.getFrequency() <=
757 BBFreq.getFrequency() * FreqRatioToSkipMerge;
758}
759
760/// Return true if we can merge BB into DestBB if there is a single
761/// unconditional branch between them, and BB contains no other non-phi
762/// instructions.
763bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
764 const BasicBlock *DestBB) const {
765 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
766 // the successor. If there are more complex conditions (e.g. preheaders),
767 // don't mess around with them.
768 for (const PHINode &PN : BB->phis()) {
769 for (const User *U : PN.users()) {
770 const Instruction *UI = cast<Instruction>(U);
771 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
772 return false;
773 // If User is inside DestBB block and it is a PHINode then check
774 // incoming value. If incoming value is not from BB then this is
775 // a complex condition (e.g. preheaders) we want to avoid here.
776 if (UI->getParent() == DestBB) {
777 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
778 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
779 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
780 if (Insn && Insn->getParent() == BB &&
781 Insn->getParent() != UPN->getIncomingBlock(I))
782 return false;
783 }
784 }
785 }
786 }
787
788 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
789 // and DestBB may have conflicting incoming values for the block. If so, we
790 // can't merge the block.
791 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
792 if (!DestBBPN) return true; // no conflict.
793
794 // Collect the preds of BB.
795 SmallPtrSet<const BasicBlock*, 16> BBPreds;
796 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
797 // It is faster to get preds from a PHI than with pred_iterator.
798 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
799 BBPreds.insert(BBPN->getIncomingBlock(i));
800 } else {
801 BBPreds.insert(pred_begin(BB), pred_end(BB));
802 }
803
804 // Walk the preds of DestBB.
805 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
806 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
807 if (BBPreds.count(Pred)) { // Common predecessor?
808 for (const PHINode &PN : DestBB->phis()) {
809 const Value *V1 = PN.getIncomingValueForBlock(Pred);
810 const Value *V2 = PN.getIncomingValueForBlock(BB);
811
812 // If V2 is a phi node in BB, look up what the mapped value will be.
813 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
814 if (V2PN->getParent() == BB)
815 V2 = V2PN->getIncomingValueForBlock(Pred);
816
817 // If there is a conflict, bail out.
818 if (V1 != V2) return false;
819 }
820 }
821 }
822
823 return true;
824}
825
826/// Eliminate a basic block that has only phi's and an unconditional branch in
827/// it.
828void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
829 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
830 BasicBlock *DestBB = BI->getSuccessor(0);
831
832 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
833 << *BB << *DestBB);
835 // If the destination block has a single pred, then this is a trivial edge,
836 // just collapse it.
837 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
838 if (SinglePred != DestBB) {
839 assert(SinglePred == BB &&
840 "Single predecessor not the same as predecessor");
841 // Merge DestBB into SinglePred/BB and delete it.
842 MergeBlockIntoPredecessor(DestBB);
843 // Note: BB(=SinglePred) will not be deleted on this path.
844 // DestBB(=its single successor) is the one that was deleted.
845 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
846 return;
847 }
848 }
849
850 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
851 // to handle the new incoming edges it is about to have.
852 for (PHINode &PN : DestBB->phis()) {
853 // Remove the incoming value for BB, and remember it.
854 Value *InVal = PN.removeIncomingValue(BB, false);
855
856 // Two options: either the InVal is a phi node defined in BB or it is some
857 // value that dominates BB.
858 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
859 if (InValPhi && InValPhi->getParent() == BB) {
860 // Add all of the input values of the input PHI as inputs of this phi.
861 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
862 PN.addIncoming(InValPhi->getIncomingValue(i),
863 InValPhi->getIncomingBlock(i));
864 } else {
865 // Otherwise, add one instance of the dominating value for each edge that
866 // we will be adding.
867 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
868 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
869 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
870 } else {
871 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
872 PN.addIncoming(InVal, *PI);
873 }
874 }
875 }
876
877 // The PHIs are now updated, change everything that refers to BB to use
878 // DestBB and remove BB.
879 BB->replaceAllUsesWith(DestBB);
880 BB->eraseFromParent();
881 ++NumBlocksElim;
882
883 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
884}
885
886// Computes a map of base pointer relocation instructions to corresponding
887// derived pointer relocation instructions given a vector of all relocate calls
888static void computeBaseDerivedRelocateMap(
889 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
890 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
891 &RelocateInstMap) {
892 // Collect information in two maps: one primarily for locating the base object
893 // while filling the second map; the second map is the final structure holding
894 // a mapping between Base and corresponding Derived relocate calls
895 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
896 for (auto *ThisRelocate : AllRelocateCalls) {
897 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
898 ThisRelocate->getDerivedPtrIndex());
899 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
900 }
901 for (auto &Item : RelocateIdxMap) {
902 std::pair<unsigned, unsigned> Key = Item.first;
903 if (Key.first == Key.second)
904 // Base relocation: nothing to insert
905 continue;
906
907 GCRelocateInst *I = Item.second;
908 auto BaseKey = std::make_pair(Key.first, Key.first);
909
910 // We're iterating over RelocateIdxMap so we cannot modify it.
911 auto MaybeBase = RelocateIdxMap.find(BaseKey);
912 if (MaybeBase == RelocateIdxMap.end())
913 // TODO: We might want to insert a new base object relocate and gep off
914 // that, if there are enough derived object relocates.
915 continue;
916
917 RelocateInstMap[MaybeBase->second].push_back(I);
918 }
919}
920
921// Accepts a GEP and extracts the operands into a vector provided they're all
922// small integer constants
923static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
924 SmallVectorImpl<Value *> &OffsetV) {
925 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
926 // Only accept small constant integer operands
927 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
928 if (!Op || Op->getZExtValue() > 20)
929 return false;
930 }
931
932 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
933 OffsetV.push_back(GEP->getOperand(i));
934 return true;
935}
936
937// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
938// replace, computes a replacement, and affects it.
939static bool
940simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
941 const SmallVectorImpl<GCRelocateInst *> &Targets) {
942 bool MadeChange = false;
943 // We must ensure that the relocation of a derived pointer is defined after the
944 // relocation of the base pointer. If we find a relocation corresponding to the
945 // base that is defined earlier than the base's relocation, we move the base's
946 // relocation right before the found relocation. We consider only relocations in
947 // the same basic block as the base's relocation; relocations from other basic
948 // blocks are skipped by this optimization.
949 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
950 &*R != RelocatedBase; ++R)
951 if (auto RI = dyn_cast<GCRelocateInst>(R))
952 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
953 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
954 RelocatedBase->moveBefore(RI);
955 break;
956 }
957
958 for (GCRelocateInst *ToReplace : Targets) {
959 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
960 "Not relocating a derived object of the original base object");
961 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
962 // A duplicate relocate call. TODO: coalesce duplicates.
963 continue;
964 }
965
966 if (RelocatedBase->getParent() != ToReplace->getParent()) {
967 // Base and derived relocates are in different basic blocks.
968 // In this case transform is only valid when base dominates derived
969 // relocate. However it would be too expensive to check dominance
970 // for each such relocate, so we skip the whole transformation.
971 continue;
972 }
973
974 Value *Base = ToReplace->getBasePtr();
975 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
976 if (!Derived || Derived->getPointerOperand() != Base)
977 continue;
978
979 SmallVector<Value *, 2> OffsetV;
980 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
981 continue;
982
983 // Create a Builder and replace the target callsite with a gep
984 assert(RelocatedBase->getNextNode() &&
985 "Should always have one since it's not a terminator");
986
987 // Insert after RelocatedBase
988 IRBuilder<> Builder(RelocatedBase->getNextNode());
989 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
990
991 // If gc_relocate does not match the actual type, cast it to the right type.
992 // In theory, there must be a bitcast after gc_relocate if the type does not
993 // match, and we should reuse it to get the derived pointer. But it could be
994 // cases like this:
995 // bb1:
996 // ...
997 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
998 // br label %merge
999 //
1000 // bb2:
1001 // ...
1002 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
1003 // br label %merge
1004 //
1005 // merge:
1006 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1007 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1008 //
1009 // In this case, we cannot find the bitcast anymore, so we insert a new bitcast
1010 // whether or not one already exists. In this way, we can handle all cases, and
1011 // the extra bitcast should be optimized away in later passes.
1012 Value *ActualRelocatedBase = RelocatedBase;
1013 if (RelocatedBase->getType() != Base->getType()) {
1014 ActualRelocatedBase =
1015 Builder.CreateBitCast(RelocatedBase, Base->getType());
1016 }
1017 Value *Replacement = Builder.CreateGEP(
1018 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1019 Replacement->takeName(ToReplace);
1020 // If the newly generated derived pointer's type does not match the original derived
1021 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1022 Value *ActualReplacement = Replacement;
1023 if (Replacement->getType() != ToReplace->getType()) {
1024 ActualReplacement =
1025 Builder.CreateBitCast(Replacement, ToReplace->getType());
1026 }
1027 ToReplace->replaceAllUsesWith(ActualReplacement);
1028 ToReplace->eraseFromParent();
1029
1030 MadeChange = true;
1031 }
1032 return MadeChange;
1033}
1034
1035// Turns this:
1036//
1037// %base = ...
1038// %ptr = gep %base + 15
1039// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1040// %base' = relocate(%tok, i32 4, i32 4)
1041// %ptr' = relocate(%tok, i32 4, i32 5)
1042// %val = load %ptr'
1043//
1044// into this:
1045//
1046// %base = ...
1047// %ptr = gep %base + 15
1048// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1049// %base' = gc.relocate(%tok, i32 4, i32 4)
1050// %ptr' = gep %base' + 15
1051// %val = load %ptr'
1052bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1053 bool MadeChange = false;
1054 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1055
1056 for (auto *U : I.users())
1057 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1058 // Collect all the relocate calls associated with a statepoint
1059 AllRelocateCalls.push_back(Relocate);
1060
1061 // We need at least one base pointer relocation + one derived pointer
1062 // relocation to mangle
1063 if (AllRelocateCalls.size() < 2)
1064 return false;
1065
1066 // RelocateInstMap is a mapping from the base relocate instruction to the
1067 // corresponding derived relocate instructions
1068 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1069 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1070 if (RelocateInstMap.empty())
1071 return false;
1072
1073 for (auto &Item : RelocateInstMap)
1074 // Item.first is the RelocatedBase to offset against
1075 // Item.second is the vector of Targets to replace
1076 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1077 return MadeChange;
1078}
1079
1080/// Sink the specified cast instruction into its user blocks.
1081static bool SinkCast(CastInst *CI) {
1082 BasicBlock *DefBB = CI->getParent();
1083
1084 /// InsertedCasts - Only insert a cast in each block once.
1085 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1086
1087 bool MadeChange = false;
1088 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1089 UI != E; ) {
1090 Use &TheUse = UI.getUse();
1091 Instruction *User = cast<Instruction>(*UI);
1092
1093 // Figure out which BB this cast is used in. For PHI's this is the
1094 // appropriate predecessor block.
1095 BasicBlock *UserBB = User->getParent();
1096 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1097 UserBB = PN->getIncomingBlock(TheUse);
1098 }
1099
1100 // Preincrement use iterator so we don't invalidate it.
1101 ++UI;
1102
1103 // The first insertion point of a block containing an EH pad is after the
1104 // pad. If the pad is the user, we cannot sink the cast past the pad.
1105 if (User->isEHPad())
1106 continue;
1107
1108 // If the block selected to receive the cast is an EH pad that does not
1109 // allow non-PHI instructions before the terminator, we can't sink the
1110 // cast.
1111 if (UserBB->getTerminator()->isEHPad())
1112 continue;
1113
1114 // If this user is in the same block as the cast, don't change the cast.
1115 if (UserBB == DefBB) continue;
1116
1117 // If we have already inserted a cast into this block, use it.
1118 CastInst *&InsertedCast = InsertedCasts[UserBB];
1119
1120 if (!InsertedCast) {
1121 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1122 assert(InsertPt != UserBB->end());
1123 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1124 CI->getType(), "", &*InsertPt);
1125 InsertedCast->setDebugLoc(CI->getDebugLoc());
1126 }
1127
1128 // Replace a use of the cast with a use of the new cast.
1129 TheUse = InsertedCast;
1130 MadeChange = true;
1131 ++NumCastUses;
1132 }
1133
1134 // If we removed all uses, nuke the cast.
1135 if (CI->use_empty()) {
1136 salvageDebugInfo(*CI);
1137 CI->eraseFromParent();
1138 MadeChange = true;
1139 }
1140
1141 return MadeChange;
1142}
1143
1144/// If the specified cast instruction is a noop copy (e.g. it's casting from
1145/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1146/// reduce the number of virtual registers that must be created and coalesced.
1147///
1148/// Return true if any changes are made.
1149static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1150 const DataLayout &DL) {
1151 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1152 // than sinking only nop casts, but is helpful on some platforms.
1153 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1154 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1155 ASC->getDestAddressSpace()))
1156 return false;
1157 }
1158
1159 // If this is a noop copy,
1160 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1161 EVT DstVT = TLI.getValueType(DL, CI->getType());
1162
1163 // Is this an fp<->int conversion?
1164 if (SrcVT.isInteger() != DstVT.isInteger())
1165 return false;
1166
1167 // If this is an extension, it will be a zero or sign extension, which
1168 // isn't a noop.
1169 if (SrcVT.bitsLT(DstVT)) return false;
1170
1171 // If these values will be promoted, find out what they will be promoted
1172 // to. This helps us consider truncates on PPC as noop copies when they
1173 // are.
1174 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1175 TargetLowering::TypePromoteInteger)
1176 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1177 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1178 TargetLowering::TypePromoteInteger)
1179 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1180
1181 // If, after promotion, these are the same types, this is a noop copy.
1182 if (SrcVT != DstVT)
1183 return false;
1184
1185 return SinkCast(CI);
1186}
1187
1188bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1189 Value *Arg0, Value *Arg1,
1190 CmpInst *Cmp,
1191 Intrinsic::ID IID) {
1192 if (BO->getParent() != Cmp->getParent()) {
1193 // We used to use a dominator tree here to allow multi-block optimization.
1194 // But that was problematic because:
1195 // 1. It could cause a perf regression by hoisting the math op into the
1196 // critical path.
1197 // 2. It could cause a perf regression by creating a value that was live
1198 // across multiple blocks and increasing register pressure.
1199 // 3. Use of a dominator tree could cause large compile-time regression.
1200 // This is because we recompute the DT on every change in the main CGP
1201 // run-loop. The recomputing is probably unnecessary in many cases, so if
1202 // that was fixed, using a DT here would be ok.
1203 return false;
1204 }
1205
1206 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1207 if (BO->getOpcode() == Instruction::Add &&
1208 IID == Intrinsic::usub_with_overflow) {
1209 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1210 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1211 }
1212
1213 // Insert at the first instruction of the pair.
1214 Instruction *InsertPt = nullptr;
1215 for (Instruction &Iter : *Cmp->getParent()) {
1216 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1217 // the overflow intrinsic are defined.
1218 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1219 InsertPt = &Iter;
1220 break;
1221 }
1222 }
1223 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1224
1225 IRBuilder<> Builder(InsertPt);
1226 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1227 if (BO->getOpcode() != Instruction::Xor) {
1228 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1229 BO->replaceAllUsesWith(Math);
1230 } else
1231 assert(BO->hasOneUse() &&
1232 "Patterns with XOr should use the BO only in the compare");
1233 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1234 Cmp->replaceAllUsesWith(OV);
1235 Cmp->eraseFromParent();
1236 BO->eraseFromParent();
1237 return true;
1238}
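// A minimal standalone sketch (plain C++, not part of CodeGenPrepare) of the identity
// the canonicalization above relies on: for unsigned values, (add X, C) produces the
// same bits as (sub X, -C), and the usubo overflow bit of that subtraction is simply
// X u< -C. Uses only the GCC/Clang overflow builtins; the constant is arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0xFFFFFFF6u; // the canonical IR constant, i.e. -10 as u32
  const uint32_t NegC = 0u - C;   // 10; what the pass would feed to usubo

  for (uint32_t X : {0u, 5u, 9u, 10u, 11u, 0xFFFFFFFFu}) {
    uint32_t AddRes = X + C;                                   // (add X, C)
    uint32_t SubRes;
    bool Overflow = __builtin_sub_overflow(X, NegC, &SubRes);  // usubo(X, -C)
    assert(AddRes == SubRes);                                  // same math result
    assert(Overflow == (X < NegC));                            // overflow bit == X u< -C
  }
  return 0;
}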
1239
1240/// Match special-case patterns that check for unsigned add overflow.
1241static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1242 BinaryOperator *&Add) {
1243 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1244 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1245 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1246
1247 // We are not expecting non-canonical/degenerate code. Just bail out.
1248 if (isa<Constant>(A))
1249 return false;
1250
1251 ICmpInst::Predicate Pred = Cmp->getPredicate();
1252 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1253 B = ConstantInt::get(B->getType(), 1);
1254 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1255 B = ConstantInt::get(B->getType(), -1);
1256 else
1257 return false;
1258
1259 // Check the users of the variable operand of the compare looking for an add
1260 // with the adjusted constant.
1261 for (User *U : A->users()) {
1262 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1263 Add = cast<BinaryOperator>(U);
1264 return true;
1265 }
1266 }
1267 return false;
1268}
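// A small standalone check (plain 32-bit unsigned wraparound assumed) of the two
// special cases matched above: A + 1 overflows exactly when A is the max value
// (icmp eq A, -1), and A + (-1) overflows exactly when A is non-zero (icmp ne A, 0).
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t A : {0u, 1u, 42u, 0xFFFFFFFEu, 0xFFFFFFFFu}) {
    uint32_t Sum;
    bool OvPlus1 = __builtin_add_overflow(A, 1u, &Sum);
    assert(OvPlus1 == (A == 0xFFFFFFFFu));                      // icmp eq A, -1

    bool OvMinus1 = __builtin_add_overflow(A, 0xFFFFFFFFu, &Sum); // A + (-1)
    assert(OvMinus1 == (A != 0u));                              // icmp ne A, 0
  }
  return 0;
}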
1269
1270/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1271/// intrinsic. Return true if any changes were made.
1272bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1273 bool &ModifiedDT) {
1274 Value *A, *B;
1275 BinaryOperator *Add;
1276 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1277 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1278 return false;
1279 // Set A and B from the add we found via matchUAddWithOverflowConstantEdgeCases.
1280 A = Add->getOperand(0);
1281 B = Add->getOperand(1);
1282 }
1283
1284 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1285 TLI->getValueType(*DL, Add->getType()),
1286 Add->hasNUsesOrMore(2)))
1287 return false;
1288
1289 // We don't want to move around uses of condition values this late, so we
1290 // check if it is legal to create the call to the intrinsic in the basic
1291 // block containing the icmp.
1292 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1293 return false;
1294
1295 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1296 Intrinsic::uadd_with_overflow))
1297 return false;
1298
1299 // Reset callers - do not crash by iterating over a dead instruction.
1300 ModifiedDT = true;
1301 return true;
1302}
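// A minimal sketch (standalone C++, not the pass itself) of the pattern that
// llvm.uadd.with.overflow computes and that m_UAddWithOverflow matches: the
// overflow bit of an unsigned add equals comparing the wrapped sum against
// either input.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 123u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t A : Vals)
    for (uint32_t B : Vals) {
      uint32_t Sum;
      bool Ov = __builtin_add_overflow(A, B, &Sum); // uadd.with.overflow
      assert(Ov == (Sum < A));                      // icmp ult (add A, B), A
      assert(Ov == (Sum < B));                      // ...or against B
    }
  return 0;
}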
1303
1304bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1305 bool &ModifiedDT) {
1306 // We are not expecting non-canonical/degenerate code. Just bail out.
1307 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1308 if (isa<Constant>(A) && isa<Constant>(B))
1309 return false;
1310
1311 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1312 ICmpInst::Predicate Pred = Cmp->getPredicate();
1313 if (Pred == ICmpInst::ICMP_UGT) {
1314 std::swap(A, B);
1315 Pred = ICmpInst::ICMP_ULT;
1316 }
1317 // Convert special-case: (A == 0) is the same as (A u< 1).
1318 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1319 B = ConstantInt::get(B->getType(), 1);
1320 Pred = ICmpInst::ICMP_ULT;
1321 }
1322 // Convert special-case: (A != 0) is the same as (0 u< A).
1323 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1324 std::swap(A, B);
1325 Pred = ICmpInst::ICMP_ULT;
1326 }
1327 if (Pred != ICmpInst::ICMP_ULT)
1328 return false;
1329
1330 // Walk the users of a variable operand of a compare looking for a subtract or
1331 // add with that same operand. Also match the 2nd operand of the compare to
1332 // the add/sub, but that may be a negated constant operand of an add.
1333 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1334 BinaryOperator *Sub = nullptr;
1335 for (User *U : CmpVariableOperand->users()) {
1336 // A - B, A u< B --> usubo(A, B)
1337 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1338 Sub = cast<BinaryOperator>(U);
1339 break;
1340 }
1341
1342 // A + (-C), A u< C (canonicalized form of (sub A, C))
1343 const APInt *CmpC, *AddC;
1344 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1345 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1346 Sub = cast<BinaryOperator>(U);
1347 break;
1348 }
1349 }
1350 if (!Sub)
1351 return false;
1352
1353 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1354 TLI->getValueType(*DL, Sub->getType()),
1355 Sub->hasNUsesOrMore(2)))
1356 return false;
1357
1358 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1359 Cmp, Intrinsic::usub_with_overflow))
1360 return false;
1361
1362 // Reset callers - do not crash by iterating over a dead instruction.
1363 ModifiedDT = true;
1364 return true;
1365}
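// A standalone sketch (plain C++, not the pass itself) of why the compares
// rewritten above all reduce to the usubo overflow bit: for unsigned values,
// A u< B is exactly the borrow produced by A - B, and the folded special
// cases are just restatements of that comparison.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 7u, 100u, 0xFFFFFFFFu};
  for (uint32_t A : Vals)
    for (uint32_t B : Vals) {
      uint32_t Diff;
      bool Borrow = __builtin_sub_overflow(A, B, &Diff); // usubo(A, B)
      assert(Borrow == (A < B));                         // icmp ult A, B

      // The special cases converted above:
      assert((A == 0u) == (A < 1u));   // (A == 0)  <=>  A u< 1
      assert((A != 0u) == (0u < A));   // (A != 0)  <=>  0 u< A
    }
  return 0;
}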
1366
1367/// Sink the given CmpInst into user blocks to reduce the number of virtual
1368/// registers that must be created and coalesced. This is a clear win except on
1369/// targets with multiple condition code registers (PowerPC), where it might
1370/// lose; some adjustment may be wanted there.
1371///
1372/// Return true if any changes are made.
1373static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1374 if (TLI.hasMultipleConditionRegisters())
1375 return false;
1376
1377 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1378 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1379 return false;
1380
1381 // Only insert a cmp in each block once.
1382 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1383
1384 bool MadeChange = false;
1385 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1386 UI != E; ) {
1387 Use &TheUse = UI.getUse();
1388 Instruction *User = cast<Instruction>(*UI);
1389
1390 // Preincrement use iterator so we don't invalidate it.
1391 ++UI;
1392
1393 // Don't bother for PHI nodes.
1394 if (isa<PHINode>(User))
1395 continue;
1396
1397 // Figure out which BB this cmp is used in.
1398 BasicBlock *UserBB = User->getParent();
1399 BasicBlock *DefBB = Cmp->getParent();
1400
1401 // If this user is in the same block as the cmp, don't change the cmp.
1402 if (UserBB == DefBB) continue;
1403
1404 // If we have already inserted a cmp into this block, use it.
1405 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1406
1407 if (!InsertedCmp) {
1408 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1409 assert(InsertPt != UserBB->end());
1410 InsertedCmp =
1411 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1412 Cmp->getOperand(0), Cmp->getOperand(1), "",
1413 &*InsertPt);
1414 // Propagate the debug info.
1415 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1416 }
1417
1418 // Replace a use of the cmp with a use of the new cmp.
1419 TheUse = InsertedCmp;
1420 MadeChange = true;
1421 ++NumCmpUses;
1422 }
1423
1424 // If we removed all uses, nuke the cmp.
1425 if (Cmp->use_empty()) {
1426 Cmp->eraseFromParent();
1427 MadeChange = true;
1428 }
1429
1430 return MadeChange;
1431}
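// An illustrative sketch (generic C++ with hypothetical data, not LLVM's use
// lists) of the "preincrement the iterator before rewriting the use" idiom used
// in SinkCast and sinkCmpExpression above: mutating the current element can
// unlink it from the list being walked, so the iterator is advanced first.
#include <cassert>
#include <list>

int main() {
  std::list<int> Uses = {1, 2, 3, 4, 5};

  for (auto It = Uses.begin(), End = Uses.end(); It != End;) {
    auto Cur = It;
    ++It;              // advance first, exactly like '++UI' above
    if (*Cur % 2 == 0)
      Uses.erase(Cur); // mutating via Cur no longer touches It
  }

  assert(Uses == std::list<int>({1, 3, 5}));
  return 0;
}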
1432
1433/// For pattern like:
1434///
1435/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1436/// ...
1437/// DomBB:
1438/// ...
1439/// br DomCond, TrueBB, CmpBB
1440/// CmpBB: (with DomBB being the single predecessor)
1441/// ...
1442/// Cmp = icmp eq CmpOp0, CmpOp1
1443/// ...
1444///
1445/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1446/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1447/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1448/// After that, DomCond and Cmp can use the same comparison, eliminating one
1449/// comparison.
1450///
1451/// Return true if any changes are made.
1452static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1453 const TargetLowering &TLI) {
1454 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1455 return false;
1456
1457 ICmpInst::Predicate Pred = Cmp->getPredicate();
1458 if (Pred != ICmpInst::ICMP_EQ)
1459 return false;
1460
1461 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1462 // icmp slt/sgt would introduce more redundant LLVM IR.
1463 for (User *U : Cmp->users()) {
1464 if (isa<BranchInst>(U))
1465 continue;
1466 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1467 continue;
1468 return false;
1469 }
1470
1471 // This is a cheap/incomplete check for dominance - just match a single
1472 // predecessor with a conditional branch.
1473 BasicBlock *CmpBB = Cmp->getParent();
1474 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1475 if (!DomBB)
1476 return false;
1477
1478 // We want to ensure that the only way control gets to the comparison of
1479 // interest is that a less/greater than comparison on the same operands is
1480 // false.
1481 Value *DomCond;
1482 BasicBlock *TrueBB, *FalseBB;
1483 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1484 return false;
1485 if (CmpBB != FalseBB)
1486 return false;
1487
1488 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1489 ICmpInst::Predicate DomPred;
1490 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1491 return false;
1492 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1493 return false;
1494
1495 // Convert the equality comparison to the opposite of the dominating
1496 // comparison and swap the direction for all branch/select users.
1497 // We have conceptually converted:
1498 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1499 // to
1500 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1501 // And similarly for branches.
1502 for (User *U : Cmp->users()) {
1503 if (auto *BI = dyn_cast<BranchInst>(U)) {
1504 assert(BI->isConditional() && "Must be conditional");
1505 BI->swapSuccessors();
1506 continue;
1507 }
1508 if (auto *SI = dyn_cast<SelectInst>(U)) {
1509 // Swap operands
1510 SI->swapValues();
1511 SI->swapProfMetadata();
1512 continue;
1513 }
1514 llvm_unreachable("Must be a branch or a select");
1515 }
1516 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1517 return true;
1518}
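// A quick standalone check (plain C++; the LT/EQ/GT markers are arbitrary) of
// the conceptual rewrite described above: once the dominating 'a < b' test has
// failed, asking 'a == b' and asking 'a > b' with the select arms swapped pick
// the same result.
#include <cassert>

static int beforeRewrite(int a, int b) { return (a < b) ? 1 : (a == b) ? 2 : 3; }
static int afterRewrite(int a, int b)  { return (a < b) ? 1 : (a > b)  ? 3 : 2; }

int main() {
  for (int a = -3; a <= 3; ++a)
    for (int b = -3; b <= 3; ++b)
      assert(beforeRewrite(a, b) == afterRewrite(a, b));
  return 0;
}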
1519
1520bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
1521 if (sinkCmpExpression(Cmp, *TLI))
1522 return true;
1523
1524 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1525 return true;
1526
1527 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1528 return true;
1529
1530 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1531 return true;
1532
1533 return false;
1534}
1535
1536/// Duplicate and sink the given 'and' instruction into user blocks where it is
1537/// used in a compare to allow isel to generate better code for targets where
1538/// this operation can be combined.
1539///
1540/// Return true if any changes are made.
1541static bool sinkAndCmp0Expression(Instruction *AndI,
1542 const TargetLowering &TLI,
1543 SetOfInstrs &InsertedInsts) {
1544 // Double-check that we're not trying to optimize an instruction that was
1545 // already optimized by some other part of this pass.
1546 assert(!InsertedInsts.count(AndI) &&
1547 "Attempting to optimize already optimized and instruction");
1548 (void) InsertedInsts;
1549
1550 // Nothing to do for single use in same basic block.
1551 if (AndI->hasOneUse() &&
1552 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1553 return false;
1554
1555 // Try to avoid cases where sinking/duplicating is likely to increase register
1556 // pressure.
1557 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1558 !isa<ConstantInt>(AndI->getOperand(1)) &&
1559 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1560 return false;
1561
1562 for (auto *U : AndI->users()) {
1563 Instruction *User = cast<Instruction>(U);
1564
1565 // Only sink 'and' feeding icmp with 0.
1566 if (!isa<ICmpInst>(User))
1567 return false;
1568
1569 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1570 if (!CmpC || !CmpC->isZero())
1571 return false;
1572 }
1573
1574 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1575 return false;
1576
1577 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1578 LLVM_DEBUG(AndI->getParent()->dump());
1579
1580 // Push the 'and' into the same block as the icmp 0. There should only be
1581 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1582 // others, so we don't need to keep track of which BBs we insert into.
1583 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1584 UI != E; ) {
1585 Use &TheUse = UI.getUse();
1586 Instruction *User = cast<Instruction>(*UI);
1587
1588 // Preincrement use iterator so we don't invalidate it.
1589 ++UI;
1590
1591 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1592
1593 // Keep the 'and' in the same place if the use is already in the same block.
1594 Instruction *InsertPt =
1595 User->getParent() == AndI->getParent() ? AndI : User;
1596 Instruction *InsertedAnd =
1597 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1598 AndI->getOperand(1), "", InsertPt);
1599 // Propagate the debug info.
1600 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1601
1602 // Replace a use of the 'and' with a use of the new 'and'.
1603 TheUse = InsertedAnd;
1604 ++NumAndUses;
1605 LLVM_DEBUG(User->getParent()->dump());
1606 }
1607
1608 // We removed all uses, nuke the and.
1609 AndI->eraseFromParent();
1610 return true;
1611}
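// A minimal illustration (standalone C++, values chosen arbitrarily) of the
// shape being duplicated above: an 'and' whose only consumers are comparisons
// against zero, i.e. "is any bit under this mask set?" -- the form many targets
// can fold into a single test/tst-style instruction when both pieces sit in the
// same block.
#include <cassert>
#include <cstdint>

static bool anyMaskedBitSet(uint32_t X, uint32_t Mask) {
  return (X & Mask) != 0; // icmp ne (and X, Mask), 0
}

int main() {
  assert(anyMaskedBitSet(0b1010u, 0b0010u));
  assert(!anyMaskedBitSet(0b1010u, 0b0101u));
  return 0;
}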
1612
1613/// Check if the candidates could be combined with a shift instruction, which
1614/// includes:
1615/// 1. Truncate instruction
1616/// 2. And instruction and the imm is a mask of the low bits:
1617/// imm & (imm+1) == 0
1618static bool isExtractBitsCandidateUse(Instruction *User) {
1619 if (!isa<TruncInst>(User)) {
1620 if (User->getOpcode() != Instruction::And ||
1621 !isa<ConstantInt>(User->getOperand(1)))
1622 return false;
1623
1624 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1625
1626 if ((Cimm & (Cimm + 1)).getBoolValue())
1627 return false;
1628 }
1629 return true;
1630}
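// A standalone sketch (plain C++) of the mask test used above: a constant is a
// mask of the low bits exactly when adding one clears every set bit, i.e.
// imm & (imm + 1) == 0. This holds for 0, 1, 3, 7, ..., and for all-ones.
#include <cassert>
#include <cstdint>

static bool isLowBitMask(uint64_t Imm) {
  return (Imm & (Imm + 1)) == 0;
}

int main() {
  assert(isLowBitMask(0x0));
  assert(isLowBitMask(0x1));
  assert(isLowBitMask(0xFF));
  assert(isLowBitMask(0xFFFFFFFFFFFFFFFFull)); // +1 wraps to 0
  assert(!isLowBitMask(0x2));                  // 0b10: gap below the set bit
  assert(!isLowBitMask(0xF0));
  return 0;
}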
1631
1632/// Sink both shift and truncate instruction to the use of truncate's BB.
1633static bool
1634SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1635 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1636 const TargetLowering &TLI, const DataLayout &DL) {
1637 BasicBlock *UserBB = User->getParent();
1638 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1639 auto *TruncI = cast<TruncInst>(User);
1640 bool MadeChange = false;
1641
1642 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1643 TruncE = TruncI->user_end();
1644 TruncUI != TruncE;) {
1645
1646 Use &TruncTheUse = TruncUI.getUse();
1647 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1648 // Preincrement use iterator so we don't invalidate it.
1649
1650 ++TruncUI;
1651
1652 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1653 if (!ISDOpcode)
1654 continue;
1655
1656 // If the use is actually a legal node, there will not be an
1657 // implicit truncate.
1658 // FIXME: always querying the result type is just an
1659 // approximation; some nodes' legality is determined by the
1660 // operand or other means. There's no good way to find out though.
1661 if (TLI.isOperationLegalOrCustom(
1662 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1663 continue;
1664
1665 // Don't bother for PHI nodes.
1666 if (isa<PHINode>(TruncUser))
1667 continue;
1668
1669 BasicBlock *TruncUserBB = TruncUser->getParent();
1670
1671 if (UserBB == TruncUserBB)
1672 continue;
1673
1674 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1675 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1676
1677 if (!InsertedShift && !InsertedTrunc) {
1678 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1679 assert(InsertPt != TruncUserBB->end());
1680 // Sink the shift
1681 if (ShiftI->getOpcode() == Instruction::AShr)
1682 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1683 "", &*InsertPt);
1684 else
1685 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1686 "", &*InsertPt);
1687 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1688
1689 // Sink the trunc
1690 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1691 TruncInsertPt++;
1692 assert(TruncInsertPt != TruncUserBB->end());
1693
1694 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1695 TruncI->getType(), "", &*TruncInsertPt);
1696 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1697
1698 MadeChange = true;
1699
1700 TruncTheUse = InsertedTrunc;
1701 }
1702 }
1703 return MadeChange;
1704}
1705
1706/// Sink the shift *right* instruction into user blocks if the uses could
1707/// potentially be combined with this shift instruction to generate a BitExtract
1708/// instruction. It is only applied if the architecture supports a BitExtract
1709/// instruction. Here is an example:
1710/// BB1:
1711/// %x.extract.shift = lshr i64 %arg1, 32
1712/// BB2:
1713/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1714/// ==>
1715///
1716/// BB2:
1717/// %x.extract.shift.1 = lshr i64 %arg1, 32
1718/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1719///
1720/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1721/// instruction.
1722/// Return true if any changes are made.
1723static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1724 const TargetLowering &TLI,
1725 const DataLayout &DL) {
1726 BasicBlock *DefBB = ShiftI->getParent();
1727
1728 /// Only insert instructions in each block once.
1729 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1730
1731 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1732
1733 bool MadeChange = false;
1734 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1735 UI != E;) {
1736 Use &TheUse = UI.getUse();
1737 Instruction *User = cast<Instruction>(*UI);
1738 // Preincrement use iterator so we don't invalidate it.
1739 ++UI;
1740
1741 // Don't bother for PHI nodes.
1742 if (isa<PHINode>(User))
1743 continue;
1744
1745 if (!isExtractBitsCandidateUse(User))
1746 continue;
1747
1748 BasicBlock *UserBB = User->getParent();
1749
1750 if (UserBB == DefBB) {
1751 // If the shift and truncate instructions are in the same BB, the use of
1752 // the truncate (TruncUse) may still introduce another truncate if its type
1753 // is not legal. In this case, we would like to sink both the shift and the
1754 // truncate instruction into the BB of TruncUse.
1755 // for example:
1756 // BB1:
1757 // i64 shift.result = lshr i64 opnd, imm
1758 // trunc.result = trunc shift.result to i16
1759 //
1760 // BB2:
1761 // ----> We will have an implicit truncate here if the architecture does
1762 // not have i16 compare.
1763 // cmp i16 trunc.result, opnd2
1764 //
1765 if (isa<TruncInst>(User) && shiftIsLegal
1766 // If the type of the truncate is legal, no truncate will be
1767 // introduced in other basic blocks.
1768 &&
1769 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1770 MadeChange =
1771 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1772
1773 continue;
1774 }
1775 // If we have already inserted a shift into this block, use it.
1776 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1777
1778 if (!InsertedShift) {
1779 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1780 assert(InsertPt != UserBB->end());
1781
1782 if (ShiftI->getOpcode() == Instruction::AShr)
1783 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1784 "", &*InsertPt);
1785 else
1786 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1787 "", &*InsertPt);
1788 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1789
1790 MadeChange = true;
1791 }
1792
1793 // Replace a use of the shift with a use of the new shift.
1794 TheUse = InsertedShift;
1795 }
1796
1797 // If we removed all uses, or there are none, nuke the shift.
1798 if (ShiftI->use_empty()) {
1799 salvageDebugInfo(*ShiftI);
1800 ShiftI->eraseFromParent();
1801 MadeChange = true;
1802 }
1803
1804 return MadeChange;
1805}
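// An illustrative sketch (standalone C++; the concrete machine instruction is
// target-dependent) of the pattern the sinking above tries to keep together: a
// right shift followed by a truncate is just a bit-field extract, which an
// architecture with a BitExtract-style instruction can emit as one operation
// when both pieces end up in the same block.
#include <cassert>
#include <cstdint>

static uint16_t extractBits32To47(uint64_t X) {
  return static_cast<uint16_t>(X >> 32); // lshr i64 %x, 32 ; trunc to i16
}

int main() {
  assert(extractBits32To47(0x0000ABCD00000000ull) == 0xABCD);
  assert(extractBits32To47(0x1234ABCD5678FFFFull) == 0xABCD);
  return 0;
}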
1806
1807/// If counting leading or trailing zeros is an expensive operation and a zero
1808/// input is defined, add a check for zero to avoid calling the intrinsic.
1809///
1810/// We want to transform:
1811/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1812///
1813/// into:
1814/// entry:
1815/// %cmpz = icmp eq i64 %A, 0
1816/// br i1 %cmpz, label %cond.end, label %cond.false
1817/// cond.false:
1818/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1819/// br label %cond.end
1820/// cond.end:
1821/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1822///
1823/// If the transform is performed, return true and set ModifiedDT to true.
1824static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1825 const TargetLowering *TLI,
1826 const DataLayout *DL,
1827 bool &ModifiedDT) {
1828 // If a zero input is undefined, it doesn't make sense to despeculate that.
1829 if (match(CountZeros->getOperand(1), m_One()))
1830 return false;
1831
1832 // If it's cheap to speculate, there's nothing to do.
1833 auto IntrinsicID = CountZeros->getIntrinsicID();
1834 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1835 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1836 return false;
1837
1838 // Only handle legal scalar cases. Anything else requires too much work.
1839 Type *Ty = CountZeros->getType();
1840 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1841 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1842 return false;
1843
1844 // The intrinsic will be sunk behind a compare against zero and branch.
1845 BasicBlock *StartBlock = CountZeros->getParent();
1846 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1847
1848 // Create another block after the count zero intrinsic. A PHI will be added
1849 // in this block to select the result of the intrinsic or the bit-width
1850 // constant if the input to the intrinsic is zero.
1851 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1852 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1853
1854 // Set up a builder to create a compare, conditional branch, and PHI.
1855 IRBuilder<> Builder(CountZeros->getContext());
1856 Builder.SetInsertPoint(StartBlock->getTerminator());
1857 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1858
1859 // Replace the unconditional branch that was created by the first split with
1860 // a compare against zero and a conditional branch.
1861 Value *Zero = Constant::getNullValue(Ty);
1862 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1863 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1864 StartBlock->getTerminator()->eraseFromParent();
1865
1866 // Create a PHI in the end block to select either the output of the intrinsic
1867 // or the bit width of the operand.
1868 Builder.SetInsertPoint(&EndBlock->front());
1869 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1870 CountZeros->replaceAllUsesWith(PN);
1871 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1872 PN->addIncoming(BitWidth, StartBlock);
1873 PN->addIncoming(CountZeros, CallBlock);
1874
1875 // We are explicitly handling the zero case, so we can set the intrinsic's
1876 // undefined zero argument to 'true'. This will also prevent reprocessing the
1877 // intrinsic; we only despeculate when a zero input is defined.
1878 CountZeros->setArgOperand(1, Builder.getTrue());
1879 ModifiedDT = true;
1880 return true;
1881}
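// A scalar sketch (plain C++; the GCC/Clang builtin __builtin_ctzll stands in
// for llvm.cttz with the zero-is-undef flag set) of the control flow created
// above: the intrinsic is only reached when the input is known non-zero, and
// the zero case selects the bit width through the PHI instead.
#include <cassert>
#include <cstdint>

static uint64_t cttz64ZeroDefined(uint64_t A) {
  if (A == 0)                  // %cmpz = icmp eq i64 %A, 0
    return 64;                 // incoming value from %entry
  return __builtin_ctzll(A);   // %z in %cond.false (zero is now impossible)
}

int main() {
  assert(cttz64ZeroDefined(0) == 64);
  assert(cttz64ZeroDefined(1) == 0);
  assert(cttz64ZeroDefined(0x8000000000000000ull) == 63);
  return 0;
}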
1882
1883bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1884 BasicBlock *BB = CI->getParent();
1885
1886 // Lower inline assembly if we can.
1887 // If we found an inline asm expression, and if the target knows how to
1888 // lower it to normal LLVM code, do so now.
1889 if (isa<InlineAsm>(CI->getCalledValue())) {
1890 if (TLI->ExpandInlineAsm(CI)) {
1891 // Avoid invalidating the iterator.
1892 CurInstIterator = BB->begin();
1893 // Avoid processing instructions out of order, which could cause
1894 // reuse before a value is defined.
1895 SunkAddrs.clear();
1896 return true;
1897 }
1898 // Sink address computing for memory operands into the block.
1899 if (optimizeInlineAsmInst(CI))
1900 return true;
1901 }
1902
1903 // Align the pointer arguments to this call if the target thinks it's a good
1904 // idea
1905 unsigned MinSize, PrefAlign;
1906 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1907 for (auto &Arg : CI->arg_operands()) {
1908 // We want to align both objects whose address is used directly and
1909 // objects whose address is used in casts and GEPs, though it only makes
1910 // sense for GEPs if the offset is a multiple of the desired alignment and
1911 // if size - offset meets the size threshold.
1912 if (!Arg->getType()->isPointerTy())
1913 continue;
1914 APInt Offset(DL->getIndexSizeInBits(
1915 cast<PointerType>(Arg->getType())->getAddressSpace()),
1916 0);
1917 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1918 uint64_t Offset2 = Offset.getLimitedValue();
1919 if ((Offset2 & (PrefAlign-1)) != 0)
1920 continue;
1921 AllocaInst *AI;
1922 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1923 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1924 AI->setAlignment(MaybeAlign(PrefAlign));
1925 // Global variables can only be aligned if they are defined in this
1926 // object (i.e. they are uniquely initialized in this object), and
1927 // over-aligning global variables that have an explicit section is
1928 // forbidden.
1929 GlobalVariable *GV;
1930 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1931 GV->getPointerAlignment(*DL) < PrefAlign &&
1932 DL->getTypeAllocSize(GV->getValueType()) >=
1933 MinSize + Offset2)
1934 GV->setAlignment(MaybeAlign(PrefAlign));
1935 }
1936 // If this is a memcpy (or similar) then we may be able to improve the
1937 // alignment
1938 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1939 unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1940 if (DestAlign > MI->getDestAlignment())
1941 MI->setDestAlignment(DestAlign);
1942 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1943 unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1944 if (SrcAlign > MTI->getSourceAlignment())
1945 MTI->setSourceAlignment(SrcAlign);
1946 }
1947 }
1948 }
1949
1950 // If we have a cold call site, try to sink addressing computation into the
1951 // cold block. This interacts with our handling for loads and stores to
1952 // ensure that we can fold all uses of a potential addressing computation
1953 // into their uses. TODO: generalize this to work over profiling data
1954 if (CI->hasFnAttr(Attribute::Cold) &&
1955 !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
1956 for (auto &Arg : CI->arg_operands()) {
1957 if (!Arg->getType()->isPointerTy())
1958 continue;
1959 unsigned AS = Arg->getType()->getPointerAddressSpace();
1960 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1961 }
1962
1963 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1964 if (II) {
1965 switch (II->getIntrinsicID()) {
1966 default: break;
1967 case Intrinsic::experimental_widenable_condition: {
1968 // Give up on future widening opportunities so that we can fold away dead
1969 // paths and merge blocks before going into block-local instruction
1970 // selection.
1971 if (II->use_empty()) {
1972 II->eraseFromParent();
1973 return true;
1974 }
1975 Constant *RetVal = ConstantInt::getTrue(II->getContext());
1976 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1977 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1978 });
1979 return true;
1980 }
1981 case Intrinsic::objectsize:
1982 llvm_unreachable("llvm.objectsize.* should have been lowered already");
1983 case Intrinsic::is_constant:
1984 llvm_unreachable("llvm.is.constant.* should have been lowered already");
1985 case Intrinsic::aarch64_stlxr:
1986 case Intrinsic::aarch64_stxr: {
1987 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1988 if (!ExtVal || !ExtVal->hasOneUse() ||
1989 ExtVal->getParent() == CI->getParent())
1990 return false;
1991 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1992 ExtVal->moveBefore(CI);
1993 // Mark this instruction as "inserted by CGP", so that other
1994 // optimizations don't touch it.
1995 InsertedInsts.insert(ExtVal);
1996 return true;
1997 }
1998
1999 case Intrinsic::launder_invariant_group:
2000 case Intrinsic::strip_invariant_group: {
2001 Value *ArgVal = II->getArgOperand(0);
2002 auto it = LargeOffsetGEPMap.find(II);
2003 if (it != LargeOffsetGEPMap.end()) {
2004 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2005 // Make sure not to have to deal with iterator invalidation
2006 // after possibly adding ArgVal to LargeOffsetGEPMap.
2007 auto GEPs = std::move(it->second);
2008 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2009 LargeOffsetGEPMap.erase(II);
2010 }
2011
2012 II->replaceAllUsesWith(ArgVal);
2013 II->eraseFromParent();
2014 return true;
2015 }
2016 case Intrinsic::cttz:
2017 case Intrinsic::ctlz:
2018 // If counting zeros is expensive, try to avoid it.
2019 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
2020 case Intrinsic::dbg_value:
2021 return fixupDbgValue(II);
2022 case Intrinsic::vscale: {
2023 // If datalayout has no special restrictions on vector data layout,
2024 // replace `llvm.vscale` by an equivalent constant expression
2025 // to benefit from cheap constant propagation.
2026 Type *ScalableVectorTy =
2027 VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
2028 if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) {
2029 auto Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
2030 auto One = ConstantInt::getSigned(II->getType(), 1);
2031 auto *CGep =
2032 ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
2033 II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
2034 II->eraseFromParent();
2035 return true;
2036 }
2037 }
2038 }
2039
2040 SmallVector<Value *, 2> PtrOps;
2041 Type *AccessTy;
2042 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2043 while (!PtrOps.empty()) {
2044 Value *PtrVal = PtrOps.pop_back_val();
2045 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2046 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2047 return true;
2048 }
2049 }
2050
2051 // From here on out we're working with named functions.
2052 if (!CI->getCalledFunction()) return false;
2053
2054 // Lower all default uses of _chk calls. This is very similar
2055 // to what InstCombineCalls does, but here we are only lowering calls
2056 // to fortified library functions (e.g. __memcpy_chk) that have the default
2057 // "don't know" as the objectsize. Anything else should be left alone.
2058 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2059 IRBuilder<> Builder(CI);
2060 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2061 CI->replaceAllUsesWith(V);
2062 CI->eraseFromParent();
2063 return true;
2064 }
2065
2066 return false;
2067}
2068
2069/// Look for opportunities to duplicate return instructions to the predecessor
2070/// to enable tail call optimizations. The case it is currently looking for is:
2071/// @code
2072/// bb0:
2073/// %tmp0 = tail call i32 @f0()
2074/// br label %return
2075/// bb1:
2076/// %tmp1 = tail call i32 @f1()
2077/// br label %return
2078/// bb2:
2079/// %tmp2 = tail call i32 @f2()
2080/// br label %return
2081/// return:
2082/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2083/// ret i32 %retval
2084/// @endcode
2085///
2086/// =>
2087///
2088/// @code
2089/// bb0:
2090/// %tmp0 = tail call i32 @f0()
2091/// ret i32 %tmp0
2092/// bb1:
2093/// %tmp1 = tail call i32 @f1()
2094/// ret i32 %tmp1
2095/// bb2:
2096/// %tmp2 = tail call i32 @f2()
2097/// ret i32 %tmp2
2098/// @endcode
2099bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
2100 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2101 if (!RetI)
2102 return false;
2103
2104 PHINode *PN = nullptr;
2105 ExtractValueInst *EVI = nullptr;
2106 BitCastInst *BCI = nullptr;
2107 Value *V = RetI->getReturnValue();
2108 if (V) {
2109 BCI = dyn_cast<BitCastInst>(V);
2110 if (BCI)
2111 V = BCI->getOperand(0);
2112
2113 EVI = dyn_cast<ExtractValueInst>(V);
2114 if (EVI) {
2115 V = EVI->getOperand(0);
2116 if (!std::all_of(EVI->idx_begin(), EVI->idx_end(),
2117 [](unsigned idx) { return idx == 0; }))
2118 return false;
2119 }
2120
2121 PN = dyn_cast<PHINode>(V);
2122 if (!PN)
2123 return false;
2124 }
2125
2126 if (PN && PN->getParent() != BB)
2127 return false;
2128
2129 // Make sure there are no instructions between the PHI and return, or that the
2130 // return is the first instruction in the block.
2131 if (PN) {
2132 BasicBlock::iterator BI = BB->begin();
2133 // Skip over debug and the bitcast.
2134 do {
2135 ++BI;
2136 } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI);
2137 if (&*BI != RetI)
2138 return false;
2139 } else {
2140 BasicBlock::iterator BI = BB->begin();
2141 while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2142 if (&*BI != RetI)
2143 return false;
2144 }
2145
2146 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2147 /// call.
2148 const Function *F = BB->getParent();
2149 SmallVector<BasicBlock*, 4> TailCallBBs;
2150 if (PN) {
2151 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2152 // Look through bitcasts.
2153 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2154 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2155 BasicBlock *PredBB = PN->getIncomingBlock(I);
2156 // Make sure the phi value is indeed produced by the tail call.
2157 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2158 TLI->mayBeEmittedAsTailCall(CI) &&
2159 attributesPermitTailCall(F, CI, RetI, *TLI))
2160 TailCallBBs.push_back(PredBB);
2161 }
2162 } else {
2163 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2164 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2165 if (!VisitedBBs.insert(*PI).second)
2166 continue;
2167
2168 BasicBlock::InstListType &InstList = (*PI)->getInstList();
2169 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2170 BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2171 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2172 if (RI == RE)
2173 continue;
2174
2175 CallInst *CI = dyn_cast<CallInst>(&*RI);
2176 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2177 attributesPermitTailCall(F, CI, RetI, *TLI))
2178 TailCallBBs.push_back(*PI);
2179 }
2180 }
2181
2182 bool Changed = false;
2183 for (auto const &TailCallBB : TailCallBBs) {
2184 // Make sure the call instruction is followed by an unconditional branch to
2185 // the return block.
2186 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2187 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2188 continue;
2189
2190 // Duplicate the return into TailCallBB.
2191 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2192 ModifiedDT = Changed = true;
2193 ++NumRetsDup;
2194 }
2195
2196 // If we eliminated all predecessors of the block, delete the block now.
2197 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2198 BB->eraseFromParent();
2199
2200 return Changed;
2201}
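// A source-level analogue (illustrative only; the function names are made up)
// of the IR rewrite shown above: when each predecessor returns its call result
// directly instead of funneling through a shared return block and PHI, the
// calls sit in tail position and the backend may emit them as tail calls.
static int f0() { return 10; }
static int f1() { return 20; }

static int mergedReturn(bool c) {
  int tmp = c ? f0() : f1(); // shared return of a PHI-like merge
  return tmp;
}

static int duplicatedReturn(bool c) {
  if (c)
    return f0(); // call in tail position
  return f1();   // call in tail position
}

int main() {
  return (mergedReturn(true) == duplicatedReturn(true) &&
          mergedReturn(false) == duplicatedReturn(false))
             ? 0
             : 1;
}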
2202
2203//===----------------------------------------------------------------------===//
2204// Memory Optimization
2205//===----------------------------------------------------------------------===//
2206
2207namespace {
2208
2209/// This is an extended version of TargetLowering::AddrMode
2210/// which holds actual Value*'s for register values.
2211struct ExtAddrMode : public TargetLowering::AddrMode {
2212 Value *BaseReg = nullptr;
2213 Value *ScaledReg = nullptr;
2214 Value *OriginalValue = nullptr;
2215 bool InBounds = true;
2216
2217 enum FieldName {
2218 NoField = 0x00,
2219 BaseRegField = 0x01,
2220 BaseGVField = 0x02,
2221 BaseOffsField = 0x04,
2222 ScaledRegField = 0x08,
2223 ScaleField = 0x10,
2224 MultipleFields = 0xff
2225 };
2226
2227
2228 ExtAddrMode() = default;
2229
2230 void print(raw_ostream &OS) const;
2231 void dump() const;
2232
2233 FieldName compare(const ExtAddrMode &other) {
2234 // First check that the types are the same on each field, as differing types
2235 // are something we can't cope with later on.
2236 if (BaseReg && other.BaseReg &&
2237 BaseReg->getType() != other.BaseReg->getType())
2238 return MultipleFields;
2239 if (BaseGV && other.BaseGV &&
2240 BaseGV->getType() != other.BaseGV->getType())
2241 return MultipleFields;
2242 if (ScaledReg && other.ScaledReg &&
2243 ScaledReg->getType() != other.ScaledReg->getType())
2244 return MultipleFields;
2245
2246 // Conservatively reject 'inbounds' mismatches.
2247 if (InBounds != other.InBounds)
2248 return MultipleFields;
2249
2250 // Check each field to see if it differs.
2251 unsigned Result = NoField;
2252 if (BaseReg != other.BaseReg)
2253 Result |= BaseRegField;
2254 if (BaseGV != other.BaseGV)
2255 Result |= BaseGVField;
2256 if (BaseOffs != other.BaseOffs)
2257 Result |= BaseOffsField;
2258 if (ScaledReg != other.ScaledReg)
2259 Result |= ScaledRegField;
2260 // Don't count 0 as being a different scale, because that actually means
2261 // unscaled (which will already be counted by having no ScaledReg).
2262 if (Scale && other.Scale && Scale != other.Scale)
2263 Result |= ScaleField;
2264
2265 if (countPopulation(Result) > 1)
2266 return MultipleFields;
2267 else
2268 return static_cast<FieldName>(Result);
2269 }
2270
2271 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2272 // with no offset.
2273 bool isTrivial() {
2274 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2275 // trivial if at most one of these terms is nonzero, except that BaseGV and
2276 // BaseReg both being zero actually means a null pointer value, which we
2277 // consider to be 'non-zero' here.
2278 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2279 }
2280
2281 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2282 switch (Field) {
2283 default:
2284 return nullptr;
2285 case BaseRegField:
2286 return BaseReg;
2287 case BaseGVField:
2288 return BaseGV;
2289 case ScaledRegField:
2290 return ScaledReg;
2291 case BaseOffsField:
2292 return ConstantInt::get(IntPtrTy, BaseOffs);
2293 }
2294 }
2295
2296 void SetCombinedField(FieldName Field, Value *V,
2297 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2298 switch (Field) {
2299 default:
2300 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2301 break;
2302 case ExtAddrMode::BaseRegField:
2303 BaseReg = V;
2304 break;
2305 case ExtAddrMode::BaseGVField:
2306 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2307 // in the BaseReg field.
2308 assert(BaseReg == nullptr);
2309 BaseReg = V;
2310 BaseGV = nullptr;
2311 break;
2312 case ExtAddrMode::ScaledRegField:
2313 ScaledReg = V;
2314 // If we have a mix of scaled and unscaled addrmodes then we want scale
2315 // to be the scale and not zero.
2316 if (!Scale)
2317 for (const ExtAddrMode &AM : AddrModes)
2318 if (AM.Scale) {
2319 Scale = AM.Scale;
2320 break;
2321 }
2322 break;
2323 case ExtAddrMode::BaseOffsField:
2324 // The offset is no longer a constant, so it goes in ScaledReg with a
2325 // scale of 1.
2326 assert(ScaledReg == nullptr);
2327 ScaledReg = V;
2328 Scale = 1;
2329 BaseOffs = 0;
2330 break;
2331 }
2332 }
2333};
2334
2335} // end anonymous namespace
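// A hypothetical, simplified sketch (the field names mirror ExtAddrMode, but
// this is not LLVM code) of the address form the struct above describes:
//   Addr = BaseGV + BaseReg + BaseOffs + ScaledReg * Scale
// with any unused field held at zero/null.
#include <cassert>
#include <cstdint>

struct SimpleAddrMode {
  uint64_t BaseGV = 0;    // address of a global, if any
  uint64_t BaseReg = 0;   // base register value, if any
  int64_t BaseOffs = 0;   // constant displacement
  uint64_t ScaledReg = 0; // index register value, if any
  int64_t Scale = 0;      // 0 means "no scaled index"

  uint64_t materialize() const {
    return BaseGV + BaseReg + static_cast<uint64_t>(BaseOffs) +
           ScaledReg * static_cast<uint64_t>(Scale);
  }
};

int main() {
  SimpleAddrMode AM;
  AM.BaseReg = 0x1000;
  AM.BaseOffs = 16;
  AM.ScaledReg = 4; // e.g. a loop index
  AM.Scale = 8;     // element size
  assert(AM.materialize() == 0x1000 + 16 + 4 * 8);
  return 0;
}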
2336
2337#ifndef NDEBUG
2338static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2339 AM.print(OS);
2340 return OS;
2341}
2342#endif
2343
2344#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2345void ExtAddrMode::print(raw_ostream &OS) const {
2346 bool NeedPlus = false;
2347 OS << "[";
2348 if (InBounds)
2349 OS << "inbounds ";
2350 if (BaseGV) {
2351 OS << (NeedPlus ? " + " : "")
2352 << "GV:";
2353 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2354 NeedPlus = true;
2355 }
2356
2357 if (BaseOffs) {
2358 OS << (NeedPlus ? " + " : "")
2359 << BaseOffs;
2360 NeedPlus = true;
2361 }
2362
2363 if (BaseReg) {
2364 OS << (NeedPlus ? " + " : "")
2365 << "Base:";
2366 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2367 NeedPlus = true;
2368 }
2369 if (Scale) {
2370 OS << (NeedPlus ? " + " : "")
2371 << Scale << "*";
2372 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2373 }
2374
2375 OS << ']';
2376}
2377
2378LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2379 print(dbgs());
2380 dbgs() << '\n';
2381}
2382#endif
2383
2384namespace {
2385
2386/// This class provides transaction based operation on the IR.
2387/// Every change made through this class is recorded in the internal state and
2388/// can be undone (rollback) until commit is called.
2389class TypePromotionTransaction {
2390 /// This represents the common interface of the individual transaction.
2391 /// Each class implements the logic for doing one specific modification on
2392 /// the IR via the TypePromotionTransaction.
2393 class TypePromotionAction {
2394 protected:
2395 /// The Instruction modified.
2396 Instruction *Inst;
2397
2398 public:
2399 /// Constructor of the action.
2400 /// The constructor performs the related action on the IR.
2401 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2402
2403 virtual ~TypePromotionAction() = default;
2404
2405 /// Undo the modification done by this action.
2406 /// When this method is called, the IR must be in the same state as it was
2407 /// before this action was applied.
2408 /// \pre Undoing the action works if and only if the IR is in the exact same
2409 /// state as it was directly after this action was applied.
2410 virtual void undo() = 0;
2411
2412 /// Advocate every change made by this action.
2413 /// When the results on the IR of the action are to be kept, it is important
2414 /// to call this function, otherwise hidden information may be kept forever.
2415 virtual void commit() {
2416 // Nothing to be done, this action is not doing anything.
2417 }
2418 };
2419
2420 /// Utility to remember the position of an instruction.
2421 class InsertionHandler {
2422 /// Position of an instruction.
2423 /// Either an instruction:
2424 /// - Is the first in a basic block: BB is used.
2425 /// - Has a previous instruction: PrevInst is used.
2426 union {
2427 Instruction *PrevInst;
2428 BasicBlock *BB;
2429 } Point;
2430
2431 /// Remember whether or not the instruction had a previous instruction.
2432 bool HasPrevInstruction;
2433
2434 public:
2435 /// Record the position of \p Inst.
2436 InsertionHandler(Instruction *Inst) {
2437 BasicBlock::iterator It = Inst->getIterator();
2438 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2439 if (HasPrevInstruction)
2440 Point.PrevInst = &*--It;
2441 else
2442 Point.BB = Inst->getParent();
2443 }
2444
2445 /// Insert \p Inst at the recorded position.
2446 void insert(Instruction *Inst) {
2447 if (HasPrevInstruction) {
2448 if (Inst->getParent())
2449 Inst->removeFromParent();
2450 Inst->insertAfter(Point.PrevInst);
2451 } else {
2452 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2453 if (Inst->getParent())
2454 Inst->moveBefore(Position);
2455 else
2456 Inst->insertBefore(Position);
2457 }
2458 }
2459 };
2460
2461 /// Move an instruction before another.
2462 class InstructionMoveBefore : public TypePromotionAction {
2463 /// Original position of the instruction.
2464 InsertionHandler Position;
2465
2466 public:
2467 /// Move \p Inst before \p Before.
2468 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2469 : TypePromotionAction(Inst), Position(Inst) {
2470 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2471 << "\n");
2472 Inst->moveBefore(Before);
2473 }
2474
2475 /// Move the instruction back to its original position.
2476 void undo() override {
2477 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2478 Position.insert(Inst);
2479 }
2480 };
2481
2482 /// Set the operand of an instruction with a new value.
2483 class OperandSetter : public TypePromotionAction {
2484 /// Original operand of the instruction.
2485 Value *Origin;
2486
2487 /// Index of the modified instruction.
2488 unsigned Idx;
2489
2490 public:
2491 /// Set \p Idx operand of \p Inst with \p NewVal.
2492 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2493 : TypePromotionAction(Inst), Idx(Idx) {
2494 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2495 << "for:" << *Inst << "\n"
2496 << "with:" << *NewVal << "\n");
2497 Origin = Inst->getOperand(Idx);
2498 Inst->setOperand(Idx, NewVal);
2499 }
2500
2501 /// Restore the original value of the instruction.
2502 void undo() override {
2503 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2504 << "for: " << *Inst << "\n"
2505 << "with: " << *Origin << "\n");
2506 Inst->setOperand(Idx, Origin);
2507 }
2508 };
2509
2510 /// Hide the operands of an instruction.
2511 /// Do as if this instruction was not using any of its operands.
2512 class OperandsHider : public TypePromotionAction {
2513 /// The list of original operands.
2514 SmallVector<Value *, 4> OriginalValues;
2515
2516 public:
2517 /// Remove \p Inst from the uses of the operands of \p Inst.
2518 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2519 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2520 unsigned NumOpnds = Inst->getNumOperands();
2521 OriginalValues.reserve(NumOpnds);
2522 for (unsigned It = 0; It < NumOpnds; ++It) {
2523 // Save the current operand.
2524 Value *Val = Inst->getOperand(It);
2525 OriginalValues.push_back(Val);
2526 // Set a dummy one.
2527 // We could use OperandSetter here, but that would imply an overhead
2528 // that we are not willing to pay.
2529 Inst->setOperand(It, UndefValue::get(Val->getType()));
2530 }
2531 }
2532
2533 /// Restore the original list of uses.
2534 void undo() override {
2535 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2536 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2537 Inst->setOperand(It, OriginalValues[It]);
2538 }
2539 };
2540
2541 /// Build a truncate instruction.
2542 class TruncBuilder : public TypePromotionAction {
2543 Value *Val;
2544
2545 public:
2546 /// Build a truncate instruction of \p Opnd producing a \p Ty
2547 /// result.
2548 /// trunc Opnd to Ty.
2549 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2550 IRBuilder<> Builder(Opnd);
2551 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2552 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2553 }
2554
2555 /// Get the built value.
2556 Value *getBuiltValue() { return Val; }
2557
2558 /// Remove the built instruction.
2559 void undo() override {
2560 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2561 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2562 IVal->eraseFromParent();
2563 }
2564 };
2565
2566 /// Build a sign extension instruction.
2567 class SExtBuilder : public TypePromotionAction {
2568 Value *Val;
2569
2570 public:
2571 /// Build a sign extension instruction of \p Opnd producing a \p Ty
2572 /// result.
2573 /// sext Opnd to Ty.
2574 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2575 : TypePromotionAction(InsertPt) {
2576 IRBuilder<> Builder(InsertPt);
2577 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2578 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2579 }
2580
2581 /// Get the built value.
2582 Value *getBuiltValue() { return Val; }
2583
2584 /// Remove the built instruction.
2585 void undo() override {
2586 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2587 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2588 IVal->eraseFromParent();
2589 }
2590 };
2591
2592 /// Build a zero extension instruction.
2593 class ZExtBuilder : public TypePromotionAction {
2594 Value *Val;
2595
2596 public:
2597 /// Build a zero extension instruction of \p Opnd producing a \p Ty
2598 /// result.
2599 /// zext Opnd to Ty.
2600 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2601 : TypePromotionAction(InsertPt) {
2602 IRBuilder<> Builder(InsertPt);
2603 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2604 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2605 }
2606
2607 /// Get the built value.
2608 Value *getBuiltValue() { return Val; }
2609
2610 /// Remove the built instruction.
2611 void undo() override {
2612 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2613 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2614 IVal->eraseFromParent();
2615 }
2616 };
2617
2618 /// Mutate an instruction to another type.
2619 class TypeMutator : public TypePromotionAction {
2620 /// Record the original type.
2621 Type *OrigTy;
2622
2623 public:
2624 /// Mutate the type of \p Inst into \p NewTy.
2625 TypeMutator(Instruction *Inst, Type *NewTy)
2626 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2627 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2628 << "\n");
2629 Inst->mutateType(NewTy);
2630 }
2631
2632 /// Mutate the instruction back to its original type.
2633 void undo() override {
2634 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2635 << "\n");
2636 Inst->mutateType(OrigTy);
2637 }
2638 };
2639
2640 /// Replace the uses of an instruction by another instruction.
2641 class UsesReplacer : public TypePromotionAction {
2642 /// Helper structure to keep track of the replaced uses.
2643 struct InstructionAndIdx {
2644 /// The instruction using the instruction.
2645 Instruction *Inst;
2646
2647 /// The index where this instruction is used for Inst.
2648 unsigned Idx;
2649
2650 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2651 : Inst(Inst), Idx(Idx) {}
2652 };
2653
2654 /// Keep track of the original uses (pair Instruction, Index).
2655 SmallVector<InstructionAndIdx, 4> OriginalUses;
2656 /// Keep track of the debug users.
2657 SmallVector<DbgValueInst *, 1> DbgValues;
2658
2659 using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2660
2661 public:
2662 /// Replace all the use of \p Inst by \p New.
2663 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2664 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2665 << "\n");
2666 // Record the original uses.
2667 for (Use &U : Inst->uses()) {
2668 Instruction *UserI = cast<Instruction>(U.getUser());
2669 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2670 }
2671 // Record the debug uses separately. They are not in the instruction's
2672 // use list, but they are replaced by RAUW.
2673 findDbgValues(DbgValues, Inst);
2674
2675 // Now, we can replace the uses.
2676 Inst->replaceAllUsesWith(New);
2677 }
2678
2679 /// Reassign the original uses of Inst to Inst.
2680 void undo() override {
2681 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2682 for (use_iterator UseIt = OriginalUses.begin(),
2683 EndIt = OriginalUses.end();
2684 UseIt != EndIt; ++UseIt) {
2685 UseIt->Inst->setOperand(UseIt->Idx, Inst);
2686 }
2687 // RAUW has replaced all original uses with references to the new value,
2688 // including the debug uses. Since we are undoing the replacements,
2689 // the original debug uses must also be reinstated to maintain the
2690 // correctness and utility of debug value instructions.
2691 for (auto *DVI: DbgValues) {
2692 LLVMContext &Ctx = Inst->getType()->getContext();
2693 auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2694 DVI->setOperand(0, MV);
2695 }
2696 }
2697 };
2698
2699 /// Remove an instruction from the IR.
2700 class InstructionRemover : public TypePromotionAction {
2701 /// Original position of the instruction.
2702 InsertionHandler Inserter;
2703
2704 /// Helper structure to hide all the links to the instruction. In other
2705 /// words, this helps to behave as if the instruction was removed.
2706 OperandsHider Hider;
2707
2708 /// Keep track of the uses replaced, if any.
2709 UsesReplacer *Replacer = nullptr;
2710
2711 /// Keep track of instructions removed.
2712 SetOfInstrs &RemovedInsts;
2713
2714 public:
2715 /// Remove all references to \p Inst and optionally replace all its
2716 /// uses with New.
2717 /// \p RemovedInsts Keep track of the instructions removed by this Action.
2718 /// \pre If !Inst->use_empty(), then New != nullptr
2719 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2720 Value *New = nullptr)
2721 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2722 RemovedInsts(RemovedInsts) {
2723 if (New)
2724 Replacer = new UsesReplacer(Inst, New);
2725 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2726 RemovedInsts.insert(Inst);
2727 /// The instructions removed here will be freed after completing
2728 /// optimizeBlock() for all blocks as we need to keep track of the
2729 /// removed instructions during promotion.
2730 Inst->removeFromParent();
2731 }
2732
2733 ~InstructionRemover() override { delete Replacer; }
2734
2735 /// Resurrect the instruction and reassign it to the proper uses if a
2736 /// new value was provided when building this action.
2737 void undo() override {
2738 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2739 Inserter.insert(Inst);
2740 if (Replacer)
2741 Replacer->undo();
2742 Hider.undo();
2743 RemovedInsts.erase(Inst);
2744 }
2745 };
2746
2747public:
2748 /// Restoration point.
2749 /// The restoration point is a pointer to an action instead of an iterator
2750 /// because the iterator may be invalidated but not the pointer.
2751 using ConstRestorationPt = const TypePromotionAction *;
2752
2753 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2754 : RemovedInsts(RemovedInsts) {}
2755
2756 /// Commit all the changes made in this transaction.
2757 void commit();
2758
2759 /// Undo all the changes made after the given point.
2760 void rollback(ConstRestorationPt Point);
2761
2762 /// Get the current restoration point.
2763 ConstRestorationPt getRestorationPoint() const;
2764
2765 /// \name API for IR modification with state keeping to support rollback.
2766 /// @{
2767 /// Same as Instruction::setOperand.
2768 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2769
2770 /// Same as Instruction::eraseFromParent.
2771 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2772
2773 /// Same as Value::replaceAllUsesWith.
2774 void replaceAllUsesWith(Instruction *Inst, Value *New);
2775
2776 /// Same as Value::mutateType.
2777 void mutateType(Instruction *Inst, Type *NewTy);
2778
2779 /// Same as IRBuilder::createTrunc.
2780 Value *createTrunc(Instruction *Opnd, Type *Ty);
2781
2782 /// Same as IRBuilder::createSExt.
2783 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2784
2785 /// Same as IRBuilder::createZExt.
2786 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2787
2788 /// Same as Instruction::moveBefore.
2789 void moveBefore(Instruction *Inst, Instruction *Before);
2790 /// @}
2791
2792private:
2793 /// The ordered list of actions made so far.
2794 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2795
2796 using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2797
2798 SetOfInstrs &RemovedInsts;
2799};
2800
2801} // end anonymous namespace
2802
2803void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2804 Value *NewVal) {
2805 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
2806 Inst, Idx, NewVal));
2807}
2808
2809void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2810 Value *NewVal) {
2811 Actions.push_back(
2812 std::make_unique<TypePromotionTransaction::InstructionRemover>(
2813 Inst, RemovedInsts, NewVal));
2814}
2815
2816void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2817 Value *New) {
2818 Actions.push_back(
2819 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2820}
2821
2822void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2823 Actions.push_back(
2824 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2825}
2826
2827Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2828 Type *Ty) {
2829 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2830 Value *Val = Ptr->getBuiltValue();
2831 Actions.push_back(std::move(Ptr));
2832 return Val;
2833}
2834
2835Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2836 Value *Opnd, Type *Ty) {
2837 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2838 Value *Val = Ptr->getBuiltValue();
2839 Actions.push_back(std::move(Ptr));
2840 return Val;
2841}
2842
2843Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2844 Value *Opnd, Type *Ty) {
2845 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2846 Value *Val = Ptr->getBuiltValue();
2847 Actions.push_back(std::move(Ptr));
2848 return Val;
2849}
2850
2851void TypePromotionTransaction::moveBefore(Instruction *Inst,
2852 Instruction *Before) {
2853 Actions.push_back(
2854 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2855 Inst, Before));
2856}
2857
2858TypePromotionTransaction::ConstRestorationPt
2859TypePromotionTransaction::getRestorationPoint() const {
2860 return !Actions.empty() ? Actions.back().get() : nullptr;
2861}
2862
2863void TypePromotionTransaction::commit() {
2864 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2865 ++It)
2866 (*It)->commit();
2867 Actions.clear();
2868}
2869
2870void TypePromotionTransaction::rollback(
2871 TypePromotionTransaction::ConstRestorationPt Point) {
2872 while (!Actions.empty() && Point != Actions.back().get()) {
2873 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2874 Curr->undo();
2875 }
2876}
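// A minimal usage sketch of the transaction API defined above (hypothetical
// caller; 'tryRewrite', 'WideVal' and 'Profitable' are placeholder names, not
// code from this file). Changes are routed through the transaction so they
// are recorded as actions and can be undone, newest first, from a saved
// restoration point.
static void tryRewrite(Instruction *Inst, Value *WideVal, bool Profitable,
                       TypePromotionTransaction &TPT) {
  TypePromotionTransaction::ConstRestorationPt Point =
      TPT.getRestorationPoint();
  TPT.setOperand(Inst, /*Idx=*/0, WideVal);                 // OperandSetter action.
  TPT.moveBefore(Inst, Inst->getParent()->getTerminator()); // InstructionMoveBefore action.
  if (!Profitable)
    TPT.rollback(Point); // Undo both actions, newest first.
  else
    TPT.commit();        // Keep them and drop the undo state.
}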
2877
2878namespace {
2879
2880/// A helper class for matching addressing modes.
2881///
2882/// This encapsulates the logic for matching the target-legal addressing modes.
2883class AddressingModeMatcher {
2884 SmallVectorImpl<Instruction*> &AddrModeInsts;
2885 const TargetLowering &TLI;
2886 const TargetRegisterInfo &TRI;
2887 const DataLayout &DL;
2888
2889 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2890 /// the memory instruction that we're computing this address for.
2891 Type *AccessTy;
2892 unsigned AddrSpace;
2893 Instruction *MemoryInst;
2894
2895 /// This is the addressing mode that we're building up. This is
2896 /// part of the return value of this addressing mode matching stuff.
2897 ExtAddrMode &AddrMode;
2898
2899 /// The instructions inserted by other CodeGenPrepare optimizations.
2900 const SetOfInstrs &InsertedInsts;
2901
2902 /// A map from the instructions to their type before promotion.
2903 InstrToOrigTy &PromotedInsts;
2904
2905 /// The ongoing transaction where every action should be registered.
2906 TypePromotionTransaction &TPT;
2907
2908 // A GEP which has too large an offset to be folded into the addressing mode.
2909 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2910
2911 /// This is set to true when we should not do profitability checks.
2912 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2913 bool IgnoreProfitability;
2914
2915 /// True if we are optimizing for size.
2916 bool OptSize;
2917
2918 ProfileSummaryInfo *PSI;
2919 BlockFrequencyInfo *BFI;
2920
2921 AddressingModeMatcher(
2922 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2923 const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2924 ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2925 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2926 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
2927 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
2928 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2929 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2930 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2931 PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
2932 OptSize(OptSize), PSI(PSI), BFI(BFI) {
2933 IgnoreProfitability = false;
2934 }
2935
2936public:
2937 /// Find the maximal addressing mode that a load/store of V can fold,
2938 /// given an access type of AccessTy. This returns a list of involved
2939 /// instructions in AddrModeInsts.
2940 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2941 /// optimizations.
2942 /// \p PromotedInsts maps the instructions to their type before promotion.
2943 /// \p TPT The ongoing transaction where every action should be registered.
2944 static ExtAddrMode
2945 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2946 SmallVectorImpl<Instruction *> &AddrModeInsts,
2947 const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2948 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2949 TypePromotionTransaction &TPT,
2950 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
2951 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
2952 ExtAddrMode Result;
2953
2954 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2955 MemoryInst, Result, InsertedInsts,
2956 PromotedInsts, TPT, LargeOffsetGEP,
2957 OptSize, PSI, BFI)
2958 .matchAddr(V, 0);
2959 (void)Success; assert(Success && "Couldn't select *anything*?");
2960 return Result;
2961 }
2962
2963private:
2964 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2965 bool matchAddr(Value *Addr, unsigned Depth);
2966 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2967 bool *MovedAway = nullptr);
2968 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2969 ExtAddrMode &AMBefore,
2970 ExtAddrMode &AMAfter);
2971 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2972 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2973 Value *PromotedOperand) const;
2974};
2975
2976class PhiNodeSet;
2977
2978/// An iterator for PhiNodeSet.
2979class PhiNodeSetIterator {
2980 PhiNodeSet * const Set;
2981 size_t CurrentIndex = 0;
2982
2983public:
2984 /// The constructor. Start should point to either a valid element, or be equal
2985 /// to the size of the underlying SmallVector of the PhiNodeSet.
2986 PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2987 PHINode * operator*() const;
2988 PhiNodeSetIterator& operator++();
2989 bool operator==(const PhiNodeSetIterator &RHS) const;
2990 bool operator!=(const PhiNodeSetIterator &RHS) const;
2991};
2992
2993/// Keeps a set of PHINodes.
2994///
2995/// This is a minimal set implementation for a specific use case:
2996/// It is very fast when there are very few elements, but also provides good
2997/// performance when there are many. It is similar to SmallPtrSet, but also
2998/// provides iteration by insertion order, which is deterministic and stable
2999/// across runs. It is also similar to SmallSetVector, but provides removing
3000/// elements in O(1) time. This is achieved by not actually removing the element
3001/// from the underlying vector, so comes at the cost of using more memory, but
3002/// that is fine, since PhiNodeSets are used as short lived objects.
3003class PhiNodeSet {
3004 friend class PhiNodeSetIterator;
3005
3006 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3007 using iterator = PhiNodeSetIterator;
3008
3009 /// Keeps the elements in the order of their insertion in the underlying
3010 /// vector. To achieve constant time removal, it never deletes any element.
3011 SmallVector<PHINode *, 32> NodeList;
3012
3013 /// Keeps the elements in the underlying set implementation. This (and not the
3014 /// NodeList defined above) is the source of truth on whether an element
3015 /// is actually in the collection.
3016 MapType NodeMap;
3017
3018 /// Points to the first valid (not deleted) element when the set is not empty
3019 /// and the value is not zero. Equals to the size of the underlying vector
3020 /// when the set is empty. When the value is 0, as in the beginning, the
3021 /// first element may or may not be valid.
3022 size_t FirstValidElement = 0;
3023
3024public:
3025 /// Inserts a new element to the collection.
3026 /// \returns true if the element is actually added, i.e. was not in the
3027 /// collection before the operation.
3028 bool insert(PHINode *Ptr) {
3029 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3030 NodeList.push_back(Ptr);
3031 return true;
3032 }
3033 return false;
3034 }
3035
3036 /// Removes the element from the collection.
3037 /// \returns whether the element is actually removed, i.e. was in the
3038 /// collection before the operation.
3039 bool erase(PHINode *Ptr) {
3040 auto it = NodeMap.find(Ptr);
3041 if (it != NodeMap.end()) {
3042 NodeMap.erase(Ptr);
3043 SkipRemovedElements(FirstValidElement);
3044 return true;
3045 }
3046 return false;
3047 }
3048
3049 /// Removes all elements and clears the collection.
3050 void clear() {
3051 NodeMap.clear();
3052 NodeList.clear();
3053 FirstValidElement = 0;
3054 }
3055
3056 /// \returns an iterator that will iterate the elements in the order of
3057 /// insertion.
3058 iterator begin() {
3059 if (FirstValidElement == 0)
3060 SkipRemovedElements(FirstValidElement);
3061 return PhiNodeSetIterator(this, FirstValidElement);
3062 }
3063
3064 /// \returns an iterator that points to the end of the collection.
3065 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3066
3067 /// Returns the number of elements in the collection.
3068 size_t size() const {
3069 return NodeMap.size();
3070 }
3071
3072 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3073 size_t count(PHINode *Ptr) const {
3074 return NodeMap.count(Ptr);
3075 }
3076
3077private:
3078 /// Updates the CurrentIndex so that it will point to a valid element.
3079 ///
3080 /// If the element of NodeList at CurrentIndex is valid, it does not
3081 /// change it. If there are no more valid elements, it updates CurrentIndex
3082 /// to point to the end of the NodeList.
3083 void SkipRemovedElements(size_t &CurrentIndex) {
3084 while (CurrentIndex < NodeList.size()) {
3085 auto it = NodeMap.find(NodeList[CurrentIndex]);
3086 // If the element has been deleted and added again later, NodeMap will
3087 // point to a different index, so CurrentIndex will still be invalid.
3088 if (it != NodeMap.end() && it->second == CurrentIndex)
3089 break;
3090 ++CurrentIndex;
3091 }
3092 }
3093};
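// A minimal sketch of the PhiNodeSet contract documented above (hypothetical
// snippet, not code from this file): iteration follows insertion order, and
// erase() is O(1) because the slot is only invalidated in NodeMap, never
// removed from NodeList.
static void phiNodeSetSketch(PHINode *A, PHINode *B, PHINode *C) {
  PhiNodeSet Set;
  Set.insert(A);
  Set.insert(B);
  Set.insert(C);
  Set.erase(B);             // Constant time; NodeList still holds B.
  for (PHINode *P : Set)    // Visits A then C, skipping the erased slot.
    (void)P;
  assert(Set.size() == 2 && Set.count(B) == 0 && "B is no longer a member");
}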
3094
3095PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3096 : Set(Set), CurrentIndex(Start) {}
3097
3098PHINode * PhiNodeSetIterator::operator*() const {
3099 assert(CurrentIndex < Set->NodeList.size() &&
3100 "PhiNodeSet access out of range");
3101 return Set->NodeList[CurrentIndex];
3102}
3103
3104PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
3105 assert(CurrentIndex < Set->NodeList.size() &&
3106 "PhiNodeSet access out of range");
3107 ++CurrentIndex;
3108 Set->SkipRemovedElements(CurrentIndex);
3109 return *this;
3110}
3111
3112bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3113 return CurrentIndex == RHS.CurrentIndex;
3114}
3115
3116bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3117 return !((*this) == RHS);
3118}
3119
3120/// Keep track of simplification of Phi nodes.
3121/// Accept the set of all phi nodes and erase phi node from this set
3122/// if it is simplified.
3123class SimplificationTracker {
3124 DenseMap<Value *, Value *> Storage;
3125 const SimplifyQuery &SQ;
3126 // Tracks newly created Phi nodes. The elements are iterated by insertion
3127 // order.
3128 PhiNodeSet AllPhiNodes;
3129 // Tracks newly created Select nodes.
3130 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3131
3132public:
3133 SimplificationTracker(const SimplifyQuery &sq)
3134 : SQ(sq) {}
3135
3136 Value *Get(Value *V) {
3137 do {
3138 auto SV = Storage.find(V);
3139 if (SV == Storage.end())
3140 return V;
3141 V = SV->second;
3142 } while (true);
3143 }
3144
3145 Value *Simplify(Value *Val) {
3146 SmallVector<Value *, 32> WorkList;
3147 SmallPtrSet<Value *, 32> Visited;
3148 WorkList.push_back(Val);
3149 while (!WorkList.empty()) {
3150 auto P = WorkList.pop_back_val();
3151 if (!Visited.insert(P).second)
3152 continue;
3153 if (auto *PI = dyn_cast<Instruction>(P))
3154 if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
3155 for (auto *U : PI->users())
3156 WorkList.push_back(cast<Value>(U));
3157 Put(PI, V);
3158 PI->replaceAllUsesWith(V);
3159 if (auto *PHI = dyn_cast<PHINode>(PI))
3160 AllPhiNodes.erase(PHI);
3161 if (auto *Select = dyn_cast<SelectInst>(PI))
3162 AllSelectNodes.erase(Select);
3163 PI->eraseFromParent();
3164 }
3165 }
3166 return Get(Val);
3167 }
3168
3169 void Put(Value *From, Value *To) {
3170 Storage.insert({ From, To });
3171 }
3172
3173 void ReplacePhi(PHINode *From, PHINode *To) {
3174 Value* OldReplacement = Get(From);
3175 while (OldReplacement != From) {
34: Assuming 'OldReplacement' is not equal to 'From'
35: Loop condition is true. Entering loop body
38: Assuming 'OldReplacement' is not equal to 'From'
39: Loop condition is true. Entering loop body
42: Assuming 'OldReplacement' is equal to 'From'
43: Loop condition is false. Execution continues on line 3180
3176 From = To;
40: Null pointer value stored to 'From'
3177 To = dyn_cast<PHINode>(OldReplacement);
36: Assuming 'OldReplacement' is not a 'PHINode'
37: Null pointer value stored to 'To'
41: Assuming 'OldReplacement' is a 'PHINode'
3178 OldReplacement = Get(From);
3179 }
3180 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
44: Assuming the condition is true
45: '?' condition is true
3181 Put(From, To);
3182 From->replaceAllUsesWith(To);
46: Called C++ object pointer is null
3183 AllPhiNodes.erase(From);
3184 From->eraseFromParent();
3185 }
3186
3187 PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
3188
3189 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3190
3191 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3192
3193 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3194
3195 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3196
3197 void destroyNewNodes(Type *CommonType) {
3198 // For safe erasing, replace the uses with dummy value first.
3199 auto Dummy = UndefValue::get(CommonType);
3200 for (auto I : AllPhiNodes) {
3201 I->replaceAllUsesWith(Dummy);
3202 I->eraseFromParent();
3203 }
3204 AllPhiNodes.clear();
3205 for (auto I : AllSelectNodes) {
3206 I->replaceAllUsesWith(Dummy);
3207 I->eraseFromParent();
3208 }
3209 AllSelectNodes.clear();
3210 }
3211};
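// A minimal sketch of the replacement chain that Get() walks (hypothetical
// values, not code from this file). Put(A, B) followed by Put(B, C) makes
// Get(A) resolve to C; ReplacePhi() above walks such a chain and assumes every
// replacement on it is itself a PHINode, which is exactly the assumption the
// analyzer path leading to line 3182 calls into question.
static void trackerSketch(const SimplifyQuery &SQ, Value *A, Value *B,
                          Value *C) {
  SimplificationTracker ST(SQ);
  ST.Put(A, B);              // A was simplified to B ...
  ST.Put(B, C);              // ... and B was later simplified to C.
  Value *Final = ST.Get(A);  // Follows A -> B -> C and returns C.
  assert(Final == C && "Get() resolves transitively");
}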
3212
3213/// A helper class for combining addressing modes.
3214class AddressingModeCombiner {
3215 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3216 typedef std::pair<PHINode *, PHINode *> PHIPair;
3217
3218private:
3219 /// The addressing modes we've collected.
3220 SmallVector<ExtAddrMode, 16> AddrModes;
3221
3222 /// The field in which the AddrModes differ, when we have more than one.
3223 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3224
3225 /// Are the AddrModes that we have all just equal to their original values?
3226 bool AllAddrModesTrivial = true;
3227
3228 /// Common Type for all different fields in addressing modes.
3229 Type *CommonType;
3230
3231 /// SimplifyQuery for simplifyInstruction utility.
3232 const SimplifyQuery &SQ;
3233
3234 /// Original Address.
3235 Value *Original;
3236
3237public:
3238 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3239 : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
3240
3241 /// Get the combined AddrMode
3242 const ExtAddrMode &getAddrMode() const {
3243 return AddrModes[0];
3244 }
3245
3246 /// Add a new AddrMode if it's compatible with the AddrModes we already
3247 /// have.
3248 /// \return True iff we succeeded in doing so.
3249 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3250 // Take note of if we have any non-trivial AddrModes, as we need to detect
3251 // when all AddrModes are trivial as then we would introduce a phi or select
3252 // which just duplicates what's already there.
3253 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3254
3255 // If this is the first addrmode then everything is fine.
3256 if (AddrModes.empty()) {
3257 AddrModes.emplace_back(NewAddrMode);
3258 return true;
3259 }
3260
3261 // Figure out how different this is from the other address modes, which we
3262 // can do just by comparing against the first one given that we only care
3263 // about the cumulative difference.
3264 ExtAddrMode::FieldName ThisDifferentField =
3265 AddrModes[0].compare(NewAddrMode);
3266 if (DifferentField == ExtAddrMode::NoField)
3267 DifferentField = ThisDifferentField;
3268 else if (DifferentField != ThisDifferentField)
3269 DifferentField = ExtAddrMode::MultipleFields;
3270
3271 // If NewAddrMode differs in more than one dimension we cannot handle it.
3272 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3273
3274 // If Scale Field is different then we reject.
3275 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3276
3277 // We also must reject the case when the base offset is different and the
3278 // scale reg is not null; we cannot handle this case because the merge of
3279 // different offsets would be used as the ScaleReg.
3280 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3281 !NewAddrMode.ScaledReg);
3282
3283 // We also must reject the case when the GV is different and a BaseReg is
3284 // installed, because we want to use the base reg as a merge of the GV values.
3285 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3286 !NewAddrMode.HasBaseReg);
3287
3288 // Even if NewAddrMode is the same we still need to collect it, because the
3289 // original value is different. And later we will need all original values
3290 // as anchors during finding the common Phi node.
3291 if (CanHandle)
3292 AddrModes.emplace_back(NewAddrMode);
3293 else
3294 AddrModes.clear();
3295
3296 return CanHandle;
3297 }
3298
3299 /// Combine the addressing modes we've collected into a single
3300 /// addressing mode.
3301 /// \return True iff we successfully combined them or we only had one so
3302 /// didn't need to combine them anyway.
3303 bool combineAddrModes() {
3304 // If we have no AddrModes then they can't be combined.
3305 if (AddrModes.size() == 0)
12: Assuming the condition is false
13: Taking false branch
3306 return false;
3307
3308 // A single AddrMode can trivially be combined.
3309 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
14: Assuming the condition is false
15: Assuming field 'DifferentField' is not equal to NoField
16: Taking false branch
3310 return true;
3311
3312 // If the AddrModes we collected are all just equal to the value they are
3313 // derived from then combining them wouldn't do anything useful.
3314 if (AllAddrModesTrivial)
17: Assuming field 'AllAddrModesTrivial' is false
18: Taking false branch
3315 return false;
3316
3317 if (!addrModeCombiningAllowed())
19: Assuming the condition is false
20: Taking false branch
3318 return false;
3319
3320 // Build a map between <original value, basic block where we saw it> to
3321 // value of base register.
3322 // Bail out if there is no common type.
3323 FoldAddrToValueMapping Map;
3324 if (!initializeMap(Map))
21: Taking false branch
3325 return false;
3326
3327 Value *CommonValue = findCommon(Map);
22: Calling 'AddressingModeCombiner::findCommon'
3328 if (CommonValue)
3329 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3330 return CommonValue != nullptr;
3331 }
3332
3333private:
3334 /// Initialize Map with anchor values. For each address seen,
3335 /// we set the value of the differing field seen in this address.
3336 /// At the same time we find a common type for the differing fields, which we
3337 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3338 /// Return false if there is no common type found.
3339 bool initializeMap(FoldAddrToValueMapping &Map) {
3340 // Keep track of keys where the value is null. We will need to replace it
3341 // with constant null when we know the common type.
3342 SmallVector<Value *, 2> NullValue;
3343 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3344 for (auto &AM : AddrModes) {
3345 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3346 if (DV) {
3347 auto *Type = DV->getType();
3348 if (CommonType && CommonType != Type)
3349 return false;
3350 CommonType = Type;
3351 Map[AM.OriginalValue] = DV;
3352 } else {
3353 NullValue.push_back(AM.OriginalValue);
3354 }
3355 }
3356 assert(CommonType && "At least one non-null value must be!");
3357 for (auto *V : NullValue)
3358 Map[V] = Constant::getNullValue(CommonType);
3359 return true;
3360 }
3361
3362 /// We have a mapping between value A and another value B, where B was a field
3363 /// in the addressing mode represented by A. We also have an original value C
3364 /// representing the address we start with. Traversing from C through phis and
3365 /// selects, we ended up with the A's in the map. This utility function tries to
3366 /// find a value V which is a field in addressing mode C such that, traversing
3367 /// through phi nodes and selects, we end up at the corresponding values B in the map.
3368 /// The utility will create a new Phi/Selects if needed.
3369 // The simple example looks as follows:
3370 // BB1:
3371 // p1 = b1 + 40
3372 // br cond BB2, BB3
3373 // BB2:
3374 // p2 = b2 + 40
3375 // br BB3
3376 // BB3:
3377 // p = phi [p1, BB1], [p2, BB2]
3378 // v = load p
3379 // Map is
3380 // p1 -> b1
3381 // p2 -> b2
3382 // Request is
3383 // p -> ?
3384 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3385 Value *findCommon(FoldAddrToValueMapping &Map) {
3386 // Tracks the simplification of newly created phi nodes. The reason we use
3387 // this mapping is because we will add new created Phi nodes in AddrToBase.
3388 // Simplification of Phi nodes is recursive, so some Phi node may
3389 // be simplified after we added it to AddrToBase. In reality this
3390 // simplification is possible only if original phi/selects were not
3391 // simplified yet.
3392 // Using this mapping we can find the current value in AddrToBase.
3393 SimplificationTracker ST(SQ);
3394
3395 // First step, DFS to create PHI nodes for all intermediate blocks.
3396 // Also fill traverse order for the second step.
3397 SmallVector<Value *, 32> TraverseOrder;
3398 InsertPlaceholders(Map, TraverseOrder, ST);
3399
3400 // Second Step, fill new nodes by merged values and simplify if possible.
3401 FillPlaceholders(Map, TraverseOrder, ST);
3402
3403 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
23: Assuming the condition is false
3404 ST.destroyNewNodes(CommonType);
3405 return nullptr;
3406 }
3407
3408 // Now we'd like to match the new Phi nodes to existing ones.
3409 unsigned PhiNotMatchedCount = 0;
3410 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
24: Calling 'AddressingModeCombiner::MatchPhiSet'
3411 ST.destroyNewNodes(CommonType);
3412 return nullptr;
3413 }
3414
3415 auto *Result = ST.Get(Map.find(Original)->second);
3416 if (Result) {
3417 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3418 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3419 }
3420 return Result;
3421 }
3422
3423 /// Try to match PHI node to Candidate.
3424 /// Matcher tracks the matched Phi nodes.
3425 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3426 SmallSetVector<PHIPair, 8> &Matcher,
3427 PhiNodeSet &PhiNodesToMatch) {
3428 SmallVector<PHIPair, 8> WorkList;
3429 Matcher.insert({ PHI, Candidate });
3430 SmallSet<PHINode *, 8> MatchedPHIs;
3431 MatchedPHIs.insert(PHI);
3432 WorkList.push_back({ PHI, Candidate });
3433 SmallSet<PHIPair, 8> Visited;
3434 while (!WorkList.empty()) {
3435 auto Item = WorkList.pop_back_val();
3436 if (!Visited.insert(Item).second)
3437 continue;
3438 // We iterate over all incoming values to Phi to compare them.
3439 // If the values are different, both of them are Phis, the first one is a
3440 // Phi we added (subject to match), and both of them are in the same basic
3441 // block, then we can match our pair if their values match. So we state that
3442 // these values match and add them to the work list to verify that.
3443 for (auto B : Item.first->blocks()) {
3444 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3445 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3446 if (FirstValue == SecondValue)
3447 continue;
3448
3449 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3450 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3451
3452 // If one of them is not a Phi, or
3453 // the first one is not a Phi node from the set we'd like to match, or
3454 // the Phi nodes are from different basic blocks, then
3455 // we will not be able to match.
3456 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3457 FirstPhi->getParent() != SecondPhi->getParent())
3458 return false;
3459
3460 // If we already matched them then continue.
3461 if (Matcher.count({ FirstPhi, SecondPhi }))
3462 continue;
3463 // So the values are different and do not match. So we need them to
3464 // match. (But we register no more than one match per PHI node, so that
3465 // we won't later try to replace them twice.)
3466 if (MatchedPHIs.insert(FirstPhi).second)
3467 Matcher.insert({ FirstPhi, SecondPhi });
3468 // But we must check it.
3469 WorkList.push_back({ FirstPhi, SecondPhi });
3470 }
3471 }
3472 return true;
3473 }
3474
3475 /// For the given set of PHI nodes (in the SimplificationTracker) try
3476 /// to find their equivalents.
3477 /// Returns false if this matching fails and creation of new Phi is disabled.
3478 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3479 unsigned &PhiNotMatchedCount) {
3480 // Matched and PhiNodesToMatch iterate their elements in a deterministic
3481 // order, so the replacements (ReplacePhi) are also done in a deterministic
3482 // order.
3483 SmallSetVector<PHIPair, 8> Matched;
3484 SmallPtrSet<PHINode *, 8> WillNotMatch;
3485 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3486 while (PhiNodesToMatch.size()) {
25: Loop condition is true. Entering loop body
3487 PHINode *PHI = *PhiNodesToMatch.begin();
3488
3489 // Add us, if no Phi nodes in the basic block we do not match.
3490 WillNotMatch.clear();
3491 WillNotMatch.insert(PHI);
3492
3493 // Traverse all Phis until we found equivalent or fail to do that.
3494 bool IsMatched = false;
3495 for (auto &P : PHI->getParent()->phis()) {
3496 if (&P == PHI)
26: Assuming the condition is false
27: Taking false branch
3497 continue;
3498 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
28: Assuming 'IsMatched' is true
29: Taking true branch
3499 break;
30: Execution continues on line 3507
3500 // If it does not match, collect all Phi nodes from matcher.
3501 // If we end up with no match, then all these Phi nodes will not match
3502 // later.
3503 for (auto M : Matched)
3504 WillNotMatch.insert(M.first);
3505 Matched.clear();
3506 }
3507 if (IsMatched) {
30.1: 'IsMatched' is true
31: Taking true branch
3508 // Replace all matched values and erase them.
3509 for (auto MV : Matched)
32: Assuming '__begin4' is not equal to '__end4'
3510 ST.ReplacePhi(MV.first, MV.second);
33: Calling 'SimplificationTracker::ReplacePhi'
3511 Matched.clear();
3512 continue;
3513 }
3514 // If we are not allowed to create new nodes then bail out.
3515 if (!AllowNewPhiNodes)
3516 return false;
3517 // Just remove all seen values in matcher. They will not match anything.
3518 PhiNotMatchedCount += WillNotMatch.size();
3519 for (auto *P : WillNotMatch)
3520 PhiNodesToMatch.erase(P);
3521 }
3522 return true;
3523 }
3524 /// Fill the placeholders with values from predecessors and simplify them.
3525 void FillPlaceholders(FoldAddrToValueMapping &Map,
3526 SmallVectorImpl<Value *> &TraverseOrder,
3527 SimplificationTracker &ST) {
3528 while (!TraverseOrder.empty()) {
3529 Value *Current = TraverseOrder.pop_back_val();
3530 assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3531 Value *V = Map[Current];
3532
3533 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3534 // CurrentValue also must be Select.
3535 auto *CurrentSelect = cast<SelectInst>(Current);
3536 auto *TrueValue = CurrentSelect->getTrueValue();
3537 assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3538 Select->setTrueValue(ST.Get(Map[TrueValue]));
3539 auto *FalseValue = CurrentSelect->getFalseValue();
3540 assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3541 Select->setFalseValue(ST.Get(Map[FalseValue]));
3542 } else {
3543 // Must be a Phi node then.
3544 auto *PHI = cast<PHINode>(V);
3545 // Fill the Phi node with values from predecessors.
3546 for (auto B : predecessors(PHI->getParent())) {
3547 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
3548 assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3549 PHI->addIncoming(ST.Get(Map[PV]), B);
3550 }
3551 }
3552 Map[Current] = ST.Simplify(V);
3553 }
3554 }
3555
3556 /// Starting from the original value, recursively iterates over the def-use chain
3557 /// up to known ending values represented in a map. For each traversed phi/select,
3558 /// inserts a placeholder Phi or Select.
3559 /// Reports all newly created Phi/Select nodes by adding them to the set.
3560 /// Also reports the order in which the values have been traversed.
3561 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3562 SmallVectorImpl<Value *> &TraverseOrder,
3563 SimplificationTracker &ST) {
3564 SmallVector<Value *, 32> Worklist;
3565 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3566 "Address must be a Phi or Select node");
3567 auto *Dummy = UndefValue::get(CommonType);
3568 Worklist.push_back(Original);
3569 while (!Worklist.empty()) {
3570 Value *Current = Worklist.pop_back_val();
3571 // If it is already visited or it is an ending value, then skip it.
3572 if (Map.find(Current) != Map.end())
3573 continue;
3574 TraverseOrder.push_back(Current);
3575
3576 // CurrentValue must be a Phi node or select. All others must be covered
3577 // by anchors.
3578 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3579 // Is it OK to get metadata from OrigSelect?!
3580 // Create a Select placeholder with dummy value.
3581 SelectInst *Select = SelectInst::Create(
3582 CurrentSelect->getCondition(), Dummy, Dummy,
3583 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3584 Map[Current] = Select;
3585 ST.insertNewSelect(Select);
3586 // We are interested in True and False values.
3587 Worklist.push_back(CurrentSelect->getTrueValue());
3588 Worklist.push_back(CurrentSelect->getFalseValue());
3589 } else {
3590 // It must be a Phi node then.
3591 PHINode *CurrentPhi = cast<PHINode>(Current);
3592 unsigned PredCount = CurrentPhi->getNumIncomingValues();
3593 PHINode *PHI =
3594 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3595 Map[Current] = PHI;
3596 ST.insertNewPhi(PHI);
3597 for (Value *P : CurrentPhi->incoming_values())
3598 Worklist.push_back(P);
3599 }
3600 }
3601 }
3602
3603 bool addrModeCombiningAllowed() {
3604 if (DisableComplexAddrModes)
3605 return false;
3606 switch (DifferentField) {
3607 default:
3608 return false;
3609 case ExtAddrMode::BaseRegField:
3610 return AddrSinkCombineBaseReg;
3611 case ExtAddrMode::BaseGVField:
3612 return AddrSinkCombineBaseGV;
3613 case ExtAddrMode::BaseOffsField:
3614 return AddrSinkCombineBaseOffs;
3615 case ExtAddrMode::ScaledRegField:
3616 return AddrSinkCombineScaledReg;
3617 }
3618 }
3619};
3620} // end anonymous namespace
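// A hypothetical driver for the combiner above (sketch only; 'combineTwoModes'
// and its arguments are placeholders, not code from this file). A second mode
// is accepted only while all collected modes differ in at most one field, and
// combineAddrModes() then builds the Phi/Select network for that field.
static bool combineTwoModes(const SimplifyQuery &SQ, Value *Addr,
                            ExtAddrMode &AM1, ExtAddrMode &AM2) {
  AddressingModeCombiner Combiner(SQ, Addr);
  if (!Combiner.addNewAddrMode(AM1) || !Combiner.addNewAddrMode(AM2))
    return false;                       // Incompatible, e.g. MultipleFields.
  if (!Combiner.combineAddrModes())
    return false;                       // Trivial modes or no common value.
  const ExtAddrMode &Combined = Combiner.getAddrMode();
  (void)Combined;                       // Single mode usable for the access.
  return true;
}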
3621
3622/// Try adding ScaleReg*Scale to the current addressing mode.
3623/// Return true and update AddrMode if this addr mode is legal for the target,
3624/// false if not.
3625bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3626 unsigned Depth) {
3627 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3628 // mode. Just process that directly.
3629 if (Scale == 1)
3630 return matchAddr(ScaleReg, Depth);
3631
3632 // If the scale is 0, it takes nothing to add this.
3633 if (Scale == 0)
3634 return true;
3635
3636 // If we already have a scale of this value, we can add to it, otherwise, we
3637 // need an available scale field.
3638 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3639 return false;
3640
3641 ExtAddrMode TestAddrMode = AddrMode;
3642
3643 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3644 // [A+B + A*7] -> [B+A*8].
3645 TestAddrMode.Scale += Scale;
3646 TestAddrMode.ScaledReg = ScaleReg;
3647
3648 // If the new address isn't legal, bail out.
3649 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3650 return false;
3651
3652 // It was legal, so commit it.
3653 AddrMode = TestAddrMode;
3654
3655 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3656 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3657 // X*Scale + C*Scale to addr mode.
3658 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3659 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3660 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3661 TestAddrMode.InBounds = false;
3662 TestAddrMode.ScaledReg = AddLHS;
3663 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3664
3665 // If this addressing mode is legal, commit it and remember that we folded
3666 // this instruction.
3667 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3668 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3669 AddrMode = TestAddrMode;
3670 return true;
3671 }
3672 }
3673
3674 // Otherwise, not (x+c)*scale, just return what we have.
3675 return true;
3676}
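// Worked sketch of the constant fold performed above (hypothetical values, not
// taken from this file). With ScaleReg = (X + 4) and Scale = 2, the matcher
// rewrites Base + 2*(X + 4) as (Base + 8) + 2*X:
//   TestAddrMode.ScaledReg = X;                       // AddLHS
//   TestAddrMode.BaseOffs += 4 * TestAddrMode.Scale;  // CI->getSExtValue() * Scale
// and commits TestAddrMode only if the target still accepts the new mode.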
3677
3678/// This is a little filter, which returns true if an addressing computation
3679/// involving I might be folded into a load/store accessing it.
3680/// This doesn't need to be perfect, but needs to accept at least
3681/// the set of instructions that MatchOperationAddr can.
3682static bool MightBeFoldableInst(Instruction *I) {
3683 switch (I->getOpcode()) {
3684 case Instruction::BitCast:
3685 case Instruction::AddrSpaceCast:
3686 // Don't touch identity bitcasts.
3687 if (I->getType() == I->getOperand(0)->getType())
3688 return false;
3689 return I->getType()->isIntOrPtrTy();
3690 case Instruction::PtrToInt:
3691 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3692 return true;
3693 case Instruction::IntToPtr:
3694 // We know the input is intptr_t, so this is foldable.
3695 return true;
3696 case Instruction::Add:
3697 return true;
3698 case Instruction::Mul:
3699 case Instruction::Shl:
3700 // Can only handle X*C and X << C.
3701 return isa<ConstantInt>(I->getOperand(1));
3702 case Instruction::GetElementPtr:
3703 return true;
3704 default:
3705 return false;
3706 }
3707}
3708
3709/// Check whether or not \p Val is a legal instruction for \p TLI.
3710/// \note \p Val is assumed to be the product of some type promotion.
3711/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3712/// to be legal, as the non-promoted value would have had the same state.
3713static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3714 const DataLayout &DL, Value *Val) {
3715 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3716 if (!PromotedInst)
3717 return false;
3718 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3719 // If the ISDOpcode is undefined, it was undefined before the promotion.
3720 if (!ISDOpcode)
3721 return true;
3722 // Otherwise, check if the promoted instruction is legal or not.
3723 return TLI.isOperationLegalOrCustom(
3724 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3725}
3726
3727namespace {
3728
3729 /// Helper class to perform type promotion.
3730class TypePromotionHelper {
3731 /// Utility function to add a promoted instruction \p ExtOpnd to
3732 /// \p PromotedInsts and record the type of extension we have seen.
3733 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3734 Instruction *ExtOpnd,
3735 bool IsSExt) {
3736 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3737 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3738 if (It != PromotedInsts.end()) {
3739 // If the new extension is same as original, the information in
3740 // PromotedInsts[ExtOpnd] is still correct.
3741 if (It->second.getInt() == ExtTy)
3742 return;
3743
3744 // Now the new extension is different from old extension, we make
3745 // the type information invalid by setting extension type to
3746 // BothExtension.
3747 ExtTy = BothExtension;
3748 }
3749 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3750 }
3751
3752 /// Utility function to query the original type of instruction \p Opnd
3753 /// with a matched extension type. If the extension doesn't match, we
3754 /// cannot use the information we had on the original type.
3755 /// BothExtension doesn't match any extension type.
3756 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3757 Instruction *Opnd,
3758 bool IsSExt) {
3759 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3760 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3761 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3762 return It->second.getPointer();
3763 return nullptr;
3764 }
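// A minimal trace of the bookkeeping above (hypothetical, assuming 'Op' is an
// i8 operand that gets promoted twice; not code from this file):
//   addPromotedInst(PromotedInsts, Op, /*IsSExt=*/true);   // record (i8, SignExtension)
//   getOrigType(PromotedInsts, Op, /*IsSExt=*/true);       // returns i8
//   getOrigType(PromotedInsts, Op, /*IsSExt=*/false);      // returns nullptr (wrong kind)
//   addPromotedInst(PromotedInsts, Op, /*IsSExt=*/false);  // degrades record to BothExtension
//   getOrigType(PromotedInsts, Op, /*IsSExt=*/true);       // now nullptr as well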
3765
3766 /// Utility function to check whether or not a sign or zero extension
3767 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3768 /// either using the operands of \p Inst or promoting \p Inst.
3769 /// The type of the extension is defined by \p IsSExt.
3770 /// In other words, check if:
3771 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3772 /// #1 Promotion applies:
3773 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3774 /// #2 Operand reuses:
3775 /// ext opnd1 to ConsideredExtType.
3776 /// \p PromotedInsts maps the instructions to their type before promotion.
3777 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3778 const InstrToOrigTy &PromotedInsts, bool IsSExt);
3779
3780 /// Utility function to determine if \p OpIdx should be promoted when
3781 /// promoting \p Inst.
3782 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3783 return !(isa<SelectInst>(Inst) && OpIdx == 0);
3784 }
3785
3786 /// Utility function to promote the operand of \p Ext when this
3787 /// operand is a promotable trunc or sext or zext.
3788 /// \p PromotedInsts maps the instructions to their type before promotion.
3789 /// \p CreatedInstsCost[out] contains the cost of all instructions
3790 /// created to promote the operand of Ext.
3791 /// Newly added extensions are inserted in \p Exts.
3792 /// Newly added truncates are inserted in \p Truncs.
3793 /// Should never be called directly.
3794 /// \return The promoted value which is used instead of Ext.
3795 static Value *promoteOperandForTruncAndAnyExt(
3796 Instruction *Ext, TypePromotionTransaction &TPT,
3797 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3798 SmallVectorImpl<Instruction *> *Exts,
3799 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3800
3801 /// Utility function to promote the operand of \p Ext when this
3802 /// operand is promotable and is not a supported trunc or sext.
3803 /// \p PromotedInsts maps the instructions to their type before promotion.
3804 /// \p CreatedInstsCost[out] contains the cost of all the instructions
3805 /// created to promote the operand of Ext.
3806 /// Newly added extensions are inserted in \p Exts.
3807 /// Newly added truncates are inserted in \p Truncs.
3808 /// Should never be called directly.
3809 /// \return The promoted value which is used instead of Ext.
3810 static Value *promoteOperandForOther(Instruction *Ext,
3811 TypePromotionTransaction &TPT,
3812 InstrToOrigTy &PromotedInsts,
3813 unsigned &CreatedInstsCost,
3814 SmallVectorImpl<Instruction *> *Exts,
3815 SmallVectorImpl<Instruction *> *Truncs,
3816 const TargetLowering &TLI, bool IsSExt);
3817
3818 /// \see promoteOperandForOther.
3819 static Value *signExtendOperandForOther(
3820 Instruction *Ext, TypePromotionTransaction &TPT,
3821 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3822 SmallVectorImpl<Instruction *> *Exts,
3823 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3824 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3825 Exts, Truncs, TLI, true);
3826 }
3827
3828 /// \see promoteOperandForOther.
3829 static Value *zeroExtendOperandForOther(
3830 Instruction *Ext, TypePromotionTransaction &TPT,
3831 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3832 SmallVectorImpl<Instruction *> *Exts,
3833 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3834 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3835 Exts, Truncs, TLI, false);
3836 }
3837
3838public:
3839 /// Type for the utility function that promotes the operand of Ext.
3840 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3841 InstrToOrigTy &PromotedInsts,
3842 unsigned &CreatedInstsCost,
3843 SmallVectorImpl<Instruction *> *Exts,
3844 SmallVectorImpl<Instruction *> *Truncs,
3845 const TargetLowering &TLI);
3846
3847 /// Given a sign/zero extend instruction \p Ext, return the appropriate
3848 /// action to promote the operand of \p Ext instead of using Ext.
3849 /// \return NULL if no promotable action is possible with the current
3850 /// sign extension.
3851 /// \p InsertedInsts keeps track of all the instructions inserted by the
3852 /// other CodeGenPrepare optimizations. This information is important
3853 /// because we do not want to promote these instructions as CodeGenPrepare
3854 /// will reinsert them later, thus creating an infinite loop: create/remove.
3855 /// \p PromotedInsts maps the instructions to their type before promotion.
3856 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3857 const TargetLowering &TLI,
3858 const InstrToOrigTy &PromotedInsts);
3859};
3860
3861} // end anonymous namespace
3862
3863bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3864 Type *ConsideredExtType,
3865 const InstrToOrigTy &PromotedInsts,
3866 bool IsSExt) {
3867 // The promotion helper does not know how to deal with vector types yet.
3868 // To be able to fix that, we would need to fix the places where we
3869 // statically extend, e.g., constants and such.
3870 if (Inst->getType()->isVectorTy())
3871 return false;
3872
3873 // We can always get through zext.
3874 if (isa<ZExtInst>(Inst))
3875 return true;
3876
3877 // sext(sext) is ok too.
3878 if (IsSExt && isa<SExtInst>(Inst))
3879 return true;
3880
3881 // We can get through binary operator, if it is legal. In other words, the
3882 // binary operator must have a nuw or nsw flag.
3883 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3884 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3885 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3886 (IsSExt && BinOp->hasNoSignedWrap())))
3887 return true;
3888
3889 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3890 if ((Inst->getOpcode() == Instruction::And ||
3891 Inst->getOpcode() == Instruction::Or))
3892 return true;
3893
3894 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3895 if (Inst->getOpcode() == Instruction::Xor) {
3896 const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3897 // Make sure it is not a NOT.
3898 if (Cst && !Cst->getValue().isAllOnesValue())
3899 return true;
3900 }
3901
3902 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3903 // It may change a poisoned value into a regular value, like
3904 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3905   //          poisoned value                      regular value
3906 // It should be OK since undef covers valid value.
3907 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3908 return true;
3909
3910 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3911 // It may change a poisoned value into a regular value, like
3912 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3913   //          poisoned value                      regular value
3914 // It should be OK since undef covers valid value.
3915 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3916 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
3917 if (ExtInst->hasOneUse()) {
3918 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
3919 if (AndInst && AndInst->getOpcode() == Instruction::And) {
3920 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3921 if (Cst &&
3922 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3923 return true;
3924 }
3925 }
3926 }
3927
3928 // Check if we can do the following simplification.
3929 // ext(trunc(opnd)) --> ext(opnd)
3930 if (!isa<TruncInst>(Inst))
3931 return false;
3932
3933 Value *OpndVal = Inst->getOperand(0);
3934 // Check if we can use this operand in the extension.
3935 // If the type is larger than the result type of the extension, we cannot.
3936 if (!OpndVal->getType()->isIntegerTy() ||
3937 OpndVal->getType()->getIntegerBitWidth() >
3938 ConsideredExtType->getIntegerBitWidth())
3939 return false;
3940
3941 // If the operand of the truncate is not an instruction, we will not have
3942 // any information on the dropped bits.
3943   // (Actually we could for constants, but it is not worth the extra logic).
3944 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3945 if (!Opnd)
3946 return false;
3947
3948   // Check if the source type is narrow enough.
3949   // I.e., check that the trunc just drops extended bits of the same kind as
3950   // the extension.
3951 // #1 get the type of the operand and check the kind of the extended bits.
3952 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3953 if (OpndType)
3954 ;
3955 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3956 OpndType = Opnd->getOperand(0)->getType();
3957 else
3958 return false;
3959
3960 // #2 check that the truncate just drops extended bits.
3961 return Inst->getType()->getIntegerBitWidth() >=
3962 OpndType->getIntegerBitWidth();
3963}
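// Illustrative example of the trunc case above (assumed IR, not from this
// file):
//   %w = sext i8 %x to i32
//   %t = trunc i32 %w to i16
//   %e = sext i16 %t to i64
// Here the trunc operand %w is itself a sext from i8, so the bits dropped by
// the trunc are sign-extension bits of the same kind, and the extension can
// get through.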
3964
3965TypePromotionHelper::Action TypePromotionHelper::getAction(
3966 Instruction *Ext, const SetOfInstrs &InsertedInsts,
3967 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3968   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3969          "Unexpected instruction type");
3970 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3971 Type *ExtTy = Ext->getType();
3972 bool IsSExt = isa<SExtInst>(Ext);
3973 // If the operand of the extension is not an instruction, we cannot
3974 // get through.
3975   // If it is, check whether we can get through.
3976 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3977 return nullptr;
3978
3979 // Do not promote if the operand has been added by codegenprepare.
3980 // Otherwise, it means we are undoing an optimization that is likely to be
3981 // redone, thus causing potential infinite loop.
3982 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3983 return nullptr;
3984
3985 // SExt or Trunc instructions.
3986 // Return the related handler.
3987 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3988 isa<ZExtInst>(ExtOpnd))
3989 return promoteOperandForTruncAndAnyExt;
3990
3991 // Regular instruction.
3992 // Abort early if we will have to insert non-free instructions.
3993 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3994 return nullptr;
3995 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3996}
3997
3998Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3999 Instruction *SExt, TypePromotionTransaction &TPT,
4000 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4001 SmallVectorImpl<Instruction *> *Exts,
4002 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4003 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4004 // get through it and this method should not be called.
4005 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4006 Value *ExtVal = SExt;
4007 bool HasMergedNonFreeExt = false;
4008 if (isa<ZExtInst>(SExtOpnd)) {
4009 // Replace s|zext(zext(opnd))
4010 // => zext(opnd).
4011 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4012 Value *ZExt =
4013 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4014 TPT.replaceAllUsesWith(SExt, ZExt);
4015 TPT.eraseInstruction(SExt);
4016 ExtVal = ZExt;
4017 } else {
4018 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4019 // => z|sext(opnd).
4020 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4021 }
4022 CreatedInstsCost = 0;
4023
4024 // Remove dead code.
4025 if (SExtOpnd->use_empty())
4026 TPT.eraseInstruction(SExtOpnd);
4027
4028 // Check if the extension is still needed.
4029 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4030 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4031 if (ExtInst) {
4032 if (Exts)
4033 Exts->push_back(ExtInst);
4034 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4035 }
4036 return ExtVal;
4037 }
4038
4039 // At this point we have: ext ty opnd to ty.
4040 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4041 Value *NextVal = ExtInst->getOperand(0);
4042 TPT.eraseInstruction(ExtInst, NextVal);
4043 return NextVal;
4044}
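// A minimal illustrative case (assumed IR): for
//   %z = zext i8 %v to i16
//   %e = sext i16 %z to i32
// the helper rewrites %e into a single "zext i8 %v to i32", erases the old
// sext (and %z if it becomes dead), and charges at most one non-free extension
// to CreatedInstsCost.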
4045
4046Value *TypePromotionHelper::promoteOperandForOther(
4047 Instruction *Ext, TypePromotionTransaction &TPT,
4048 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4049 SmallVectorImpl<Instruction *> *Exts,
4050 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4051 bool IsSExt) {
4052 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4053 // get through it and this method should not be called.
4054 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4055 CreatedInstsCost = 0;
4056 if (!ExtOpnd->hasOneUse()) {
4057 // ExtOpnd will be promoted.
4058 // All its uses, but Ext, will need to use a truncated value of the
4059 // promoted version.
4060 // Create the truncate now.
4061 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4062 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4063 // Insert it just after the definition.
4064 ITrunc->moveAfter(ExtOpnd);
4065 if (Truncs)
4066 Truncs->push_back(ITrunc);
4067 }
4068
4069 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4070 // Restore the operand of Ext (which has been replaced by the previous call
4071 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4072 TPT.setOperand(Ext, 0, ExtOpnd);
4073 }
4074
4075 // Get through the Instruction:
4076 // 1. Update its type.
4077 // 2. Replace the uses of Ext by Inst.
4078 // 3. Extend each operand that needs to be extended.
4079
4080 // Remember the original type of the instruction before promotion.
4081 // This is useful to know that the high bits are sign extended bits.
4082 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4083 // Step #1.
4084 TPT.mutateType(ExtOpnd, Ext->getType());
4085 // Step #2.
4086 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4087 // Step #3.
4088 Instruction *ExtForOpnd = Ext;
4089
4090   LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4091 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4092 ++OpIdx) {
4093     LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4094 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4095 !shouldExtOperand(ExtOpnd, OpIdx)) {
4096       LLVM_DEBUG(dbgs() << "No need to propagate\n");
4097 continue;
4098 }
4099 // Check if we can statically extend the operand.
4100 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4101 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4102       LLVM_DEBUG(dbgs() << "Statically extend\n");
4103 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4104 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4105 : Cst->getValue().zext(BitWidth);
4106 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4107 continue;
4108 }
4109 // UndefValue are typed, so we have to statically sign extend them.
4110 if (isa<UndefValue>(Opnd)) {
4111       LLVM_DEBUG(dbgs() << "Statically extend\n");
4112 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4113 continue;
4114 }
4115
4116 // Otherwise we have to explicitly sign extend the operand.
4117 // Check if Ext was reused to extend an operand.
4118 if (!ExtForOpnd) {
4119 // If yes, create a new one.
4120       LLVM_DEBUG(dbgs() << "More operands to ext\n");
4121 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
4122 : TPT.createZExt(Ext, Opnd, Ext->getType());
4123 if (!isa<Instruction>(ValForExtOpnd)) {
4124 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4125 continue;
4126 }
4127 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
4128 }
4129 if (Exts)
4130 Exts->push_back(ExtForOpnd);
4131 TPT.setOperand(ExtForOpnd, 0, Opnd);
4132
4133 // Move the sign extension before the insertion point.
4134 TPT.moveBefore(ExtForOpnd, ExtOpnd);
4135 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
4136 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
4137 // If more sext are required, new instructions will have to be created.
4138 ExtForOpnd = nullptr;
4139 }
4140 if (ExtForOpnd == Ext) {
4141     LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4142 TPT.eraseInstruction(Ext);
4143 }
4144 return ExtOpnd;
4145}
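// Illustrative sketch of the multi-use path above (assumed IR): if
//   %t = add nsw i16 %a, %b        ; also used by a store
//   %e = sext i16 %t to i32
// is promoted, %t is mutated to i32, the uses of %e are replaced by %t, the
// store is rewired to a "trunc i32 %t to i16" inserted right after %t, and
// %a and %b are extended (statically if they are constants).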
4146
4147/// Check whether or not promoting an instruction to a wider type is profitable.
4148/// \p NewCost gives the cost of extension instructions created by the
4149/// promotion.
4150/// \p OldCost gives the cost of extension instructions before the promotion
4151 /// plus the number of instructions that have been matched in the
4152 /// addressing mode by the promotion.
4153/// \p PromotedOperand is the value that has been promoted.
4154/// \return True if the promotion is profitable, false otherwise.
4155bool AddressingModeMatcher::isPromotionProfitable(
4156 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4157   LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4158                     << '\n');
4159 // The cost of the new extensions is greater than the cost of the
4160 // old extension plus what we folded.
4161 // This is not profitable.
4162 if (NewCost > OldCost)
4163 return false;
4164 if (NewCost < OldCost)
4165 return true;
4166 // The promotion is neutral but it may help folding the sign extension in
4167 // loads for instance.
4168 // Check that we did not create an illegal instruction.
4169 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4170}
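// Worked example (illustrative numbers): if promotion creates one non-free
// extension (NewCost = 1) while the original extension was non-free and one
// extra instruction was folded into the addressing mode (OldCost = 1 + 1 = 2),
// then NewCost < OldCost and the promotion is considered profitable.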
4171
4172/// Given an instruction or constant expr, see if we can fold the operation
4173/// into the addressing mode. If so, update the addressing mode and return
4174/// true, otherwise return false without modifying AddrMode.
4175 /// If \p MovedAway is not NULL, it indicates whether or not AddrInst has to
4176 /// be folded into the addressing mode on success.
4177 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
4178 /// mode because it has been moved away.
4179 /// Thus AddrInst must not be added to the matched instructions.
4180/// This state can happen when AddrInst is a sext, since it may be moved away.
4181/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4182/// not be referenced anymore.
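/// For example (an illustrative, target-independent view): a successful match
/// of "gep %base, %idx" can fold into an addressing mode of the form
/// BaseGV + BaseReg + Scale * ScaledReg + BaseOffs, here with BaseReg = %base,
/// ScaledReg = %idx, and Scale equal to the element size, subject to
/// TLI.isLegalAddressingMode.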
4183bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4184 unsigned Depth,
4185 bool *MovedAway) {
4186 // Avoid exponential behavior on extremely deep expression trees.
4187 if (Depth >= 5) return false;
4188
4189 // By default, all matched instructions stay in place.
4190 if (MovedAway)
4191 *MovedAway = false;
4192
4193 switch (Opcode) {
4194 case Instruction::PtrToInt:
4195 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4196 return matchAddr(AddrInst->getOperand(0), Depth);
4197 case Instruction::IntToPtr: {
4198 auto AS = AddrInst->getType()->getPointerAddressSpace();
4199 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4200 // This inttoptr is a no-op if the integer type is pointer sized.
4201 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4202 return matchAddr(AddrInst->getOperand(0), Depth);
4203 return false;
4204 }
4205 case Instruction::BitCast:
4206 // BitCast is always a noop, and we can handle it as long as it is
4207 // int->int or pointer->pointer (we don't want int<->fp or something).
4208 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4209 // Don't touch identity bitcasts. These were probably put here by LSR,
4210 // and we don't want to mess around with them. Assume it knows what it
4211 // is doing.
4212 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4213 return matchAddr(AddrInst->getOperand(0), Depth);
4214 return false;
4215 case Instruction::AddrSpaceCast: {
4216 unsigned SrcAS
4217 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4218 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4219 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
4220 return matchAddr(AddrInst->getOperand(0), Depth);
4221 return false;
4222 }
4223 case Instruction::Add: {
4224 // Check to see if we can merge in the RHS then the LHS. If so, we win.
4225 ExtAddrMode BackupAddrMode = AddrMode;
4226 unsigned OldSize = AddrModeInsts.size();
4227 // Start a transaction at this point.
4228 // The LHS may match but not the RHS.
4229 // Therefore, we need a higher level restoration point to undo partially
4230 // matched operation.
4231 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4232 TPT.getRestorationPoint();
4233
4234 AddrMode.InBounds = false;
4235 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
4236 matchAddr(AddrInst->getOperand(0), Depth+1))
4237 return true;
4238
4239 // Restore the old addr mode info.
4240 AddrMode = BackupAddrMode;
4241 AddrModeInsts.resize(OldSize);
4242 TPT.rollback(LastKnownGood);
4243
4244 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
4245 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
4246 matchAddr(AddrInst->getOperand(1), Depth+1))
4247 return true;
4248
4249 // Otherwise we definitely can't merge the ADD in.
4250 AddrMode = BackupAddrMode;
4251 AddrModeInsts.resize(OldSize);
4252 TPT.rollback(LastKnownGood);
4253 break;
4254 }
4255 //case Instruction::Or:
4256 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4257 //break;
4258 case Instruction::Mul:
4259 case Instruction::Shl: {
4260 // Can only handle X*C and X << C.
4261 AddrMode.InBounds = false;
4262 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4263 if (!RHS || RHS->getBitWidth() > 64)
4264 return false;
4265 int64_t Scale = RHS->getSExtValue();
4266 if (Opcode == Instruction::Shl)
4267 Scale = 1LL << Scale;
4268
4269 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4270 }
4271 case Instruction::GetElementPtr: {
4272     // Scan the GEP. We check whether it contains constant offsets and at most
4273     // one variable offset.
4274 int VariableOperand = -1;
4275 unsigned VariableScale = 0;
4276
4277 int64_t ConstantOffset = 0;
4278 gep_type_iterator GTI = gep_type_begin(AddrInst);
4279 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4280 if (StructType *STy = GTI.getStructTypeOrNull()) {
4281 const StructLayout *SL = DL.getStructLayout(STy);
4282 unsigned Idx =
4283 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4284 ConstantOffset += SL->getElementOffset(Idx);
4285 } else {
4286 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
4287 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4288 const APInt &CVal = CI->getValue();
4289 if (CVal.getMinSignedBits() <= 64) {
4290 ConstantOffset += CVal.getSExtValue() * TypeSize;
4291 continue;
4292 }
4293 }
4294 if (TypeSize) { // Scales of zero don't do anything.
4295 // We only allow one variable index at the moment.
4296 if (VariableOperand != -1)
4297 return false;
4298
4299 // Remember the variable index.
4300 VariableOperand = i;
4301 VariableScale = TypeSize;
4302 }
4303 }
4304 }
4305
4306 // A common case is for the GEP to only do a constant offset. In this case,
4307 // just add it to the disp field and check validity.
4308 if (VariableOperand == -1) {
4309 AddrMode.BaseOffs += ConstantOffset;
4310 if (ConstantOffset == 0 ||
4311 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4312 // Check to see if we can fold the base pointer in too.
4313 if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4314 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4315 AddrMode.InBounds = false;
4316 return true;
4317 }
4318 } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4319 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4320 ConstantOffset > 0) {
4321 // Record GEPs with non-zero offsets as candidates for splitting in the
4322 // event that the offset cannot fit into the r+i addressing mode.
4323 // Simple and common case that only one GEP is used in calculating the
4324 // address for the memory access.
4325 Value *Base = AddrInst->getOperand(0);
4326 auto *BaseI = dyn_cast<Instruction>(Base);
4327 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4328 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4329 (BaseI && !isa<CastInst>(BaseI) &&
4330 !isa<GetElementPtrInst>(BaseI))) {
4331 // Make sure the parent block allows inserting non-PHI instructions
4332 // before the terminator.
4333 BasicBlock *Parent =
4334 BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4335 if (!Parent->getTerminator()->isEHPad())
4336 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4337 }
4338 }
4339 AddrMode.BaseOffs -= ConstantOffset;
4340 return false;
4341 }
4342
4343 // Save the valid addressing mode in case we can't match.
4344 ExtAddrMode BackupAddrMode = AddrMode;
4345 unsigned OldSize = AddrModeInsts.size();
4346
4347 // See if the scale and offset amount is valid for this target.
4348 AddrMode.BaseOffs += ConstantOffset;
4349 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4350 AddrMode.InBounds = false;
4351
4352 // Match the base operand of the GEP.
4353 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4354 // If it couldn't be matched, just stuff the value in a register.
4355 if (AddrMode.HasBaseReg) {
4356 AddrMode = BackupAddrMode;
4357 AddrModeInsts.resize(OldSize);
4358 return false;
4359 }
4360 AddrMode.HasBaseReg = true;
4361 AddrMode.BaseReg = AddrInst->getOperand(0);
4362 }
4363
4364 // Match the remaining variable portion of the GEP.
4365 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4366 Depth)) {
4367 // If it couldn't be matched, try stuffing the base into a register
4368 // instead of matching it, and retrying the match of the scale.
4369 AddrMode = BackupAddrMode;
4370 AddrModeInsts.resize(OldSize);
4371 if (AddrMode.HasBaseReg)
4372 return false;
4373 AddrMode.HasBaseReg = true;
4374 AddrMode.BaseReg = AddrInst->getOperand(0);
4375 AddrMode.BaseOffs += ConstantOffset;
4376 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4377 VariableScale, Depth)) {
4378 // If even that didn't work, bail.
4379 AddrMode = BackupAddrMode;
4380 AddrModeInsts.resize(OldSize);
4381 return false;
4382 }
4383 }
4384
4385 return true;
4386 }
4387 case Instruction::SExt:
4388 case Instruction::ZExt: {
4389 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4390 if (!Ext)
4391 return false;
4392
4393 // Try to move this ext out of the way of the addressing mode.
4394 // Ask for a method for doing so.
4395 TypePromotionHelper::Action TPH =
4396 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4397 if (!TPH)
4398 return false;
4399
4400 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4401 TPT.getRestorationPoint();
4402 unsigned CreatedInstsCost = 0;
4403 unsigned ExtCost = !TLI.isExtFree(Ext);
4404 Value *PromotedOperand =
4405 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4406 // SExt has been moved away.
4407 // Thus either it will be rematched later in the recursive calls or it is
4408 // gone. Anyway, we must not fold it into the addressing mode at this point.
4409 // E.g.,
4410 // op = add opnd, 1
4411 // idx = ext op
4412 // addr = gep base, idx
4413 // is now:
4414 // promotedOpnd = ext opnd <- no match here
4415 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4416 // addr = gep base, op <- match
4417 if (MovedAway)
4418 *MovedAway = true;
4419
4420     assert(PromotedOperand &&
4421            "TypePromotionHelper should have filtered out those cases");
4422
4423 ExtAddrMode BackupAddrMode = AddrMode;
4424 unsigned OldSize = AddrModeInsts.size();
4425
4426 if (!matchAddr(PromotedOperand, Depth) ||
4427 // The total of the new cost is equal to the cost of the created
4428 // instructions.
4429 // The total of the old cost is equal to the cost of the extension plus
4430 // what we have saved in the addressing mode.
4431 !isPromotionProfitable(CreatedInstsCost,
4432 ExtCost + (AddrModeInsts.size() - OldSize),
4433 PromotedOperand)) {
4434 AddrMode = BackupAddrMode;
4435 AddrModeInsts.resize(OldSize);
4436       LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4437 TPT.rollback(LastKnownGood);
4438 return false;
4439 }
4440 return true;
4441 }
4442 }
4443 return false;
4444}
4445
4446/// If we can, try to add the value of 'Addr' into the current addressing mode.
4447/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4448/// unmodified. This assumes that Addr is either a pointer type or intptr_t
4449/// for the target.
4450///
4451bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4452 // Start a transaction at this point that we will rollback if the matching
4453 // fails.
4454 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4455 TPT.getRestorationPoint();
4456 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4457 // Fold in immediates if legal for the target.
4458 AddrMode.BaseOffs += CI->getSExtValue();
4459 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4460 return true;
4461 AddrMode.BaseOffs -= CI->getSExtValue();
4462 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4463 // If this is a global variable, try to fold it into the addressing mode.
4464 if (!AddrMode.BaseGV) {
4465 AddrMode.BaseGV = GV;
4466 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4467 return true;
4468 AddrMode.BaseGV = nullptr;
4469 }
4470 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4471 ExtAddrMode BackupAddrMode = AddrMode;
4472 unsigned OldSize = AddrModeInsts.size();
4473
4474 // Check to see if it is possible to fold this operation.
4475 bool MovedAway = false;
4476 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4477 // This instruction may have been moved away. If so, there is nothing
4478 // to check here.
4479 if (MovedAway)
4480 return true;
4481 // Okay, it's possible to fold this. Check to see if it is actually
4482 // *profitable* to do so. We use a simple cost model to avoid increasing
4483 // register pressure too much.
4484 if (I->hasOneUse() ||
4485 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4486 AddrModeInsts.push_back(I);
4487 return true;
4488 }
4489
4490 // It isn't profitable to do this, roll back.
4491 //cerr << "NOT FOLDING: " << *I;
4492 AddrMode = BackupAddrMode;
4493 AddrModeInsts.resize(OldSize);
4494 TPT.rollback(LastKnownGood);
4495 }
4496 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4497 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4498 return true;
4499 TPT.rollback(LastKnownGood);
4500 } else if (isa<ConstantPointerNull>(Addr)) {
4501 // Null pointer gets folded without affecting the addressing mode.
4502 return true;
4503 }
4504
4505   // Worst case, the target should support [reg] addressing modes. :)
4506 if (!AddrMode.HasBaseReg) {
4507 AddrMode.HasBaseReg = true;
4508 AddrMode.BaseReg = Addr;
4509 // Still check for legality in case the target supports [imm] but not [i+r].
4510 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4511 return true;
4512 AddrMode.HasBaseReg = false;
4513 AddrMode.BaseReg = nullptr;
4514 }
4515
4516 // If the base register is already taken, see if we can do [r+r].
4517 if (AddrMode.Scale == 0) {
4518 AddrMode.Scale = 1;
4519 AddrMode.ScaledReg = Addr;
4520 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4521 return true;
4522 AddrMode.Scale = 0;
4523 AddrMode.ScaledReg = nullptr;
4524 }
4525 // Couldn't match.
4526 TPT.rollback(LastKnownGood);
4527 return false;
4528}
4529
4530/// Check to see if all uses of OpVal by the specified inline asm call are due
4531/// to memory operands. If so, return true, otherwise return false.
4532static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4533 const TargetLowering &TLI,
4534 const TargetRegisterInfo &TRI) {
4535 const Function *F = CI->getFunction();
4536 TargetLowering::AsmOperandInfoVector TargetConstraints =
4537 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4538 ImmutableCallSite(CI));
4539
4540 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4541 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4542
4543 // Compute the constraint code and ConstraintType to use.
4544 TLI.ComputeConstraintToUse(OpInfo, SDValue());
4545
4546 // If this asm operand is our Value*, and if it isn't an indirect memory
4547 // operand, we can't fold it!
4548 if (OpInfo.CallOperandVal == OpVal &&
4549 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4550 !OpInfo.isIndirect))
4551 return false;
4552 }
4553
4554 return true;
4555}
4556
4557// Max number of memory uses to look at before aborting the search to conserve
4558// compile time.
4559static constexpr int MaxMemoryUsesToScan = 20;
4560
4561/// Recursively walk all the uses of I until we find a memory use.
4562/// If we find an obviously non-foldable instruction, return true.
4563/// Add the ultimately found memory instructions to MemoryUses.
4564static bool FindAllMemoryUses(
4565 Instruction *I,
4566 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4567 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4568 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
4569 BlockFrequencyInfo *BFI, int SeenInsts = 0) {
4570 // If we already considered this instruction, we're done.
4571 if (!ConsideredInsts.insert(I).second)
4572 return false;
4573
4574 // If this is an obviously unfoldable instruction, bail out.
4575 if (!MightBeFoldableInst(I))
4576 return true;
4577
4578 // Loop over all the uses, recursively processing them.
4579 for (Use &U : I->uses()) {
4580 // Conservatively return true if we're seeing a large number or a deep chain
4581 // of users. This avoids excessive compilation times in pathological cases.
4582 if (SeenInsts++ >= MaxMemoryUsesToScan)
4583 return true;
4584
4585 Instruction *UserI = cast<Instruction>(U.getUser());
4586 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4587 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4588 continue;
4589 }
4590
4591 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4592 unsigned opNo = U.getOperandNo();
4593 if (opNo != StoreInst::getPointerOperandIndex())
4594 return true; // Storing addr, not into addr.
4595 MemoryUses.push_back(std::make_pair(SI, opNo));
4596 continue;
4597 }
4598
4599 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4600 unsigned opNo = U.getOperandNo();
4601 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4602 return true; // Storing addr, not into addr.
4603 MemoryUses.push_back(std::make_pair(RMW, opNo));
4604 continue;
4605 }
4606
4607 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4608 unsigned opNo = U.getOperandNo();
4609 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4610 return true; // Storing addr, not into addr.
4611 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4612 continue;
4613 }
4614
4615 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4616 if (CI->hasFnAttr(Attribute::Cold)) {
4617 // If this is a cold call, we can sink the addressing calculation into
4618 // the cold path. See optimizeCallInst
4619 bool OptForSize = OptSize ||
4620 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
4621 if (!OptForSize)
4622 continue;
4623 }
4624
4625 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4626 if (!IA) return true;
4627
4628 // If this is a memory operand, we're cool, otherwise bail out.
4629 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4630 return true;
4631 continue;
4632 }
4633
4634 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
4635 PSI, BFI, SeenInsts))
4636 return true;
4637 }
4638
4639 return false;
4640}
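// Illustrative walk (assumed IR): for
//   %p = getelementptr i8, i8* %b, i64 8
//   %v = load i8, i8* %p
//   store i8* %p, i8** %slot
// the load is a foldable memory use of %p, but the store uses %p as the value
// being stored rather than as its address, so the walk returns true
// (non-foldable use found).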
4641
4642/// Return true if Val is already known to be live at the use site that we're
4643/// folding it into. If so, there is no cost to include it in the addressing
4644/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4645/// instruction already.
4646bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4647 Value *KnownLive2) {
4648 // If Val is either of the known-live values, we know it is live!
4649 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4650 return true;
4651
4652 // All values other than instructions and arguments (e.g. constants) are live.
4653 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4654
4655   // If Val is a constant-sized alloca in the entry block, it is live; this is
4656 // true because it is just a reference to the stack/frame pointer, which is
4657 // live for the whole function.
4658 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4659 if (AI->isStaticAlloca())
4660 return true;
4661
4662 // Check to see if this value is already used in the memory instruction's
4663 // block. If so, it's already live into the block at the very least, so we
4664 // can reasonably fold it.
4665 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4666}
4667
4668/// It is possible for the addressing mode of the machine to fold the specified
4669/// instruction into a load or store that ultimately uses it.
4670/// However, the specified instruction has multiple uses.
4671/// Given this, it may actually increase register pressure to fold it
4672/// into the load. For example, consider this code:
4673///
4674/// X = ...
4675/// Y = X+1
4676/// use(Y) -> nonload/store
4677/// Z = Y+1
4678/// load Z
4679///
4680/// In this case, Y has multiple uses, and can be folded into the load of Z
4681/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4682/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4683/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4684/// number of computations either.
4685///
4686/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4687/// X was live across 'load Z' for other reasons, we actually *would* want to
4688/// fold the addressing mode in the Z case. This would make Y die earlier.
4689bool AddressingModeMatcher::
4690isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4691 ExtAddrMode &AMAfter) {
4692 if (IgnoreProfitability) return true;
4693
4694 // AMBefore is the addressing mode before this instruction was folded into it,
4695 // and AMAfter is the addressing mode after the instruction was folded. Get
4696 // the set of registers referenced by AMAfter and subtract out those
4697 // referenced by AMBefore: this is the set of values which folding in this
4698 // address extends the lifetime of.
4699 //
4700 // Note that there are only two potential values being referenced here,
4701 // BaseReg and ScaleReg (global addresses are always available, as are any
4702 // folded immediates).
4703 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4704
4705 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4706 // lifetime wasn't extended by adding this instruction.
4707 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4708 BaseReg = nullptr;
4709 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4710 ScaledReg = nullptr;
4711
4712   // If folding this instruction (and its subexprs) didn't extend any live
4713 // ranges, we're ok with it.
4714 if (!BaseReg && !ScaledReg)
4715 return true;
4716
4717 // If all uses of this instruction can have the address mode sunk into them,
4718 // we can remove the addressing mode and effectively trade one live register
4719 // for another (at worst.) In this context, folding an addressing mode into
4720 // the use is just a particularly nice way of sinking it.
4721 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4722 SmallPtrSet<Instruction*, 16> ConsideredInsts;
4723 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
4724 PSI, BFI))
4725 return false; // Has a non-memory, non-foldable use!
4726
4727 // Now that we know that all uses of this instruction are part of a chain of
4728 // computation involving only operations that could theoretically be folded
4729 // into a memory use, loop over each of these memory operation uses and see
4730 // if they could *actually* fold the instruction. The assumption is that
4731 // addressing modes are cheap and that duplicating the computation involved
4732 // many times is worthwhile, even on a fastpath. For sinking candidates
4733 // (i.e. cold call sites), this serves as a way to prevent excessive code
4734   // growth, since most architectures have some reasonably small and fast way to
4735   // compute an effective address (e.g., LEA on x86).
4736 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4737 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4738 Instruction *User = MemoryUses[i].first;
4739 unsigned OpNo = MemoryUses[i].second;
4740
4741 // Get the access type of this use. If the use isn't a pointer, we don't
4742 // know what it accesses.
4743 Value *Address = User->getOperand(OpNo);
4744 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4745 if (!AddrTy)
4746 return false;
4747 Type *AddressAccessTy = AddrTy->getElementType();
4748 unsigned AS = AddrTy->getAddressSpace();
4749
4750 // Do a match against the root of this address, ignoring profitability. This
4751 // will tell us if the addressing mode for the memory operation will
4752 // *actually* cover the shared instruction.
4753 ExtAddrMode Result;
4754 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4755 0);
4756 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4757 TPT.getRestorationPoint();
4758 AddressingModeMatcher Matcher(
4759 MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4760 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
4761 Matcher.IgnoreProfitability = true;
4762 bool Success = Matcher.matchAddr(Address, 0);
4763     (void)Success; assert(Success && "Couldn't select *anything*?");
4764
4765 // The match was to check the profitability, the changes made are not
4766 // part of the original matcher. Therefore, they should be dropped
4767 // otherwise the original matcher will not present the right state.
4768 TPT.rollback(LastKnownGood);
4769
4770 // If the match didn't cover I, then it won't be shared by it.
4771 if (!is_contained(MatchedAddrModeInsts, I))
4772 return false;
4773
4774 MatchedAddrModeInsts.clear();
4775 }
4776
4777 return true;
4778}
4779
4780/// Return true if the specified values are defined in a
4781/// different basic block than BB.
4782static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4783 if (Instruction *I = dyn_cast<Instruction>(V))
4784 return I->getParent() != BB;
4785 return false;
4786}
4787
4788 /// Sink the addressing mode computation immediately before MemoryInst if doing so
4789/// can be done without increasing register pressure. The need for the
4790/// register pressure constraint means this can end up being an all or nothing
4791/// decision for all uses of the same addressing computation.
4792///
4793/// Load and Store Instructions often have addressing modes that can do
4794/// significant amounts of computation. As such, instruction selection will try
4795/// to get the load or store to do as much computation as possible for the
4796/// program. The problem is that isel can only see within a single block. As
4797/// such, we sink as much legal addressing mode work into the block as possible.
4798///
4799/// This method is used to optimize both load/store and inline asms with memory
4800/// operands. It's also used to sink addressing computations feeding into cold
4801/// call sites into their (cold) basic block.
4802///
4803/// The motivation for handling sinking into cold blocks is that doing so can
4804/// both enable other address mode sinking (by satisfying the register pressure
4805/// constraint above), and reduce register pressure globally (by removing the
4806/// addressing mode computation from the fast path entirely.).
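/// As an illustrative sketch (assumed IR), an address computed in another
/// block:
///   bb1:  %addr = getelementptr inbounds i32, i32* %base, i64 10
///   bb2:  %v    = load i32, i32* %addr
/// is rebuilt as a "sunkaddr" computation immediately before the load in bb2,
/// so that instruction selection can fold the offset into the memory operand.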
4807bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4808 Type *AccessTy, unsigned AddrSpace) {
4809 Value *Repl = Addr;
4810
4811 // Try to collapse single-value PHI nodes. This is necessary to undo
4812 // unprofitable PRE transformations.
4813 SmallVector<Value*, 8> worklist;
4814 SmallPtrSet<Value*, 16> Visited;
4815 worklist.push_back(Addr);
4816
4817 // Use a worklist to iteratively look through PHI and select nodes, and
4818 // ensure that the addressing mode obtained from the non-PHI/select roots of
4819 // the graph are compatible.
4820 bool PhiOrSelectSeen = false;
4821 SmallVector<Instruction*, 16> AddrModeInsts;
4822 const SimplifyQuery SQ(*DL, TLInfo);
4823 AddressingModeCombiner AddrModes(SQ, Addr);
4824 TypePromotionTransaction TPT(RemovedInsts);
4825 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4826 TPT.getRestorationPoint();
4827 while (!worklist.empty()) {
 1: Loop condition is true. Entering loop body
4828 Value *V = worklist.back();
4829 worklist.pop_back();
4830
4831 // We allow traversing cyclic Phi nodes.
4832 // In case of success after this loop we ensure that traversing through
4833 // Phi nodes ends up with all cases to compute address of the form
4834 // BaseGV + Base + Scale * Index + Offset
4835     // where Scale and Offset are constants and BaseGV, Base and Index
4836 // are exactly the same Values in all cases.
4837 // It means that BaseGV, Scale and Offset dominate our memory instruction
4838 // and have the same value as they had in address computation represented
4839 // as Phi. So we can safely sink address computation to memory instruction.
4840 if (!Visited.insert(V).second)
 2: Assuming field 'second' is true
 3: Taking false branch
4841 continue;
4842
4843 // For a PHI node, push all of its incoming values.
4844     if (PHINode *P = dyn_cast<PHINode>(V)) {
 4: Assuming 'V' is not a 'PHINode'
 4.1: 'P' is null
 5: Taking false branch
4845 for (Value *IncValue : P->incoming_values())
4846 worklist.push_back(IncValue);
4847 PhiOrSelectSeen = true;
4848 continue;
4849 }
4850 // Similar for select.
4851     if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
 6: Assuming 'V' is not a 'SelectInst'
 6.1: 'SI' is null
 7: Taking false branch
4852 worklist.push_back(SI->getFalseValue());
4853 worklist.push_back(SI->getTrueValue());
4854 PhiOrSelectSeen = true;
4855 continue;
4856 }
4857
4858 // For non-PHIs, determine the addressing mode being computed. Note that
4859 // the result may differ depending on what other uses our candidate
4860 // addressing instructions might have.
4861 AddrModeInsts.clear();
4862 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4863 0);
4864 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4865 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4866 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
4867 BFI.get());
4868
4869 GetElementPtrInst *GEP = LargeOffsetGEP.first;
4870     if (GEP && !NewGEPBases.count(GEP)) {
 7.1: 'GEP' is null
 8: Taking false branch
4871 // If splitting the underlying data structure can reduce the offset of a
4872 // GEP, collect the GEP. Skip the GEPs that are the new bases of
4873 // previously split data structures.
4874 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4875 if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4876 LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4877 }
4878
4879 NewAddrMode.OriginalValue = V;
4880 if (!AddrModes.addNewAddrMode(NewAddrMode))
 9: Taking true branch
4881       break;
 10: Execution continues on line 4887
4882 }
4883
4884 // Try to combine the AddrModes we've collected. If we couldn't collect any,
4885 // or we have multiple but either couldn't combine them or combining them
4886 // wouldn't do anything useful, bail out now.
4887 if (!AddrModes.combineAddrModes()) {
 11: Calling 'AddressingModeCombiner::combineAddrModes'
4888 TPT.rollback(LastKnownGood);
4889 return false;
4890 }
4891 TPT.commit();
4892
4893 // Get the combined AddrMode (or the only AddrMode, if we only had one).
4894 ExtAddrMode AddrMode = AddrModes.getAddrMode();
4895
4896 // If all the instructions matched are already in this BB, don't do anything.
4897   // If we saw a Phi node then it is definitely not local, and if we saw a select
4898 // then we want to push the address calculation past it even if it's already
4899 // in this BB.
4900 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4901 return IsNonLocalValue(V, MemoryInst->getParent());
4902 })) {
4903     LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4904                       << "\n");
4905 return false;
4906 }
4907
4908 // Insert this computation right after this user. Since our caller is
4909   // scanning from the top of the BB to the bottom, reuses of the expr are
4910 // guaranteed to happen later.
4911 IRBuilder<> Builder(MemoryInst);
4912
4913   // Now that we have determined the addressing expression we want to use and
4914   // know that we have to sink it into this block, check to see if we have already
4915 // done this for some other load/store instr in this block. If so, reuse
4916 // the computation. Before attempting reuse, check if the address is valid
4917 // as it may have been erased.
4918
4919 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4920
4921 Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4922 if (SunkAddr) {
4923     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4924                       << " for " << *MemoryInst << "\n");
4925 if (SunkAddr->getType() != Addr->getType())
4926 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4927 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
4928 SubtargetInfo->addrSinkUsingGEPs())) {
4929 // By default, we use the GEP-based method when AA is used later. This
4930 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4931     LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4932                       << " for " << *MemoryInst << "\n");
4933 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4934 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4935
4936 // First, find the pointer.
4937 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4938 ResultPtr = AddrMode.BaseReg;
4939 AddrMode.BaseReg = nullptr;
4940 }
4941
4942 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4943 // We can't add more than one pointer together, nor can we scale a
4944 // pointer (both of which seem meaningless).
4945 if (ResultPtr || AddrMode.Scale != 1)
4946 return false;
4947
4948 ResultPtr = AddrMode.ScaledReg;
4949 AddrMode.Scale = 0;
4950 }
4951
4952 // It is only safe to sign extend the BaseReg if we know that the math
4953 // required to create it did not overflow before we extend it. Since
4954 // the original IR value was tossed in favor of a constant back when
4955 // the AddrMode was created we need to bail out gracefully if widths
4956 // do not match instead of extending it.
4957 //
4958 // (See below for code to add the scale.)
4959 if (AddrMode.Scale) {
4960 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4961 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4962 cast<IntegerType>(ScaledRegTy)->getBitWidth())
4963 return false;
4964 }
4965
4966 if (AddrMode.BaseGV) {
4967 if (ResultPtr)
4968 return false;
4969
4970 ResultPtr = AddrMode.BaseGV;
4971 }
4972
4973 // If the real base value actually came from an inttoptr, then the matcher
4974 // will look through it and provide only the integer value. In that case,
4975 // use it here.
4976 if (!DL->isNonIntegralPointerType(Addr->getType())) {
4977 if (!ResultPtr && AddrMode.BaseReg) {
4978 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4979 "sunkaddr");
4980 AddrMode.BaseReg = nullptr;
4981 } else if (!ResultPtr && AddrMode.Scale == 1) {
4982 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4983 "sunkaddr");
4984 AddrMode.Scale = 0;
4985 }
4986 }
4987
4988 if (!ResultPtr &&
4989 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4990 SunkAddr = Constant::getNullValue(Addr->getType());
4991 } else if (!ResultPtr) {
4992 return false;
4993 } else {
4994 Type *I8PtrTy =
4995 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4996 Type *I8Ty = Builder.getInt8Ty();
4997
4998 // Start with the base register. Do this first so that subsequent address
4999 // matching finds it last, which will prevent it from trying to match it
5000 // as the scaled value in case it happens to be a mul. That would be
5001 // problematic if we've sunk a different mul for the scale, because then
5002 // we'd end up sinking both muls.
5003 if (AddrMode.BaseReg) {
5004 Value *V = AddrMode.BaseReg;
5005 if (V->getType() != IntPtrTy)
5006 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5007
5008 ResultIndex = V;
5009 }
5010
5011 // Add the scale value.
5012 if (AddrMode.Scale) {
5013 Value *V = AddrMode.ScaledReg;
5014 if (V->getType() == IntPtrTy) {
5015 // done.
5016 } else {
5017           assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5018                      cast<IntegerType>(V->getType())->getBitWidth() &&
5019                  "We can't transform if ScaledReg is too narrow");
5020 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5021 }
5022
5023 if (AddrMode.Scale != 1)
5024 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5025 "sunkaddr");
5026 if (ResultIndex)
5027 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5028 else
5029 ResultIndex = V;
5030 }
5031
5032 // Add in the Base Offset if present.
5033 if (AddrMode.BaseOffs) {
5034 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5035 if (ResultIndex) {
5036 // We need to add this separately from the scale above to help with
5037 // SDAG consecutive load/store merging.
5038 if (ResultPtr->getType() != I8PtrTy)
5039 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5040 ResultPtr =
5041 AddrMode.InBounds
5042 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5043 "sunkaddr")
5044 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5045 }
5046
5047 ResultIndex = V;
5048 }
5049
5050 if (!ResultIndex) {
5051 SunkAddr = ResultPtr;
5052 } else {
5053 if (ResultPtr->getType() != I8PtrTy)
5054 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5055 SunkAddr =
5056 AddrMode.InBounds
5057 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5058 "sunkaddr")
5059 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5060 }
5061
5062 if (SunkAddr->getType() != Addr->getType())
5063 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5064 }
5065 } else {
5066 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5067 // non-integral pointers, so in that case bail out now.
5068 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5069 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5070 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5071 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5072 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5073 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5074 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5075 (AddrMode.BaseGV &&
5076 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5077 return false;
5078
5079    LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5080                      << " for " << *MemoryInst << "\n");
5081 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5082 Value *Result = nullptr;
5083
5084 // Start with the base register. Do this first so that subsequent address
5085 // matching finds it last, which will prevent it from trying to match it
5086 // as the scaled value in case it happens to be a mul. That would be
5087 // problematic if we've sunk a different mul for the scale, because then
5088 // we'd end up sinking both muls.
5089 if (AddrMode.BaseReg) {
5090 Value *V = AddrMode.BaseReg;
5091 if (V->getType()->isPointerTy())
5092 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5093 if (V->getType() != IntPtrTy)
5094 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5095 Result = V;
5096 }
5097
5098 // Add the scale value.
5099 if (AddrMode.Scale) {
5100 Value *V = AddrMode.ScaledReg;
5101 if (V->getType() == IntPtrTy) {
5102 // done.
5103 } else if (V->getType()->isPointerTy()) {
5104 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5105 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5106 cast<IntegerType>(V->getType())->getBitWidth()) {
5107 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5108 } else {
5109 // It is only safe to sign extend the BaseReg if we know that the math
5110 // required to create it did not overflow before we extend it. Since
5111 // the original IR value was tossed in favor of a constant back when
5112 // the AddrMode was created we need to bail out gracefully if widths
5113 // do not match instead of extending it.
5114 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5115 if (I && (Result != AddrMode.BaseReg))
5116 I->eraseFromParent();
5117 return false;
5118 }
5119 if (AddrMode.Scale != 1)
5120 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5121 "sunkaddr");
5122 if (Result)
5123 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5124 else
5125 Result = V;
5126 }
5127
5128 // Add in the BaseGV if present.
5129 if (AddrMode.BaseGV) {
5130 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
5131 if (Result)
5132 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5133 else
5134 Result = V;
5135 }
5136
5137 // Add in the Base Offset if present.
5138 if (AddrMode.BaseOffs) {
5139 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5140 if (Result)
5141 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5142 else
5143 Result = V;
5144 }
5145
5146 if (!Result)
5147 SunkAddr = Constant::getNullValue(Addr->getType());
5148 else
5149 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5150 }
5151
5152 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5153 // Store the newly computed address into the cache. In the case we reused a
5154 // value, this should be idempotent.
5155 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5156
5157 // If we have no uses, recursively delete the value and all dead instructions
5158 // using it.
5159 if (Repl->use_empty()) {
5160 // This can cause recursive deletion, which can invalidate our iterator.
5161 // Use a WeakTrackingVH to hold onto it in case this happens.
5162 Value *CurValue = &*CurInstIterator;
5163 WeakTrackingVH IterHandle(CurValue);
5164 BasicBlock *BB = CurInstIterator->getParent();
5165
5166 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
5167
5168 if (IterHandle != CurValue) {
5169 // If the iterator instruction was recursively deleted, start over at the
5170 // start of the block.
5171 CurInstIterator = BB->begin();
5172 SunkAddrs.clear();
5173 }
5174 }
5175 ++NumMemoryInsts;
5176 return true;
5177}
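// --- Illustrative sketch, not part of the original CodeGenPrepare.cpp ---
// The GEP-based path in optimizeMemoryInst above materializes AddrMode as i8
// GEPs off the base pointer. A condensed, stand-alone version of that address
// arithmetic might look like the helper below; the name buildSunkAddrSketch
// and its parameter list are hypothetical, while the IRBuilder calls are the
// same ones used by the pass.
static Value *buildSunkAddrSketch(IRBuilder<> &Builder, Value *BasePtr,
                                  Value *Index, int64_t Scale, int64_t Offs,
                                  Type *IntPtrTy, Type *I8PtrTy, Type *I8Ty) {
  // Cast the base to i8* so byte offsets can be applied directly.
  Value *Ptr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
  // Bring the index to the pointer-sized integer type.
  Value *Idx = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
  if (Scale != 1)
    Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, Scale), "sunkaddr");
  Value *SunkAddr = Builder.CreateGEP(I8Ty, Ptr, Idx, "sunkaddr");
  if (Offs != 0)
    SunkAddr = Builder.CreateGEP(I8Ty, SunkAddr,
                                 ConstantInt::get(IntPtrTy, Offs), "sunkaddr");
  return SunkAddr;
}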
5178
5179/// If there are any memory operands, use optimizeMemoryInst to sink their
5180/// address computation into the block when possible / profitable.
5181bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5182 bool MadeChange = false;
5183
5184 const TargetRegisterInfo *TRI =
5185 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5186 TargetLowering::AsmOperandInfoVector TargetConstraints =
5187 TLI->ParseConstraints(*DL, TRI, CS);
5188 unsigned ArgNo = 0;
5189 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5190 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
5191
5192 // Compute the constraint code and ConstraintType to use.
5193 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5194
5195 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5196 OpInfo.isIndirect) {
5197 Value *OpVal = CS->getArgOperand(ArgNo++);
5198 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5199 } else if (OpInfo.Type == InlineAsm::isInput)
5200 ArgNo++;
5201 }
5202
5203 return MadeChange;
5204}
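// Illustrative example, not part of the original file: an inline asm call with
// an indirect memory constraint whose address operand optimizeInlineAsmInst
// hands to optimizeMemoryInst. The asm string and value names are made up.
//
//   %addr = getelementptr inbounds i32, i32* %base, i64 %idx
//   call void asm sideeffect "movl $$42, $0", "=*m"(i32* %addr)
//
// Because the "=*m" operand is indirect, the computation of %addr is a
// candidate for being sunk next to the asm call, just like a load or store
// address.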
5205
5206/// Check if all the uses of \p Val are equivalent (or free) zero or
5207/// sign extensions.
5208static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5209  assert(!Val->use_empty() && "Input must have at least one use");
5210 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5211 bool IsSExt = isa<SExtInst>(FirstUser);
5212 Type *ExtTy = FirstUser->getType();
5213 for (const User *U : Val->users()) {
5214 const Instruction *UI = cast<Instruction>(U);
5215 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5216 return false;
5217 Type *CurTy = UI->getType();
5218 // Same input and output types: Same instruction after CSE.
5219 if (CurTy == ExtTy)
5220 continue;
5221
5222 // If IsSExt is true, we are in this situation:
5223 // a = Val
5224 // b = sext ty1 a to ty2
5225 // c = sext ty1 a to ty3
5226 // Assuming ty2 is shorter than ty3, this could be turned into:
5227 // a = Val
5228 // b = sext ty1 a to ty2
5229 // c = sext ty2 b to ty3
5230 // However, the last sext is not free.
5231 if (IsSExt)
5232 return false;
5233
5234 // This is a ZExt, maybe this is free to extend from one type to another.
5235 // In that case, we would not account for a different use.
5236 Type *NarrowTy;
5237 Type *LargeTy;
5238 if (ExtTy->getScalarType()->getIntegerBitWidth() >
5239 CurTy->getScalarType()->getIntegerBitWidth()) {
5240 NarrowTy = CurTy;
5241 LargeTy = ExtTy;
5242 } else {
5243 NarrowTy = ExtTy;
5244 LargeTy = CurTy;
5245 }
5246
5247 if (!TLI.isZExtFree(NarrowTy, LargeTy))
5248 return false;
5249 }
5250 // All uses are the same or can be derived from one another for free.
5251 return true;
5252}
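// Illustrative example, not part of the original file: two zero extensions of
// the same value to different widths. hasSameExtUse accepts this pattern when
// the target reports the narrow-to-wide zext as free (TLI.isZExtFree), since
// the wider result could then be rebuilt from the narrower one at no extra
// cost. A mix of sext and zext users, or two sexts to different widths, is
// rejected. Value names are made up.
//
//   %narrow = zext i32 %val to i64
//   %wide   = zext i32 %val to i128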
5253
5254/// Try to speculatively promote extensions in \p Exts and continue
5255/// promoting through newly promoted operands recursively as far as doing so is
5256/// profitable. Save the extensions profitably moved up in \p ProfitablyMovedExts.
5257/// When some promotion happened, \p TPT contains the proper state to revert
5258/// them.
5259///
5260/// \return true if some promotion happened, false otherwise.
5261bool CodeGenPrepare::tryToPromoteExts(
5262 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
5263 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
5264 unsigned CreatedInstsCost) {
5265 bool Promoted = false;
5266
5267 // Iterate over all the extensions to try to promote them.
5268 for (auto I : Exts) {
5269 // Early check if we directly have ext(load).
5270 if (isa<LoadInst>(I->getOperand(0))) {
5271 ProfitablyMovedExts.push_back(I);
5272 continue;
5273 }
5274
5275    // Check whether or not we want to do any promotion. The reason we have
5276    // this check inside the for loop is to catch the case where an extension
5277    // is directly fed by a load, because in that case the extension can be
5278    // moved up without any promotion on its operands.
5279 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
5280 return false;
5281
5282 // Get the action to perform the promotion.
5283 TypePromotionHelper::Action TPH =
5284 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
5285 // Check if we can promote.
5286 if (!TPH) {
5287 // Save the current extension as we cannot move up through its operand.
5288 ProfitablyMovedExts.push_back(I);
5289 continue;
5290 }
5291
5292 // Save the current state.
5293 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5294 TPT.getRestorationPoint();
5295 SmallVector<Instruction *, 4> NewExts;
5296 unsigned NewCreatedInstsCost = 0;
5297 unsigned ExtCost = !TLI->isExtFree(I);
5298 // Promote.
5299 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
5300 &NewExts, nullptr, *TLI);
5301    assert(PromotedVal &&
5302           "TypePromotionHelper should have filtered out those cases");
5303
5304    // We can merge only one extension into a load.
5305    // Therefore, if we have more than one new extension we heuristically
5306    // cut this search path, because it means we degrade the code quality.
5307    // With exactly two, the transformation is neutral, because we will merge
5308    // one extension but leave one. However, we optimistically keep going,
5309    // because the new extension may be removed too.
5310 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5311 // FIXME: It would be possible to propagate a negative value instead of
5312 // conservatively ceiling it to 0.
5313 TotalCreatedInstsCost =
5314 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5315 if (!StressExtLdPromotion &&
5316 (TotalCreatedInstsCost > 1 ||
5317 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5318 // This promotion is not profitable, rollback to the previous state, and
5319 // save the current extension in ProfitablyMovedExts as the latest
5320 // speculative promotion turned out to be unprofitable.
5321 TPT.rollback(LastKnownGood);
5322 ProfitablyMovedExts.push_back(I);
5323 continue;
5324 }
5325 // Continue promoting NewExts as far as doing so is profitable.
5326 SmallVector<Instruction *, 2> NewlyMovedExts;
5327 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5328 bool NewPromoted = false;
5329 for (auto ExtInst : NewlyMovedExts) {
5330 Instruction *MovedExt = cast<Instruction>(ExtInst);
5331 Value *ExtOperand = MovedExt->getOperand(0);
5332      // If we have reached a load, we need this extra profitability check
5333      // as it could potentially be merged into an ext(load).
5334 if (isa<LoadInst>(ExtOperand) &&
5335 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5336 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5337 continue;
5338
5339 ProfitablyMovedExts.push_back(MovedExt);
5340 NewPromoted = true;
5341 }
5342
5343 // If none of speculative promotions for NewExts is profitable, rollback
5344 // and save the current extension (I) as the last profitable extension.
5345 if (!NewPromoted) {
5346 TPT.rollback(LastKnownGood);
5347 ProfitablyMovedExts.push_back(I);
5348 continue;
5349 }
5350 // The promotion is profitable.
5351 Promoted = true;
5352 }
5353 return Promoted;
5354}
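// Illustrative note, not part of the original file: a worked instance of the
// cost heuristic in tryToPromoteExts above. With CreatedInstsCost = 0, a
// promotion that creates two new extensions (NewCreatedInstsCost = 2) while
// the original ext is not free (ExtCost = 1) gives
// TotalCreatedInstsCost = max(0, 0 + 2 - 1) = 1, which is still accepted;
// anything pushing the total above 1 causes a rollback to LastKnownGood
// unless StressExtLdPromotion is set.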
5355
5356/// Merge redundant sexts when one dominates the other.
5357bool CodeGenPrepare::mergeSExts(Function &F) {
5358 bool Changed = false;
5359 for (auto &Entry : ValToSExtendedUses) {
5360 SExts &Insts = Entry.second;
5361 SExts CurPts;
5362 for (Instruction *Inst : Insts) {
5363 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5364 Inst->getOperand(0) != Entry.first)
5365 continue;
5366 bool inserted = false;
5367 for (auto &Pt : CurPts) {
5368 if (getDT(F).dominates(Inst, Pt)) {
5369 Pt->replaceAllUsesWith(Inst);
5370 RemovedInsts.insert(Pt);
5371 Pt->removeFromParent();
5372 Pt = Inst;
5373 inserted = true;
5374 Changed = true;
5375 break;
5376 }
5377 if (!getDT(F).dominates(Pt, Inst))
5378          // Give up if we need to merge in a common dominator, as experiments
5379          // show it is not profitable.
5380 continue;
5381 Inst->replaceAllUsesWith(Pt);
5382 RemovedInsts.insert(Inst);
5383 Inst->removeFromParent();
5384 inserted = true;
5385 Changed = true;
5386 break;
5387 }
5388 if (!inserted)
5389 CurPts.push_back(Inst);
5390 }
5391 }
5392 return Changed;
5393}
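// Illustrative example, not part of the original file: the redundancy that
// mergeSExts removes. Both sexts extend the same value recorded in
// ValToSExtendedUses; when one dominates the other, the dominated one is
// replaced and erased. Value and block names are made up.
//
//   entry:                                 entry:
//     %s1 = sext i32 %v to i64               %s1 = sext i32 %v to i64
//     br label %bb                    =>     br label %bb
//   bb:                                    bb:
//     %s2 = sext i32 %v to i64               ; %s2 removed; its users now
//     call void @use(i64 %s2)                call void @use(i64 %s1)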
5394
5395// Split large data structures so that the GEPs accessing them can have
5396// smaller offsets, allowing them to be sunk to the same blocks as their users.
5397// For example, a large struct starting from %base is split into two parts
5398// where the second part starts from %new_base.
5399//
5400// Before:
5401// BB0:
5402// %base =
5403//
5404// BB1:
5405// %gep0 = gep %base, off0
5406// %gep1 = gep %base, off1
5407// %gep2 = gep %base, off2
5408//
5409// BB2:
5410// %load1 = load %gep0
5411// %load2 = load %gep1
5412// %load3 = load %gep2
5413//
5414// After:
5415// BB0:
5416// %base =
5417// %new_base = gep %base, off0
5418//
5419// BB1:
5420// %new_gep0 = %new_base
5421// %new_gep1 = gep %new_base, off1 - off0
5422// %new_gep2 = gep %new_base, off2 - off0
5423//
5424// BB2:
5425// %load1 = load i32, i32* %new_gep0
5426// %load2 = load i32, i32* %new_gep1
5427// %load3 = load i32, i32* %new_gep2
5428//
5429// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
5430// their offsets are small enough to fit into the addressing mode.
5431bool CodeGenPrepare::splitLargeGEPOffsets() {
5432 bool Changed = false;
5433 for (auto &Entry : LargeOffsetGEPMap) {
5434 Value *OldBase = Entry.first;
5435 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5436 &LargeOffsetGEPs = Entry.second;
5437 auto compareGEPOffset =
5438 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5439 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5440 if (LHS.first == RHS.first)
5441 return false;
5442 if (LHS.second != RHS.second)
5443 return LHS.second < RHS.second;
5444 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5445 };
5446    // Sort all the GEPs of the same data structure based on their offsets.
5447 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5448 LargeOffsetGEPs.erase(
5449 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5450 LargeOffsetGEPs.end());
5451 // Skip if all the GEPs have the same offsets.
5452 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5453 continue;
5454 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5455 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5456 Value *NewBaseGEP = nullptr;
5457
5458 auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5459 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5460 GetElementPtrInst *GEP = LargeOffsetGEP->first;
5461 int64_t Offset = LargeOffsetGEP->second;
5462 if (Offset != BaseOffset) {
5463 TargetLowering::AddrMode AddrMode;
5464 AddrMode.BaseOffs = Offset - BaseOffset;
5465 // The result type of the GEP might not be the type of the memory
5466 // access.
5467 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5468 GEP->getResultElementType(),
5469 GEP->getAddressSpace())) {
5470            // We need to create a new base if the offset to the current base is
5471            // too large to fit into the addressing mode. So, a very large struct
5472            // may be split into several parts.
5473 BaseGEP = GEP;
5474 BaseOffset = Offset;
5475 NewBaseGEP = nullptr;
5476 }
5477 }
5478
5479 // Generate a new GEP to replace the current one.
5480 LLVMContext &Ctx = GEP->getContext();
5481 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5482 Type *I8PtrTy =
5483 Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5484 Type *I8Ty = Type::getInt8Ty(Ctx);
5485
5486 if (!NewBaseGEP) {
5487 // Create a new base if we don't have one yet. Find the insertion
5488 // pointer for the new base first.
5489 BasicBlock::iterator NewBaseInsertPt;
5490 BasicBlock *NewBaseInsertBB;
5491 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5492 // If the base of the struct is an instruction, the new base will be
5493 // inserted close to it.
5494 NewBaseInsertBB = BaseI->getParent();
5495 if (isa<PHINode>(BaseI))
5496 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5497 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5498 NewBaseInsertBB =
5499 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5500 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5501 } else
5502 NewBaseInsertPt = std::next(BaseI->getIterator());
5503 } else {
5504 // If the current base is an argument or global value, the new base
5505 // will be inserted to the entry block.
5506 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5507 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5508 }
5509 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5510 // Create a new base.
5511 Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5512 NewBaseGEP = OldBase;
5513 if (NewBaseGEP->getType() != I8PtrTy)
5514 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5515 NewBaseGEP =
5516 NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5517 NewGEPBases.insert(NewBaseGEP);
5518 }
5519
5520 IRBuilder<> Builder(GEP);
5521 Value *NewGEP = NewBaseGEP;
5522 if (Offset == BaseOffset) {
5523 if (GEP->getType() != I8PtrTy)
5524 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5525 } else {
5526 // Calculate the new offset for the new GEP.
5527 Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5528 NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5529
5530 if (GEP->getType() != I8PtrTy)
5531 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5532 }
5533 GEP->replaceAllUsesWith(NewGEP);
5534 LargeOffsetGEPID.erase(GEP);
5535 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5536 GEP->eraseFromParent();
5537 Changed = true;
5538 }
5539 }
5540 return Changed;
5541}
5542
5543/// Return true, if an ext(load) can be formed from an extension in
5544/// \p MovedExts.
5545bool CodeGenPrepare::canFormExtLd(
5546 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5547 Instruction *&Inst, bool HasPromoted) {
5548 for (auto *MovedExtInst : MovedExts) {
5549 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5550 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5551 Inst = MovedExtInst;
5552 break;
5553 }
5554 }
5555 if (!LI)
5556 return false;
5557
5558 // If they're already in the same block, there's nothing to do.
5559 // Make the cheap checks first if we did not promote.
5560 // If we promoted, we need to check if it is indeed profitable.
5561 if (!HasPromoted && LI->getParent() == Inst->getParent())
5562 return false;
5563
5564 return TLI->isExtLoad(LI, Inst, *DL);
5565}
5566
5567/// Move a zext or sext fed by a load into the same basic block as the load,
5568/// unless conditions are unfavorable. This allows SelectionDAG to fold the
5569/// extend into the load.
5570///
5571/// E.g.,
5572/// \code
5573/// %ld = load i32* %addr
5574/// %add = add nuw i32 %ld, 4
5575/// %zext = zext i32 %add to i64
5576/// \endcode
5577/// =>
5578/// \code
5579/// %ld = load i32* %addr
5580/// %zext = zext i32 %ld to i64
5581/// %add = add nuw i64 %zext, 4
5582/// \endcode
5583/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5584/// allows us to match zext(load i32*) to i64.
5585///
5586/// Also, try to promote the computations used to obtain a sign extended
5587/// value used into memory accesses.
5588/// E.g.,
5589/// \code
5590/// a = add nsw i32 b, 3
5591/// d = sext i32 a to i64
5592/// e = getelementptr ..., i64 d
5593/// \endcode
5594/// =>
5595/// \code
5596/// f = sext i32 b to i64
5597/// a = add nsw i64 f, 3
5598/// e = getelementptr ..., i64 a
5599/// \endcode
5600///
5601/// \p Inst[in/out] the extension may be modified during the process if some
5602/// promotions apply.
5603bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5604 bool AllowPromotionWithoutCommonHeader = false;
5605  /// See if this is an interesting sext operation for address type
5606  /// promotion before trying to promote it, e.g., one with the right
5607  /// type and used in memory accesses.
5608 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5609 *Inst, AllowPromotionWithoutCommonHeader);
5610 TypePromotionTransaction TPT(RemovedInsts);
5611 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5612 TPT.getRestorationPoint();
5613 SmallVector<Instruction *, 1> Exts;
5614 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5615 Exts.push_back(Inst);
5616
5617 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5618
5619 // Look for a load being extended.
5620 LoadInst *LI = nullptr;
5621 Instruction *ExtFedByLoad;
5622
5623  // Try to promote a chain of computation if it allows forming an extended
5624  // load.
5625 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5626    assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5627 TPT.commit();
5628 // Move the extend into the same block as the load
5629 ExtFedByLoad->moveAfter(LI);
5630 // CGP does not check if the zext would be speculatively executed when moved
5631 // to the same basic block as the load. Preserving its original location
5632 // would pessimize the debugging experience, as well as negatively impact
5633 // the quality of sample pgo. We don't want to use "line 0" as that has a
5634 // size cost in the line-table section and logically the zext can be seen as
5635 // part of the load. Therefore we conservatively reuse the same debug
5636 // location for the load and the zext.
5637 ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5638 ++NumExtsMoved;
5639 Inst = ExtFedByLoad;
5640 return true;
5641 }
5642
5643 // Continue promoting SExts if known as considerable depending on targets.
5644 if (ATPConsiderable &&
5645 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5646 HasPromoted, TPT, SpeculativelyMovedExts))
5647 return true;
5648
5649 TPT.rollback(LastKnownGood);
5650 return false;
5651}
5652
5653// Perform address type promotion if doing so is profitable.
5654// If AllowPromotionWithoutCommonHeader == false, we should find other sext
5655// instructions that sign extended the same initial value. However, if
5656// AllowPromotionWithoutCommonHeader == true, we assume promoting the
5657// extension is profitable on its own.
5658bool CodeGenPrepare::performAddressTypePromotion(
5659 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5660 bool HasPromoted, TypePromotionTransaction &TPT,
5661 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5662 bool Promoted = false;
5663 SmallPtrSet<Instruction *, 1> UnhandledExts;
5664 bool AllSeenFirst = true;
5665 for (auto I : SpeculativelyMovedExts) {
5666 Value *HeadOfChain = I->getOperand(0);
5667 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5668 SeenChainsForSExt.find(HeadOfChain);
5669 // If there is an unhandled SExt which has the same header, try to promote
5670 // it as well.
5671 if (AlreadySeen != SeenChainsForSExt.end()) {
5672 if (AlreadySeen->second != nullptr)
5673 UnhandledExts.insert(AlreadySeen->second);
5674 AllSeenFirst = false;
5675 }
5676 }
5677
5678 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5679 SpeculativelyMovedExts.size() == 1)) {
5680 TPT.commit();
5681 if (HasPromoted)
5682 Promoted = true;
5683 for (auto I : SpeculativelyMovedExts) {
5684 Value *HeadOfChain = I->getOperand(0);
5685 SeenChainsForSExt[HeadOfChain] = nullptr;
5686 ValToSExtendedUses[HeadOfChain].push_back(I);
5687 }
5688    // Update Inst as promotion happened.
5689 Inst = SpeculativelyMovedExts.pop_back_val();
5690 } else {
5691    // This is the first chain visited from the header; keep the current chain
5692    // as unhandled. Defer promoting it until we encounter another SExt
5693    // chain derived from the same header.
5694 for (auto I : SpeculativelyMovedExts) {
5695 Value *HeadOfChain = I->getOperand(0);
5696 SeenChainsForSExt[HeadOfChain] = Inst;
5697 }
5698 return false;
5699 }
5700
5701 if (!AllSeenFirst && !UnhandledExts.empty())
5702 for (auto VisitedSExt : UnhandledExts) {
5703 if (RemovedInsts.count(VisitedSExt))
5704 continue;
5705 TypePromotionTransaction TPT(RemovedInsts);
5706 SmallVector<Instruction *, 1> Exts;
5707 SmallVector<Instruction *, 2> Chains;
5708 Exts.push_back(VisitedSExt);
5709 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5710 TPT.commit();
5711 if (HasPromoted)
5712 Promoted = true;
5713 for (auto I : Chains) {
5714 Value *HeadOfChain = I->getOperand(0);
5715 // Mark this as handled.
5716 SeenChainsForSExt[HeadOfChain] = nullptr;
5717 ValToSExtendedUses[HeadOfChain].push_back(I);
5718 }
5719 }
5720 return Promoted;
5721}
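// Illustrative example, not part of the original file: two sext chains sharing
// the same head value %h. The first chain visited is recorded in
// SeenChainsForSExt and deferred; once a second chain with the same head is
// seen, both are committed and become candidates for mergeSExts. Names are
// made up.
//
//   %h  = load i32, i32* %p
//   %s1 = sext i32 %h to i64      ; first chain seen: deferred
//   ...
//   %s2 = sext i32 %h to i64      ; same head: promote and commit both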
5722
5723bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5724 BasicBlock *DefBB = I->getParent();
5725
5726 // If the result of a {s|z}ext and its source are both live out, rewrite all
5727 // other uses of the source with result of extension.
5728 Value *Src = I->getOperand(0);
5729 if (Src->hasOneUse())
5730 return false;
5731
5732 // Only do this xform if truncating is free.
5733 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
5734 return false;
5735
5736 // Only safe to perform the optimization if the source is also defined in
5737 // this block.
5738 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5739 return false;
5740
5741 bool DefIsLiveOut = false;
5742 for (User *U : I->users()) {
5743 Instruction *UI = cast<Instruction>(U);
5744
5745 // Figure out which BB this ext is used in.
5746 BasicBlock *UserBB = UI->getParent();
5747 if (UserBB == DefBB) continue;
5748 DefIsLiveOut = true;
5749 break;
5750 }
5751 if (!DefIsLiveOut)
5752 return false;
5753
5754 // Make sure none of the uses are PHI nodes.
5755 for (User *U : Src->users()) {
5756 Instruction *UI = cast<Instruction>(U);
5757 BasicBlock *UserBB = UI->getParent();
5758 if (UserBB == DefBB) continue;
5759 // Be conservative. We don't want this xform to end up introducing
5760 // reloads just before load / store instructions.
5761 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5762 return false;
5763 }
5764
5765  // InsertedTruncs - Only insert one trunc in each block.
5766 DenseMap<BasicBlock*, Instruction*