Bug Summary

File: llvm/lib/CodeGen/CodeGenPrepare.cpp
Warning: line 3150, column 5
Called C++ object pointer is null
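
Note on the diagnostic: "Called C++ object pointer is null" is produced by the core.CallAndMessage checker when the analyzer finds a feasible path on which the pointer used as the implicit 'this' of a member-function call is null. The call flagged at line 3150 lies outside the excerpt reproduced below; for orientation only, the annotated source does show the general shape such paths take in this pass (for example, TLI is declared as nullptr on line 255 and assigned only when a TargetPassConfig is available, lines 431-436). The following minimal sketch uses hypothetical names unrelated to CodeGenPrepare; it illustrates the pattern the checker flags and the kind of guard that silences it, and is not the code at line 3150.

#include <cstdio>

// Hypothetical stand-ins for illustration only.
struct Lowering {
  bool isSlowDivBypassed() const { return true; }
};

struct PassConfig {
  Lowering TL;
  Lowering *getLowering() { return &TL; }
};

// Mirrors the common shape: a pointer that defaults to null and is only
// assigned on a guarded path.
static Lowering *maybeGetLowering(PassConfig *TPC) {
  Lowering *TLI = nullptr;
  if (TPC)
    TLI = TPC->getLowering();
  return TLI;
}

int main() {
  Lowering *TLI = maybeGetLowering(nullptr); // no PassConfig -> TLI stays null
  // The analyzer reports "Called C++ object pointer is null" on the next
  // line, because a path exists on which TLI was never assigned (running
  // this would crash). A null check, if (TLI && TLI->isSlowDivBypassed()),
  // removes the report.
  if (TLI->isSlowDivBypassed())
    std::puts("bypassing slow division");
  return 0;
}

A guarded use such as the "TLI && TLI->isSlowDivBypassed()" test at lines 453-454 of the listing below is the usual way this class of report is addressed.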

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CodeGenPrepare.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/CodeGen/CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryBuiltins.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/CodeGen/Analysis.h"
36#include "llvm/CodeGen/ISDOpcodes.h"
37#include "llvm/CodeGen/SelectionDAGNodes.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetPassConfig.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/CodeGen/ValueTypes.h"
42#include "llvm/Config/llvm-config.h"
43#include "llvm/IR/Argument.h"
44#include "llvm/IR/Attributes.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/CallSite.h"
47#include "llvm/IR/Constant.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DerivedTypes.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/GetElementPtrTypeIterator.h"
54#include "llvm/IR/GlobalValue.h"
55#include "llvm/IR/GlobalVariable.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/InlineAsm.h"
58#include "llvm/IR/InstrTypes.h"
59#include "llvm/IR/Instruction.h"
60#include "llvm/IR/Instructions.h"
61#include "llvm/IR/IntrinsicInst.h"
62#include "llvm/IR/Intrinsics.h"
63#include "llvm/IR/IntrinsicsAArch64.h"
64#include "llvm/IR/IntrinsicsX86.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/MDBuilder.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
69#include "llvm/IR/PatternMatch.h"
70#include "llvm/IR/Statepoint.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/User.h"
74#include "llvm/IR/Value.h"
75#include "llvm/IR/ValueHandle.h"
76#include "llvm/IR/ValueMap.h"
77#include "llvm/InitializePasses.h"
78#include "llvm/Pass.h"
79#include "llvm/Support/BlockFrequency.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CommandLine.h"
83#include "llvm/Support/Compiler.h"
84#include "llvm/Support/Debug.h"
85#include "llvm/Support/ErrorHandling.h"
86#include "llvm/Support/MachineValueType.h"
87#include "llvm/Support/MathExtras.h"
88#include "llvm/Support/raw_ostream.h"
89#include "llvm/Target/TargetMachine.h"
90#include "llvm/Target/TargetOptions.h"
91#include "llvm/Transforms/Utils/BasicBlockUtils.h"
92#include "llvm/Transforms/Utils/BypassSlowDivision.h"
93#include "llvm/Transforms/Utils/Local.h"
94#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
95#include "llvm/Transforms/Utils/SizeOpts.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <utility>
103#include <vector>
104
105using namespace llvm;
106using namespace llvm::PatternMatch;
107
108#define DEBUG_TYPE "codegenprepare"
109
110STATISTIC(NumBlocksElim, "Number of blocks eliminated");
111STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
112STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
113STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
114 "sunken Cmps");
115STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
116 "of sunken Casts");
117STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
118 "computations were sunk");
119STATISTIC(NumMemoryInstsPhiCreated,
120 "Number of phis created when address "
121 "computations were sunk to memory instructions");
122STATISTIC(NumMemoryInstsSelectCreated,
123 "Number of select created when address "
124 "computations were sunk to memory instructions");
125STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
126STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
127STATISTIC(NumAndsAdded,
128 "Number of and mask instructions added to form ext loads");
129STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
130STATISTIC(NumRetsDup, "Number of return instructions duplicated");
131STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
132STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
133STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
134
135static cl::opt<bool> DisableBranchOpts(
136 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
137 cl::desc("Disable branch optimizations in CodeGenPrepare"));
138
139static cl::opt<bool>
140 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
141 cl::desc("Disable GC optimizations in CodeGenPrepare"));
142
143static cl::opt<bool> DisableSelectToBranch(
144 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
145 cl::desc("Disable select to branch conversion."));
146
147static cl::opt<bool> AddrSinkUsingGEPs(
148 "addr-sink-using-gep", cl::Hidden, cl::init(true),
149 cl::desc("Address sinking in CGP using GEPs."));
150
151static cl::opt<bool> EnableAndCmpSinking(
152 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
153 cl::desc("Enable sinkinig and/cmp into branches."));
154
155static cl::opt<bool> DisableStoreExtract(
156 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
157 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
158
159static cl::opt<bool> StressStoreExtract(
160 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
161 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
162
163static cl::opt<bool> DisableExtLdPromotion(
164 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
165 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
166 "CodeGenPrepare"));
167
168static cl::opt<bool> StressExtLdPromotion(
169 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
170 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
171 "optimization in CodeGenPrepare"));
172
173static cl::opt<bool> DisablePreheaderProtect(
174 "disable-preheader-prot", cl::Hidden, cl::init(false),
175 cl::desc("Disable protection against removing loop preheaders"));
176
177static cl::opt<bool> ProfileGuidedSectionPrefix(
178 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
179 cl::desc("Use profile info to add section prefix for hot/cold functions"));
180
181static cl::opt<unsigned> FreqRatioToSkipMerge(
182 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
183 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
184 "(frequency of destination block) is greater than this ratio"));
185
186static cl::opt<bool> ForceSplitStore(
187 "force-split-store", cl::Hidden, cl::init(false),
188 cl::desc("Force store splitting no matter what the target query says."));
189
190static cl::opt<bool>
191EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
192 cl::desc("Enable merging of redundant sexts when one is dominating"
193 " the other."), cl::init(true));
194
195static cl::opt<bool> DisableComplexAddrModes(
196 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
197 cl::desc("Disables combining addressing modes with different parts "
198 "in optimizeMemoryInst."));
199
200static cl::opt<bool>
201AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
202 cl::desc("Allow creation of Phis in Address sinking."));
203
204static cl::opt<bool>
205AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
206 cl::desc("Allow creation of selects in Address sinking."));
207
208static cl::opt<bool> AddrSinkCombineBaseReg(
209 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
210 cl::desc("Allow combining of BaseReg field in Address sinking."));
211
212static cl::opt<bool> AddrSinkCombineBaseGV(
213 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
214 cl::desc("Allow combining of BaseGV field in Address sinking."));
215
216static cl::opt<bool> AddrSinkCombineBaseOffs(
217 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
218 cl::desc("Allow combining of BaseOffs field in Address sinking."));
219
220static cl::opt<bool> AddrSinkCombineScaledReg(
221 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
222 cl::desc("Allow combining of ScaledReg field in Address sinking."));
223
224static cl::opt<bool>
225 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
226 cl::init(true),
227 cl::desc("Enable splitting large offset of GEP."));
228
229static cl::opt<bool> EnableICMP_EQToICMP_ST(
230 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
231 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
232
233namespace {
234
235enum ExtType {
236 ZeroExtension, // Zero extension has been seen.
237 SignExtension, // Sign extension has been seen.
238 BothExtension // This extension type is used if we saw sext after
239 // ZeroExtension had been set, or if we saw zext after
240 // SignExtension had been set. It makes the type
241 // information of a promoted instruction invalid.
242};
243
244using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
245using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
246using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
247using SExts = SmallVector<Instruction *, 16>;
248using ValueToSExts = DenseMap<Value *, SExts>;
249
250class TypePromotionTransaction;
251
252 class CodeGenPrepare : public FunctionPass {
253 const TargetMachine *TM = nullptr;
254 const TargetSubtargetInfo *SubtargetInfo;
255 const TargetLowering *TLI = nullptr;
256 const TargetRegisterInfo *TRI;
257 const TargetTransformInfo *TTI = nullptr;
258 const TargetLibraryInfo *TLInfo;
259 const LoopInfo *LI;
260 std::unique_ptr<BlockFrequencyInfo> BFI;
261 std::unique_ptr<BranchProbabilityInfo> BPI;
262 ProfileSummaryInfo *PSI;
263
264 /// As we scan instructions optimizing them, this is the next instruction
265 /// to optimize. Transforms that can invalidate this should update it.
266 BasicBlock::iterator CurInstIterator;
267
268 /// Keeps track of non-local addresses that have been sunk into a block.
269 /// This allows us to avoid inserting duplicate code for blocks with
270 /// multiple load/stores of the same address. The usage of WeakTrackingVH
271 /// enables SunkAddrs to be treated as a cache whose entries can be
272 /// invalidated if a sunken address computation has been erased.
273 ValueMap<Value*, WeakTrackingVH> SunkAddrs;
274
275 /// Keeps track of all instructions inserted for the current function.
276 SetOfInstrs InsertedInsts;
277
278 /// Keeps track of the type of the related instruction before their
279 /// promotion for the current function.
280 InstrToOrigTy PromotedInsts;
281
282 /// Keep track of instructions removed during promotion.
283 SetOfInstrs RemovedInsts;
284
285 /// Keep track of sext chains based on their initial value.
286 DenseMap<Value *, Instruction *> SeenChainsForSExt;
287
288 /// Keep track of GEPs accessing the same data structures such as structs or
289 /// arrays that are candidates to be split later because of their large
290 /// size.
291 MapVector<
292 AssertingVH<Value>,
293 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
294 LargeOffsetGEPMap;
295
296 /// Keep track of new GEP base after splitting the GEPs having large offset.
297 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
298
299 /// Map serial numbers to Large offset GEPs.
300 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
301
302 /// Keep track of SExt promoted.
303 ValueToSExts ValToSExtendedUses;
304
305 /// True if the function has the OptSize attribute.
306 bool OptSize;
307
308 /// DataLayout for the Function being processed.
309 const DataLayout *DL = nullptr;
310
311 /// Building the dominator tree can be expensive, so we only build it
312 /// lazily and update it when required.
313 std::unique_ptr<DominatorTree> DT;
314
315 public:
316 static char ID; // Pass identification, replacement for typeid
317
318 CodeGenPrepare() : FunctionPass(ID) {
319 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
320 }
321
322 bool runOnFunction(Function &F) override;
323
324 StringRef getPassName() const override { return "CodeGen Prepare"; }
325
326 void getAnalysisUsage(AnalysisUsage &AU) const override {
327 // FIXME: When we can selectively preserve passes, preserve the domtree.
328 AU.addRequired<ProfileSummaryInfoWrapperPass>();
329 AU.addRequired<TargetLibraryInfoWrapperPass>();
330 AU.addRequired<TargetTransformInfoWrapperPass>();
331 AU.addRequired<LoopInfoWrapperPass>();
332 }
333
334 private:
335 template <typename F>
336 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
337 // Substituting can cause recursive simplifications, which can invalidate
338 // our iterator. Use a WeakTrackingVH to hold onto it in case this
339 // happens.
340 Value *CurValue = &*CurInstIterator;
341 WeakTrackingVH IterHandle(CurValue);
342
343 f();
344
345 // If the iterator instruction was recursively deleted, start over at the
346 // start of the block.
347 if (IterHandle != CurValue) {
348 CurInstIterator = BB->begin();
349 SunkAddrs.clear();
350 }
351 }
352
353 // Get the DominatorTree, building if necessary.
354 DominatorTree &getDT(Function &F) {
355 if (!DT)
356 DT = std::make_unique<DominatorTree>(F);
357 return *DT;
358 }
359
360 bool eliminateFallThrough(Function &F);
361 bool eliminateMostlyEmptyBlocks(Function &F);
362 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
363 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
364 void eliminateMostlyEmptyBlock(BasicBlock *BB);
365 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
366 bool isPreheader);
367 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
368 bool optimizeInst(Instruction *I, bool &ModifiedDT);
369 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
370 Type *AccessTy, unsigned AddrSpace);
371 bool optimizeInlineAsmInst(CallInst *CS);
372 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
373 bool optimizeExt(Instruction *&I);
374 bool optimizeExtUses(Instruction *I);
375 bool optimizeLoadExt(LoadInst *Load);
376 bool optimizeShiftInst(BinaryOperator *BO);
377 bool optimizeSelectInst(SelectInst *SI);
378 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
379 bool optimizeSwitchInst(SwitchInst *SI);
380 bool optimizeExtractElementInst(Instruction *Inst);
381 bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
382 bool fixupDbgValue(Instruction *I);
383 bool placeDbgValues(Function &F);
384 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
385 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
386 bool tryToPromoteExts(TypePromotionTransaction &TPT,
387 const SmallVectorImpl<Instruction *> &Exts,
388 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
389 unsigned CreatedInstsCost = 0);
390 bool mergeSExts(Function &F);
391 bool splitLargeGEPOffsets();
392 bool performAddressTypePromotion(
393 Instruction *&Inst,
394 bool AllowPromotionWithoutCommonHeader,
395 bool HasPromoted, TypePromotionTransaction &TPT,
396 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
397 bool splitBranchCondition(Function &F, bool &ModifiedDT);
398 bool simplifyOffsetableRelocate(Instruction &I);
399
400 bool tryToSinkFreeOperands(Instruction *I);
401 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
402 Intrinsic::ID IID);
403 bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
404 bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
405 bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
406 };
407
408} // end anonymous namespace
409
410char CodeGenPrepare::ID = 0;
411
412INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
413 "Optimize for code generation", false, false)
414INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
415INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
416 "Optimize for code generation", false, false)
417
418FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
419
420bool CodeGenPrepare::runOnFunction(Function &F) {
421 if (skipFunction(F))
422 return false;
423
424 DL = &F.getParent()->getDataLayout();
425
426 bool EverMadeChange = false;
427 // Clear per function information.
428 InsertedInsts.clear();
429 PromotedInsts.clear();
430
431 if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
432 TM = &TPC->getTM<TargetMachine>();
433 SubtargetInfo = TM->getSubtargetImpl(F);
434 TLI = SubtargetInfo->getTargetLowering();
435 TRI = SubtargetInfo->getRegisterInfo();
436 }
437 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
438 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
439 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
440 BPI.reset(new BranchProbabilityInfo(F, *LI));
441 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
442 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
443 OptSize = F.hasOptSize();
444 if (ProfileGuidedSectionPrefix) {
445 if (PSI->isFunctionHotInCallGraph(&F, *BFI))
446 F.setSectionPrefix(".hot");
447 else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
448 F.setSectionPrefix(".unlikely");
449 }
450
451 /// This optimization identifies DIV instructions that can be
452 /// profitably bypassed and carried out with a shorter, faster divide.
453 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
454 TLI->isSlowDivBypassed()) {
455 const DenseMap<unsigned int, unsigned int> &BypassWidths =
456 TLI->getBypassSlowDivWidths();
457 BasicBlock* BB = &*F.begin();
458 while (BB != nullptr) {
459 // bypassSlowDivision may create new BBs, but we don't want to reapply the
460 // optimization to those blocks.
461 BasicBlock* Next = BB->getNextNode();
462 // F.hasOptSize is already checked in the outer if statement.
463 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
464 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
465 BB = Next;
466 }
467 }
468
469 // Eliminate blocks that contain only PHI nodes and an
470 // unconditional branch.
471 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
472
473 bool ModifiedDT = false;
474 if (!DisableBranchOpts)
475 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
476
477 // Split some critical edges where one of the sources is an indirect branch,
478 // to help generate sane code for PHIs involving such edges.
479 EverMadeChange |= SplitIndirectBrCriticalEdges(F);
480
481 bool MadeChange = true;
482 while (MadeChange) {
483 MadeChange = false;
484 DT.reset();
485 for (Function::iterator I = F.begin(); I != F.end(); ) {
486 BasicBlock *BB = &*I++;
487 bool ModifiedDTOnIteration = false;
488 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
489
490 // Restart BB iteration if the dominator tree of the Function was changed
491 if (ModifiedDTOnIteration)
492 break;
493 }
494 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
495 MadeChange |= mergeSExts(F);
496 if (!LargeOffsetGEPMap.empty())
497 MadeChange |= splitLargeGEPOffsets();
498
499 // Really free removed instructions during promotion.
500 for (Instruction *I : RemovedInsts)
501 I->deleteValue();
502
503 EverMadeChange |= MadeChange;
504 SeenChainsForSExt.clear();
505 ValToSExtendedUses.clear();
506 RemovedInsts.clear();
507 LargeOffsetGEPMap.clear();
508 LargeOffsetGEPID.clear();
509 }
510
511 SunkAddrs.clear();
512
513 if (!DisableBranchOpts) {
514 MadeChange = false;
515 // Use a set vector to get deterministic iteration order. The order the
516 // blocks are removed may affect whether or not PHI nodes in successors
517 // are removed.
518 SmallSetVector<BasicBlock*, 8> WorkList;
519 for (BasicBlock &BB : F) {
520 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
521 MadeChange |= ConstantFoldTerminator(&BB, true);
522 if (!MadeChange) continue;
523
524 for (SmallVectorImpl<BasicBlock*>::iterator
525 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
526 if (pred_begin(*II) == pred_end(*II))
527 WorkList.insert(*II);
528 }
529
530 // Delete the dead blocks and any of their dead successors.
531 MadeChange |= !WorkList.empty();
532 while (!WorkList.empty()) {
533 BasicBlock *BB = WorkList.pop_back_val();
534 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
535
536 DeleteDeadBlock(BB);
537
538 for (SmallVectorImpl<BasicBlock*>::iterator
539 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
540 if (pred_begin(*II) == pred_end(*II))
541 WorkList.insert(*II);
542 }
543
544 // Merge pairs of basic blocks with unconditional branches, connected by
545 // a single edge.
546 if (EverMadeChange || MadeChange)
547 MadeChange |= eliminateFallThrough(F);
548
549 EverMadeChange |= MadeChange;
550 }
551
552 if (!DisableGCOpts) {
553 SmallVector<Instruction *, 2> Statepoints;
554 for (BasicBlock &BB : F)
555 for (Instruction &I : BB)
556 if (isStatepoint(I))
557 Statepoints.push_back(&I);
558 for (auto &I : Statepoints)
559 EverMadeChange |= simplifyOffsetableRelocate(*I);
560 }
561
562 // Do this last to clean up use-before-def scenarios introduced by other
563 // preparatory transforms.
564 EverMadeChange |= placeDbgValues(F);
565
566 return EverMadeChange;
567}
568
569/// Merge basic blocks which are connected by a single edge, where one of the
570/// basic blocks has a single successor pointing to the other basic block,
571/// which has a single predecessor.
572bool CodeGenPrepare::eliminateFallThrough(Function &F) {
573 bool Changed = false;
574 // Scan all of the blocks in the function, except for the entry block.
575 // Use a temporary array to avoid iterator being invalidated when
576 // deleting blocks.
577 SmallVector<WeakTrackingVH, 16> Blocks;
578 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
579 Blocks.push_back(&Block);
580
581 for (auto &Block : Blocks) {
582 auto *BB = cast_or_null<BasicBlock>(Block);
583 if (!BB)
584 continue;
585 // If the destination block has a single pred, then this is a trivial
586 // edge, just collapse it.
587 BasicBlock *SinglePred = BB->getSinglePredecessor();
588
589 // Don't merge if BB's address is taken.
590 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
591
592 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
593 if (Term && !Term->isConditional()) {
594 Changed = true;
595 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
596
597 // Merge BB into SinglePred and delete it.
598 MergeBlockIntoPredecessor(BB);
599 }
600 }
601 return Changed;
602}
603
604/// Find a destination block from BB if BB is mergeable empty block.
605BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
606 // If this block doesn't end with an uncond branch, ignore it.
607 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
608 if (!BI || !BI->isUnconditional())
609 return nullptr;
610
611 // If the instruction before the branch (skipping debug info) isn't a phi
612 // node, then other stuff is happening here.
613 BasicBlock::iterator BBI = BI->getIterator();
614 if (BBI != BB->begin()) {
615 --BBI;
616 while (isa<DbgInfoIntrinsic>(BBI)) {
617 if (BBI == BB->begin())
618 break;
619 --BBI;
620 }
621 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
622 return nullptr;
623 }
624
625 // Do not break infinite loops.
626 BasicBlock *DestBB = BI->getSuccessor(0);
627 if (DestBB == BB)
628 return nullptr;
629
630 if (!canMergeBlocks(BB, DestBB))
631 DestBB = nullptr;
632
633 return DestBB;
634}
635
636/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
637/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
638/// edges in ways that are non-optimal for isel. Start by eliminating these
639/// blocks so we can split them the way we want them.
640bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
641 SmallPtrSet<BasicBlock *, 16> Preheaders;
642 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
643 while (!LoopList.empty()) {
644 Loop *L = LoopList.pop_back_val();
645 LoopList.insert(LoopList.end(), L->begin(), L->end());
646 if (BasicBlock *Preheader = L->getLoopPreheader())
647 Preheaders.insert(Preheader);
648 }
649
650 bool MadeChange = false;
651 // Copy blocks into a temporary array to avoid iterator invalidation issues
652 // as we remove them.
653 // Note that this intentionally skips the entry block.
654 SmallVector<WeakTrackingVH, 16> Blocks;
655 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
656 Blocks.push_back(&Block);
657
658 for (auto &Block : Blocks) {
659 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
660 if (!BB)
661 continue;
662 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
663 if (!DestBB ||
664 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
665 continue;
666
667 eliminateMostlyEmptyBlock(BB);
668 MadeChange = true;
669 }
670 return MadeChange;
671}
672
673bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
674 BasicBlock *DestBB,
675 bool isPreheader) {
676 // Do not delete loop preheaders if doing so would create a critical edge.
677 // Loop preheaders can be good locations to spill registers. If the
678 // preheader is deleted and we create a critical edge, registers may be
679 // spilled in the loop body instead.
680 if (!DisablePreheaderProtect && isPreheader &&
681 !(BB->getSinglePredecessor() &&
682 BB->getSinglePredecessor()->getSingleSuccessor()))
683 return false;
684
685 // Skip merging if the block's successor is also a successor to any callbr
686 // that leads to this block.
687 // FIXME: Is this really needed? Is this a correctness issue?
688 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
689 if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
690 for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
691 if (DestBB == CBI->getSuccessor(i))
692 return false;
693 }
694
695 // Try to skip merging if the unique predecessor of BB is terminated by a
696 // switch or indirect branch instruction, and BB is used as an incoming block
697 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
698 // add COPY instructions in the predecessor of BB instead of BB (if it is not
699 // merged). Note that the critical edge created by merging such blocks wont be
700 // split in MachineSink because the jump table is not analyzable. By keeping
701 // such empty block (BB), ISel will place COPY instructions in BB, not in the
702 // predecessor of BB.
703 BasicBlock *Pred = BB->getUniquePredecessor();
704 if (!Pred ||
705 !(isa<SwitchInst>(Pred->getTerminator()) ||
706 isa<IndirectBrInst>(Pred->getTerminator())))
707 return true;
708
709 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
710 return true;
711
712 // We use a simple cost heuristic which determine skipping merging is
713 // profitable if the cost of skipping merging is less than the cost of
714 // merging : Cost(skipping merging) < Cost(merging BB), where the
715 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
716 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
717 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
718 // Freq(Pred) / Freq(BB) > 2.
719 // Note that if there are multiple empty blocks sharing the same incoming
720 // value for the PHIs in the DestBB, we consider them together. In such
721 // case, Cost(merging BB) will be the sum of their frequencies.
722
723 if (!isa<PHINode>(DestBB->begin()))
724 return true;
725
726 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
727
728 // Find all other incoming blocks from which incoming values of all PHIs in
729 // DestBB are the same as the ones from BB.
730 for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
731 ++PI) {
732 BasicBlock *DestBBPred = *PI;
733 if (DestBBPred == BB)
734 continue;
735
736 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
737 return DestPN.getIncomingValueForBlock(BB) ==
738 DestPN.getIncomingValueForBlock(DestBBPred);
739 }))
740 SameIncomingValueBBs.insert(DestBBPred);
741 }
742
743 // See if all BB's incoming values are same as the value from Pred. In this
744 // case, no reason to skip merging because COPYs are expected to be place in
745 // Pred already.
746 if (SameIncomingValueBBs.count(Pred))
747 return true;
748
749 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
750 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
751
752 for (auto SameValueBB : SameIncomingValueBBs)
753 if (SameValueBB->getUniquePredecessor() == Pred &&
754 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
755 BBFreq += BFI->getBlockFreq(SameValueBB);
756
757 return PredFreq.getFrequency() <=
758 BBFreq.getFrequency() * FreqRatioToSkipMerge;
759}
760
761/// Return true if we can merge BB into DestBB if there is a single
762/// unconditional branch between them, and BB contains no other non-phi
763/// instructions.
764bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
765 const BasicBlock *DestBB) const {
766 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
767 // the successor. If there are more complex condition (e.g. preheaders),
768 // don't mess around with them.
769 for (const PHINode &PN : BB->phis()) {
770 for (const User *U : PN.users()) {
771 const Instruction *UI = cast<Instruction>(U);
772 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
773 return false;
774 // If User is inside DestBB block and it is a PHINode then check
775 // incoming value. If incoming value is not from BB then this is
776 // a complex condition (e.g. preheaders) we want to avoid here.
777 if (UI->getParent() == DestBB) {
778 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
779 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
780 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
781 if (Insn && Insn->getParent() == BB &&
782 Insn->getParent() != UPN->getIncomingBlock(I))
783 return false;
784 }
785 }
786 }
787 }
788
789 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
790 // and DestBB may have conflicting incoming values for the block. If so, we
791 // can't merge the block.
792 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
793 if (!DestBBPN) return true; // no conflict.
794
795 // Collect the preds of BB.
796 SmallPtrSet<const BasicBlock*, 16> BBPreds;
797 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
798 // It is faster to get preds from a PHI than with pred_iterator.
799 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
800 BBPreds.insert(BBPN->getIncomingBlock(i));
801 } else {
802 BBPreds.insert(pred_begin(BB), pred_end(BB));
803 }
804
805 // Walk the preds of DestBB.
806 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
807 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
808 if (BBPreds.count(Pred)) { // Common predecessor?
809 for (const PHINode &PN : DestBB->phis()) {
810 const Value *V1 = PN.getIncomingValueForBlock(Pred);
811 const Value *V2 = PN.getIncomingValueForBlock(BB);
812
813 // If V2 is a phi node in BB, look up what the mapped value will be.
814 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
815 if (V2PN->getParent() == BB)
816 V2 = V2PN->getIncomingValueForBlock(Pred);
817
818 // If there is a conflict, bail out.
819 if (V1 != V2) return false;
820 }
821 }
822 }
823
824 return true;
825}
826
827/// Eliminate a basic block that has only phi's and an unconditional branch in
828/// it.
829void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
830 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
831 BasicBlock *DestBB = BI->getSuccessor(0);
832
833 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
834 << *BB << *DestBB);
835
836 // If the destination block has a single pred, then this is a trivial edge,
837 // just collapse it.
838 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
839 if (SinglePred != DestBB) {
840 assert(SinglePred == BB &&
841 "Single predecessor not the same as predecessor");
842 // Merge DestBB into SinglePred/BB and delete it.
843 MergeBlockIntoPredecessor(DestBB);
844 // Note: BB(=SinglePred) will not be deleted on this path.
845 // DestBB(=its single successor) is the one that was deleted.
846 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
847 return;
848 }
849 }
850
851 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
852 // to handle the new incoming edges it is about to have.
853 for (PHINode &PN : DestBB->phis()) {
854 // Remove the incoming value for BB, and remember it.
855 Value *InVal = PN.removeIncomingValue(BB, false);
856
857 // Two options: either the InVal is a phi node defined in BB or it is some
858 // value that dominates BB.
859 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
860 if (InValPhi && InValPhi->getParent() == BB) {
861 // Add all of the input values of the input PHI as inputs of this phi.
862 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
863 PN.addIncoming(InValPhi->getIncomingValue(i),
864 InValPhi->getIncomingBlock(i));
865 } else {
866 // Otherwise, add one instance of the dominating value for each edge that
867 // we will be adding.
868 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
869 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
870 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
871 } else {
872 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
873 PN.addIncoming(InVal, *PI);
874 }
875 }
876 }
877
878 // The PHIs are now updated, change everything that refers to BB to use
879 // DestBB and remove BB.
880 BB->replaceAllUsesWith(DestBB);
881 BB->eraseFromParent();
882 ++NumBlocksElim;
883
884 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
885}
886
887// Computes a map of base pointer relocation instructions to corresponding
888// derived pointer relocation instructions given a vector of all relocate calls
889static void computeBaseDerivedRelocateMap(
890 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
891 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
892 &RelocateInstMap) {
893 // Collect information in two maps: one primarily for locating the base object
894 // while filling the second map; the second map is the final structure holding
895 // a mapping between Base and corresponding Derived relocate calls
896 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
897 for (auto *ThisRelocate : AllRelocateCalls) {
898 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
899 ThisRelocate->getDerivedPtrIndex());
900 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
901 }
902 for (auto &Item : RelocateIdxMap) {
903 std::pair<unsigned, unsigned> Key = Item.first;
904 if (Key.first == Key.second)
905 // Base relocation: nothing to insert
906 continue;
907
908 GCRelocateInst *I = Item.second;
909 auto BaseKey = std::make_pair(Key.first, Key.first);
910
911 // We're iterating over RelocateIdxMap so we cannot modify it.
912 auto MaybeBase = RelocateIdxMap.find(BaseKey);
913 if (MaybeBase == RelocateIdxMap.end())
914 // TODO: We might want to insert a new base object relocate and gep off
915 // that, if there are enough derived object relocates.
916 continue;
917
918 RelocateInstMap[MaybeBase->second].push_back(I);
919 }
920}
921
922// Accepts a GEP and extracts the operands into a vector provided they're all
923// small integer constants
924static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
925 SmallVectorImpl<Value *> &OffsetV) {
926 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
927 // Only accept small constant integer operands
928 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
929 if (!Op || Op->getZExtValue() > 20)
930 return false;
931 }
932
933 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
934 OffsetV.push_back(GEP->getOperand(i));
935 return true;
936}
937
938// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
939// replace, computes a replacement, and affects it.
940static bool
941simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
942 const SmallVectorImpl<GCRelocateInst *> &Targets) {
943 bool MadeChange = false;
944 // We must ensure the relocation of derived pointer is defined after
945 // relocation of base pointer. If we find a relocation corresponding to base
946 // defined earlier than relocation of base then we move relocation of base
947 // right before found relocation. We consider only relocation in the same
948 // basic block as relocation of base. Relocations from other basic block will
949 // be skipped by optimization and we do not care about them.
950 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
951 &*R != RelocatedBase; ++R)
952 if (auto RI = dyn_cast<GCRelocateInst>(R))
953 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
954 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
955 RelocatedBase->moveBefore(RI);
956 break;
957 }
958
959 for (GCRelocateInst *ToReplace : Targets) {
960 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
961 "Not relocating a derived object of the original base object");
962 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
963 // A duplicate relocate call. TODO: coalesce duplicates.
964 continue;
965 }
966
967 if (RelocatedBase->getParent() != ToReplace->getParent()) {
968 // Base and derived relocates are in different basic blocks.
969 // In this case transform is only valid when base dominates derived
970 // relocate. However it would be too expensive to check dominance
971 // for each such relocate, so we skip the whole transformation.
972 continue;
973 }
974
975 Value *Base = ToReplace->getBasePtr();
976 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
977 if (!Derived || Derived->getPointerOperand() != Base)
978 continue;
979
980 SmallVector<Value *, 2> OffsetV;
981 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
982 continue;
983
984 // Create a Builder and replace the target callsite with a gep
985 assert(RelocatedBase->getNextNode() &&
986 "Should always have one since it's not a terminator");
987
988 // Insert after RelocatedBase
989 IRBuilder<> Builder(RelocatedBase->getNextNode());
990 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
991
992 // If gc_relocate does not match the actual type, cast it to the right type.
993 // In theory, there must be a bitcast after gc_relocate if the type does not
994 // match, and we should reuse it to get the derived pointer. But it could be
995 // cases like this:
996 // bb1:
997 // ...
998 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
999 // br label %merge
1000 //
1001 // bb2:
1002 // ...
1003 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
1004 // br label %merge
1005 //
1006 // merge:
1007 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1008 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1009 //
1010 // In this case, we can not find the bitcast any more. So we insert a new bitcast
1011 // no matter there is already one or not. In this way, we can handle all cases, and
1012 // the extra bitcast should be optimized away in later passes.
1013 Value *ActualRelocatedBase = RelocatedBase;
1014 if (RelocatedBase->getType() != Base->getType()) {
1015 ActualRelocatedBase =
1016 Builder.CreateBitCast(RelocatedBase, Base->getType());
1017 }
1018 Value *Replacement = Builder.CreateGEP(
1019 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1020 Replacement->takeName(ToReplace);
1021 // If the newly generated derived pointer's type does not match the original derived
1022 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1023 Value *ActualReplacement = Replacement;
1024 if (Replacement->getType() != ToReplace->getType()) {
1025 ActualReplacement =
1026 Builder.CreateBitCast(Replacement, ToReplace->getType());
1027 }
1028 ToReplace->replaceAllUsesWith(ActualReplacement);
1029 ToReplace->eraseFromParent();
1030
1031 MadeChange = true;
1032 }
1033 return MadeChange;
1034}
1035
1036// Turns this:
1037//
1038// %base = ...
1039// %ptr = gep %base + 15
1040// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1041// %base' = relocate(%tok, i32 4, i32 4)
1042// %ptr' = relocate(%tok, i32 4, i32 5)
1043// %val = load %ptr'
1044//
1045// into this:
1046//
1047// %base = ...
1048// %ptr = gep %base + 15
1049// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1050// %base' = gc.relocate(%tok, i32 4, i32 4)
1051// %ptr' = gep %base' + 15
1052// %val = load %ptr'
1053bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1054 bool MadeChange = false;
1055 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1056
1057 for (auto *U : I.users())
1058 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1059 // Collect all the relocate calls associated with a statepoint
1060 AllRelocateCalls.push_back(Relocate);
1061
1062 // We need at least one base pointer relocation + one derived pointer
1063 // relocation to mangle
1064 if (AllRelocateCalls.size() < 2)
1065 return false;
1066
1067 // RelocateInstMap is a mapping from the base relocate instruction to the
1068 // corresponding derived relocate instructions
1069 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1070 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1071 if (RelocateInstMap.empty())
1072 return false;
1073
1074 for (auto &Item : RelocateInstMap)
1075 // Item.first is the RelocatedBase to offset against
1076 // Item.second is the vector of Targets to replace
1077 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1078 return MadeChange;
1079}
1080
1081/// Sink the specified cast instruction into its user blocks.
1082static bool SinkCast(CastInst *CI) {
1083 BasicBlock *DefBB = CI->getParent();
1084
1085 /// InsertedCasts - Only insert a cast in each block once.
1086 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1087
1088 bool MadeChange = false;
1089 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1090 UI != E; ) {
1091 Use &TheUse = UI.getUse();
1092 Instruction *User = cast<Instruction>(*UI);
1093
1094 // Figure out which BB this cast is used in. For PHI's this is the
1095 // appropriate predecessor block.
1096 BasicBlock *UserBB = User->getParent();
1097 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1098 UserBB = PN->getIncomingBlock(TheUse);
1099 }
1100
1101 // Preincrement use iterator so we don't invalidate it.
1102 ++UI;
1103
1104 // The first insertion point of a block containing an EH pad is after the
1105 // pad. If the pad is the user, we cannot sink the cast past the pad.
1106 if (User->isEHPad())
1107 continue;
1108
1109 // If the block selected to receive the cast is an EH pad that does not
1110 // allow non-PHI instructions before the terminator, we can't sink the
1111 // cast.
1112 if (UserBB->getTerminator()->isEHPad())
1113 continue;
1114
1115 // If this user is in the same block as the cast, don't change the cast.
1116 if (UserBB == DefBB) continue;
1117
1118 // If we have already inserted a cast into this block, use it.
1119 CastInst *&InsertedCast = InsertedCasts[UserBB];
1120
1121 if (!InsertedCast) {
1122 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1123 assert(InsertPt != UserBB->end());
1124 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1125 CI->getType(), "", &*InsertPt);
1126 InsertedCast->setDebugLoc(CI->getDebugLoc());
1127 }
1128
1129 // Replace a use of the cast with a use of the new cast.
1130 TheUse = InsertedCast;
1131 MadeChange = true;
1132 ++NumCastUses;
1133 }
1134
1135 // If we removed all uses, nuke the cast.
1136 if (CI->use_empty()) {
1137 salvageDebugInfo(*CI);
1138 CI->eraseFromParent();
1139 MadeChange = true;
1140 }
1141
1142 return MadeChange;
1143}
1144
1145/// If the specified cast instruction is a noop copy (e.g. it's casting from
1146/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1147/// reduce the number of virtual registers that must be created and coalesced.
1148///
1149/// Return true if any changes are made.
1150static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1151 const DataLayout &DL) {
1152 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1153 // than sinking only nop casts, but is helpful on some platforms.
1154 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1155 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1156 ASC->getDestAddressSpace()))
1157 return false;
1158 }
1159
1160 // If this is a noop copy,
1161 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1162 EVT DstVT = TLI.getValueType(DL, CI->getType());
1163
1164  // Is this an fp<->int conversion?
1165 if (SrcVT.isInteger() != DstVT.isInteger())
1166 return false;
1167
1168 // If this is an extension, it will be a zero or sign extension, which
1169 // isn't a noop.
1170 if (SrcVT.bitsLT(DstVT)) return false;
1171
1172 // If these values will be promoted, find out what they will be promoted
1173 // to. This helps us consider truncates on PPC as noop copies when they
1174 // are.
1175 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1176 TargetLowering::TypePromoteInteger)
1177 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1178 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1179 TargetLowering::TypePromoteInteger)
1180 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1181
1182 // If, after promotion, these are the same types, this is a noop copy.
1183 if (SrcVT != DstVT)
1184 return false;
1185
1186 return SinkCast(CI);
1187}
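
As a hedged example of what this filter accepts (an assumption about the target, not something the file states): on a target where both i16 and i32 are promoted to the same legal integer type, a trunc between them behaves as a no-op copy and is handed to SinkCast, while extensions and fp<->int casts are rejected above.

  %narrow  = trunc i32 %wide to i16    ; no-op copy after promotion -> sinkable
  %widened = sext i16 %narrow to i32   ; an extension is never a no-op -> kept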
1188
1189bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1190 CmpInst *Cmp,
1191 Intrinsic::ID IID) {
1192 if (BO->getParent() != Cmp->getParent()) {
1193 // We used to use a dominator tree here to allow multi-block optimization.
1194 // But that was problematic because:
1195 // 1. It could cause a perf regression by hoisting the math op into the
1196 // critical path.
1197 // 2. It could cause a perf regression by creating a value that was live
1198 // across multiple blocks and increasing register pressure.
1199 // 3. Use of a dominator tree could cause large compile-time regression.
1200 // This is because we recompute the DT on every change in the main CGP
1201 // run-loop. The recomputing is probably unnecessary in many cases, so if
1202 // that was fixed, using a DT here would be ok.
1203 return false;
1204 }
1205
1206 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1207 Value *Arg0 = BO->getOperand(0);
1208 Value *Arg1 = BO->getOperand(1);
1209 if (BO->getOpcode() == Instruction::Add &&
1210 IID == Intrinsic::usub_with_overflow) {
1211    assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1212 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1213 }
1214
1215 // Insert at the first instruction of the pair.
1216 Instruction *InsertPt = nullptr;
1217 for (Instruction &Iter : *Cmp->getParent()) {
1218 if (&Iter == BO || &Iter == Cmp) {
1219 InsertPt = &Iter;
1220 break;
1221 }
1222 }
1223  assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1224
1225 IRBuilder<> Builder(InsertPt);
1226 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1227 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1228 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1229 BO->replaceAllUsesWith(Math);
1230 Cmp->replaceAllUsesWith(OV);
1231 BO->eraseFromParent();
1232 Cmp->eraseFromParent();
1233 return true;
1234}
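
A minimal IR sketch of the replacement performed here, assuming IID is Intrinsic::uadd_with_overflow (value names are illustrative only):

  ; Before: separate math op and overflow compare in one block.
  %math = add i32 %a, %b
  %ov   = icmp ult i32 %math, %a
  ; After: both are folded into a single overflow intrinsic.
  %mo   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %math = extractvalue { i32, i1 } %mo, 0
  %ov   = extractvalue { i32, i1 } %mo, 1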
1235
1236/// Match special-case patterns that check for unsigned add overflow.
1237static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1238 BinaryOperator *&Add) {
1239 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1240 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1241 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1242
1243 // We are not expecting non-canonical/degenerate code. Just bail out.
1244 if (isa<Constant>(A))
1245 return false;
1246
1247 ICmpInst::Predicate Pred = Cmp->getPredicate();
1248 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1249 B = ConstantInt::get(B->getType(), 1);
1250 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1251 B = ConstantInt::get(B->getType(), -1);
1252 else
1253 return false;
1254
1255 // Check the users of the variable operand of the compare looking for an add
1256 // with the adjusted constant.
1257 for (User *U : A->users()) {
1258 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1259 Add = cast<BinaryOperator>(U);
1260 return true;
1261 }
1262 }
1263 return false;
1264}
1265
1266/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1267/// intrinsic. Return true if any changes were made.
1268bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1269 bool &ModifiedDT) {
1270 Value *A, *B;
1271 BinaryOperator *Add;
1272 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
1273 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1274 return false;
1275
1276 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1277 TLI->getValueType(*DL, Add->getType())))
1278 return false;
1279
1280 // We don't want to move around uses of condition values this late, so we
1281 // check if it is legal to create the call to the intrinsic in the basic
1282 // block containing the icmp.
1283 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1284 return false;
1285
1286 if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
1287 return false;
1288
1289 // Reset callers - do not crash by iterating over a dead instruction.
1290 ModifiedDT = true;
1291 return true;
1292}
1293
1294bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1295 bool &ModifiedDT) {
1296 // We are not expecting non-canonical/degenerate code. Just bail out.
1297 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1298 if (isa<Constant>(A) && isa<Constant>(B))
1299 return false;
1300
1301 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1302 ICmpInst::Predicate Pred = Cmp->getPredicate();
1303 if (Pred == ICmpInst::ICMP_UGT) {
1304 std::swap(A, B);
1305 Pred = ICmpInst::ICMP_ULT;
1306 }
1307 // Convert special-case: (A == 0) is the same as (A u< 1).
1308 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1309 B = ConstantInt::get(B->getType(), 1);
1310 Pred = ICmpInst::ICMP_ULT;
1311 }
1312 // Convert special-case: (A != 0) is the same as (0 u< A).
1313 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1314 std::swap(A, B);
1315 Pred = ICmpInst::ICMP_ULT;
1316 }
1317 if (Pred != ICmpInst::ICMP_ULT)
1318 return false;
1319
1320 // Walk the users of a variable operand of a compare looking for a subtract or
1321 // add with that same operand. Also match the 2nd operand of the compare to
1322 // the add/sub, but that may be a negated constant operand of an add.
1323 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1324 BinaryOperator *Sub = nullptr;
1325 for (User *U : CmpVariableOperand->users()) {
1326 // A - B, A u< B --> usubo(A, B)
1327 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1328 Sub = cast<BinaryOperator>(U);
1329 break;
1330 }
1331
1332 // A + (-C), A u< C (canonicalized form of (sub A, C))
1333 const APInt *CmpC, *AddC;
1334 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1335 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1336 Sub = cast<BinaryOperator>(U);
1337 break;
1338 }
1339 }
1340 if (!Sub)
1341 return false;
1342
1343 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1344 TLI->getValueType(*DL, Sub->getType())))
1345 return false;
1346
1347 if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
1348 return false;
1349
1350 // Reset callers - do not crash by iterating over a dead instruction.
1351 ModifiedDT = true;
1352 return true;
1353}
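
A sketch of the subtraction case, with illustrative value names; the add-of-negated-constant form mentioned above is matched back to the same intrinsic:

  ; Before: a subtract plus an unsigned-borrow check on the same operands.
  %sub = sub i32 %a, %b
  %ov  = icmp ult i32 %a, %b
  ; After combining into the overflow intrinsic:
  %so  = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue { i32, i1 } %so, 0
  %ov  = extractvalue { i32, i1 } %so, 1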
1354
1355/// Sink the given CmpInst into user blocks to reduce the number of virtual
1356/// registers that must be created and coalesced. This is a clear win except on
1357/// targets with multiple condition code registers (PowerPC), where it might
1358/// lose; some adjustment may be wanted there.
1359///
1360/// Return true if any changes are made.
1361static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1362 if (TLI.hasMultipleConditionRegisters())
1363 return false;
1364
1365 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1366 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1367 return false;
1368
1369 // Only insert a cmp in each block once.
1370 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1371
1372 bool MadeChange = false;
1373 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1374 UI != E; ) {
1375 Use &TheUse = UI.getUse();
1376 Instruction *User = cast<Instruction>(*UI);
1377
1378 // Preincrement use iterator so we don't invalidate it.
1379 ++UI;
1380
1381 // Don't bother for PHI nodes.
1382 if (isa<PHINode>(User))
1383 continue;
1384
1385 // Figure out which BB this cmp is used in.
1386 BasicBlock *UserBB = User->getParent();
1387 BasicBlock *DefBB = Cmp->getParent();
1388
1389 // If this user is in the same block as the cmp, don't change the cmp.
1390 if (UserBB == DefBB) continue;
1391
1392 // If we have already inserted a cmp into this block, use it.
1393 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1394
1395 if (!InsertedCmp) {
1396 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1397      assert(InsertPt != UserBB->end());
1398 InsertedCmp =
1399 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1400 Cmp->getOperand(0), Cmp->getOperand(1), "",
1401 &*InsertPt);
1402 // Propagate the debug info.
1403 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1404 }
1405
1406 // Replace a use of the cmp with a use of the new cmp.
1407 TheUse = InsertedCmp;
1408 MadeChange = true;
1409 ++NumCmpUses;
1410 }
1411
1412 // If we removed all uses, nuke the cmp.
1413 if (Cmp->use_empty()) {
1414 Cmp->eraseFromParent();
1415 MadeChange = true;
1416 }
1417
1418 return MadeChange;
1419}
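
For illustration only (block and value names invented), the same sinking idea applied to a compare whose only use is a branch in another block:

  ; Before: %c is defined in entry but only branched on in %later.
  entry:
    %c = icmp eq i32 %x, 0
    br label %later
  later:
    br i1 %c, label %t, label %f
  ; After sinking, %later holds its own "icmp eq i32 %x, 0" feeding the
  ; branch, so instruction selection can fold the compare into the branch
  ; without keeping a condition value live across blocks.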
1420
1421/// For pattern like:
1422///
1423/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1424/// ...
1425/// DomBB:
1426/// ...
1427/// br DomCond, TrueBB, CmpBB
1428/// CmpBB: (with DomBB being the single predecessor)
1429/// ...
1430/// Cmp = icmp eq CmpOp0, CmpOp1
1431/// ...
1432///
1433/// This would use two comparisons on targets where the lowering of icmp
1434/// sgt/slt differs from the lowering of icmp eq (PowerPC). This function tries
1435/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0,
1436/// CmpOp1'. After that, DomCond and Cmp can share the same comparison, saving
1437/// one comparison.
1438///
1439/// Return true if any changes are made.
1440static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1441 const TargetLowering &TLI) {
1442 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1443 return false;
1444
1445 ICmpInst::Predicate Pred = Cmp->getPredicate();
1446 if (Pred != ICmpInst::ICMP_EQ)
1447 return false;
1448
1449 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1450 // icmp slt/sgt would introduce more redundant LLVM IR.
1451 for (User *U : Cmp->users()) {
1452 if (isa<BranchInst>(U))
1453 continue;
1454 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1455 continue;
1456 return false;
1457 }
1458
1459 // This is a cheap/incomplete check for dominance - just match a single
1460 // predecessor with a conditional branch.
1461 BasicBlock *CmpBB = Cmp->getParent();
1462 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1463 if (!DomBB)
1464 return false;
1465
1466 // We want to ensure that the only way control gets to the comparison of
1467 // interest is that a less/greater than comparison on the same operands is
1468 // false.
1469 Value *DomCond;
1470 BasicBlock *TrueBB, *FalseBB;
1471 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1472 return false;
1473 if (CmpBB != FalseBB)
1474 return false;
1475
1476 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1477 ICmpInst::Predicate DomPred;
1478 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1479 return false;
1480 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1481 return false;
1482
1483 // Convert the equality comparison to the opposite of the dominating
1484 // comparison and swap the direction for all branch/select users.
1485 // We have conceptually converted:
1486 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1487 // to
1488 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1489 // And similarly for branches.
1490 for (User *U : Cmp->users()) {
1491 if (auto *BI = dyn_cast<BranchInst>(U)) {
1492      assert(BI->isConditional() && "Must be conditional");
1493 BI->swapSuccessors();
1494 continue;
1495 }
1496 if (auto *SI = dyn_cast<SelectInst>(U)) {
1497 // Swap operands
1498 SI->swapValues();
1499 SI->swapProfMetadata();
1500 continue;
1501 }
1502    llvm_unreachable("Must be a branch or a select");
1503 }
1504 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1505 return true;
1506}
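
A small sketch of the rewrite, with invented names; getSwappedPredicate turns the dominating sgt into slt, and the select/branch arms are swapped to preserve the semantics:

  ; Before:
  dombb:
    %domcond = icmp sgt i32 %a, %b
    br i1 %domcond, label %truebb, label %cmpbb
  cmpbb:
    %cmp = icmp eq i32 %a, %b
    %res = select i1 %cmp, i32 %eqval, i32 %ltval
  ; After (on entry to cmpbb we already know !(a > b)):
  cmpbb:
    %cmp = icmp slt i32 %a, %b
    %res = select i1 %cmp, i32 %ltval, i32 %eqval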
1507
1508bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
1509 if (sinkCmpExpression(Cmp, *TLI))
1510 return true;
1511
1512 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1513 return true;
1514
1515 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1516 return true;
1517
1518 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1519 return true;
1520
1521 return false;
1522}
1523
1524/// Duplicate and sink the given 'and' instruction into user blocks where it is
1525/// used in a compare to allow isel to generate better code for targets where
1526/// this operation can be combined.
1527///
1528/// Return true if any changes are made.
1529static bool sinkAndCmp0Expression(Instruction *AndI,
1530 const TargetLowering &TLI,
1531 SetOfInstrs &InsertedInsts) {
1532 // Double-check that we're not trying to optimize an instruction that was
1533 // already optimized by some other part of this pass.
1534  assert(!InsertedInsts.count(AndI) &&
1535         "Attempting to optimize already optimized and instruction");
1536 (void) InsertedInsts;
1537
1538 // Nothing to do for single use in same basic block.
1539 if (AndI->hasOneUse() &&
1540 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1541 return false;
1542
1543 // Try to avoid cases where sinking/duplicating is likely to increase register
1544 // pressure.
1545 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1546 !isa<ConstantInt>(AndI->getOperand(1)) &&
1547 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1548 return false;
1549
1550 for (auto *U : AndI->users()) {
1551 Instruction *User = cast<Instruction>(U);
1552
1553 // Only sink 'and' feeding icmp with 0.
1554 if (!isa<ICmpInst>(User))
1555 return false;
1556
1557 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1558 if (!CmpC || !CmpC->isZero())
1559 return false;
1560 }
1561
1562 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1563 return false;
1564
1565  LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1566  LLVM_DEBUG(AndI->getParent()->dump());
1567
1568 // Push the 'and' into the same block as the icmp 0. There should only be
1569 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1570 // others, so we don't need to keep track of which BBs we insert into.
1571 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1572 UI != E; ) {
1573 Use &TheUse = UI.getUse();
1574 Instruction *User = cast<Instruction>(*UI);
1575
1576 // Preincrement use iterator so we don't invalidate it.
1577 ++UI;
1578
1579    LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1580
1581 // Keep the 'and' in the same place if the use is already in the same block.
1582 Instruction *InsertPt =
1583 User->getParent() == AndI->getParent() ? AndI : User;
1584 Instruction *InsertedAnd =
1585 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1586 AndI->getOperand(1), "", InsertPt);
1587 // Propagate the debug info.
1588 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1589
1590 // Replace a use of the 'and' with a use of the new 'and'.
1591 TheUse = InsertedAnd;
1592 ++NumAndUses;
1593    LLVM_DEBUG(User->getParent()->dump());
1594 }
1595
1596 // We removed all uses, nuke the and.
1597 AndI->eraseFromParent();
1598 return true;
1599}
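
A hedged illustration (names invented) of the duplication performed by this function:

  ; Before: one 'and' in entry; each use is an icmp-with-zero in another block.
  entry:
    %m = and i32 %x, 255
    br i1 %cond, label %bb1, label %bb2
  bb1:
    %z1 = icmp eq i32 %m, 0
    br i1 %z1, label %t1, label %f1
  bb2:
    %z2 = icmp eq i32 %m, 0
    br i1 %z2, label %t2, label %f2
  ; After: bb1 and bb2 each get their own "and i32 %x, 255" feeding the icmp,
  ; and the original 'and' in entry is erased, so targets with a combined
  ; test-under-mask instruction can match the pattern locally.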
1600
1601/// Check if the candidates could be combined with a shift instruction, which
1602/// includes:
1603/// 1. Truncate instruction
1604/// 2. And instruction and the imm is a mask of the low bits:
1605/// imm & (imm+1) == 0
1606static bool isExtractBitsCandidateUse(Instruction *User) {
1607 if (!isa<TruncInst>(User)) {
1608 if (User->getOpcode() != Instruction::And ||
1609 !isa<ConstantInt>(User->getOperand(1)))
1610 return false;
1611
1612 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1613
1614 if ((Cimm & (Cimm + 1)).getBoolValue())
1615 return false;
1616 }
1617 return true;
1618}
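
A couple of worked constants for the mask test imm & (imm + 1) == 0, which accepts only constants whose set bits form a contiguous run starting at bit 0:

  %lo = and i64 %shr, 65535    ; 0xFFFF & 0x10000 == 0      -> candidate
  %no = and i64 %shr, 65534    ; 0xFFFE & 0xFFFF  == 0xFFFE -> rejected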
1619
1620/// Sink both shift and truncate instruction to the use of truncate's BB.
1621static bool
1622SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1623 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1624 const TargetLowering &TLI, const DataLayout &DL) {
1625 BasicBlock *UserBB = User->getParent();
1626 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1627 auto *TruncI = cast<TruncInst>(User);
1628 bool MadeChange = false;
1629
1630 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1631 TruncE = TruncI->user_end();
1632 TruncUI != TruncE;) {
1633
1634 Use &TruncTheUse = TruncUI.getUse();
1635 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1636 // Preincrement use iterator so we don't invalidate it.
1637
1638 ++TruncUI;
1639
1640 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1641 if (!ISDOpcode)
1642 continue;
1643
1644 // If the use is actually a legal node, there will not be an
1645 // implicit truncate.
1646 // FIXME: always querying the result type is just an
1647 // approximation; some nodes' legality is determined by the
1648 // operand or other means. There's no good way to find out though.
1649 if (TLI.isOperationLegalOrCustom(
1650 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1651 continue;
1652
1653 // Don't bother for PHI nodes.
1654 if (isa<PHINode>(TruncUser))
1655 continue;
1656
1657 BasicBlock *TruncUserBB = TruncUser->getParent();
1658
1659 if (UserBB == TruncUserBB)
1660 continue;
1661
1662 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1663 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1664
1665 if (!InsertedShift && !InsertedTrunc) {
1666 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1667      assert(InsertPt != TruncUserBB->end());
1668 // Sink the shift
1669 if (ShiftI->getOpcode() == Instruction::AShr)
1670 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1671 "", &*InsertPt);
1672 else
1673 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1674 "", &*InsertPt);
1675 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1676
1677 // Sink the trunc
1678 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1679 TruncInsertPt++;
1680      assert(TruncInsertPt != TruncUserBB->end());
1681
1682 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1683 TruncI->getType(), "", &*TruncInsertPt);
1684 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1685
1686 MadeChange = true;
1687
1688 TruncTheUse = InsertedTrunc;
1689 }
1690 }
1691 return MadeChange;
1692}
1693
1694/// Sink the shift *right* instruction into user blocks if the uses could
1695/// potentially be combined with this shift instruction to generate a
1696/// BitExtract instruction. This is only applied if the architecture supports a
1697/// BitExtract instruction. Here is an example:
1698/// BB1:
1699/// %x.extract.shift = lshr i64 %arg1, 32
1700/// BB2:
1701/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1702/// ==>
1703///
1704/// BB2:
1705/// %x.extract.shift.1 = lshr i64 %arg1, 32
1706/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1707///
1708/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1709/// instruction.
1710/// Return true if any changes are made.
1711static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1712 const TargetLowering &TLI,
1713 const DataLayout &DL) {
1714 BasicBlock *DefBB = ShiftI->getParent();
1715
1716 /// Only insert instructions in each block once.
1717 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1718
1719 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1720
1721 bool MadeChange = false;
1722 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1723 UI != E;) {
1724 Use &TheUse = UI.getUse();
1725 Instruction *User = cast<Instruction>(*UI);
1726 // Preincrement use iterator so we don't invalidate it.
1727 ++UI;
1728
1729 // Don't bother for PHI nodes.
1730 if (isa<PHINode>(User))
1731 continue;
1732
1733 if (!isExtractBitsCandidateUse(User))
1734 continue;
1735
1736 BasicBlock *UserBB = User->getParent();
1737
1738 if (UserBB == DefBB) {
1739      // If the shift and truncate instructions are in the same BB, the use of
1740      // the truncate (TruncUse) may still introduce another truncate if its
1741      // type is not legal. In this case, we would like to sink both the shift
1742      // and the truncate instruction to the BB of TruncUse.
1743 // for example:
1744 // BB1:
1745 // i64 shift.result = lshr i64 opnd, imm
1746 // trunc.result = trunc shift.result to i16
1747 //
1748 // BB2:
1749 // ----> We will have an implicit truncate here if the architecture does
1750 // not have i16 compare.
1751 // cmp i16 trunc.result, opnd2
1752 //
1753 if (isa<TruncInst>(User) && shiftIsLegal
1754 // If the type of the truncate is legal, no truncate will be
1755 // introduced in other basic blocks.
1756 &&
1757 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1758 MadeChange =
1759 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1760
1761 continue;
1762 }
1763 // If we have already inserted a shift into this block, use it.
1764 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1765
1766 if (!InsertedShift) {
1767 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1768      assert(InsertPt != UserBB->end());
1769
1770 if (ShiftI->getOpcode() == Instruction::AShr)
1771 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1772 "", &*InsertPt);
1773 else
1774 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1775 "", &*InsertPt);
1776 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1777
1778 MadeChange = true;
1779 }
1780
1781 // Replace a use of the shift with a use of the new shift.
1782 TheUse = InsertedShift;
1783 }
1784
1785 // If we removed all uses, or there are none, nuke the shift.
1786 if (ShiftI->use_empty()) {
1787 salvageDebugInfo(*ShiftI);
1788 ShiftI->eraseFromParent();
1789 MadeChange = true;
1790 }
1791
1792 return MadeChange;
1793}
1794
1795/// If counting leading or trailing zeros is an expensive operation and a zero
1796/// input is defined, add a check for zero to avoid calling the intrinsic.
1797///
1798/// We want to transform:
1799/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1800///
1801/// into:
1802/// entry:
1803/// %cmpz = icmp eq i64 %A, 0
1804/// br i1 %cmpz, label %cond.end, label %cond.false
1805/// cond.false:
1806/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1807/// br label %cond.end
1808/// cond.end:
1809/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1810///
1811/// If the transform is performed, return true and set ModifiedDT to true.
1812static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1813 const TargetLowering *TLI,
1814 const DataLayout *DL,
1815 bool &ModifiedDT) {
1816 if (!TLI || !DL)
1817 return false;
1818
1819 // If a zero input is undefined, it doesn't make sense to despeculate that.
1820 if (match(CountZeros->getOperand(1), m_One()))
1821 return false;
1822
1823 // If it's cheap to speculate, there's nothing to do.
1824 auto IntrinsicID = CountZeros->getIntrinsicID();
1825 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1826 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1827 return false;
1828
1829 // Only handle legal scalar cases. Anything else requires too much work.
1830 Type *Ty = CountZeros->getType();
1831 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1832 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1833 return false;
1834
1835 // The intrinsic will be sunk behind a compare against zero and branch.
1836 BasicBlock *StartBlock = CountZeros->getParent();
1837 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1838
1839 // Create another block after the count zero intrinsic. A PHI will be added
1840 // in this block to select the result of the intrinsic or the bit-width
1841 // constant if the input to the intrinsic is zero.
1842 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1843 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1844
1845 // Set up a builder to create a compare, conditional branch, and PHI.
1846 IRBuilder<> Builder(CountZeros->getContext());
1847 Builder.SetInsertPoint(StartBlock->getTerminator());
1848 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1849
1850 // Replace the unconditional branch that was created by the first split with
1851 // a compare against zero and a conditional branch.
1852 Value *Zero = Constant::getNullValue(Ty);
1853 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1854 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1855 StartBlock->getTerminator()->eraseFromParent();
1856
1857 // Create a PHI in the end block to select either the output of the intrinsic
1858 // or the bit width of the operand.
1859 Builder.SetInsertPoint(&EndBlock->front());
1860 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1861 CountZeros->replaceAllUsesWith(PN);
1862 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1863 PN->addIncoming(BitWidth, StartBlock);
1864 PN->addIncoming(CountZeros, CallBlock);
1865
1866 // We are explicitly handling the zero case, so we can set the intrinsic's
1867 // undefined zero argument to 'true'. This will also prevent reprocessing the
1868 // intrinsic; we only despeculate when a zero input is defined.
1869 CountZeros->setArgOperand(1, Builder.getTrue());
1870 ModifiedDT = true;
1871 return true;
1872}
1873
1874bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1875 BasicBlock *BB = CI->getParent();
1876
1877 // Lower inline assembly if we can.
1878  // If we found an inline asm expression, and if the target knows how to
1879 // lower it to normal LLVM code, do so now.
1880 if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1881 if (TLI->ExpandInlineAsm(CI)) {
1882 // Avoid invalidating the iterator.
1883 CurInstIterator = BB->begin();
1884 // Avoid processing instructions out of order, which could cause
1885 // reuse before a value is defined.
1886 SunkAddrs.clear();
1887 return true;
1888 }
1889 // Sink address computing for memory operands into the block.
1890 if (optimizeInlineAsmInst(CI))
1891 return true;
1892 }
1893
1894 // Align the pointer arguments to this call if the target thinks it's a good
1895 // idea
1896 unsigned MinSize, PrefAlign;
1897 if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1898 for (auto &Arg : CI->arg_operands()) {
1899 // We want to align both objects whose address is used directly and
1900 // objects whose address is used in casts and GEPs, though it only makes
1901 // sense for GEPs if the offset is a multiple of the desired alignment and
1902 // if size - offset meets the size threshold.
1903 if (!Arg->getType()->isPointerTy())
1904 continue;
1905 APInt Offset(DL->getIndexSizeInBits(
1906 cast<PointerType>(Arg->getType())->getAddressSpace()),
1907 0);
1908 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1909 uint64_t Offset2 = Offset.getLimitedValue();
1910 if ((Offset2 & (PrefAlign-1)) != 0)
1911 continue;
1912 AllocaInst *AI;
1913 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1914 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1915 AI->setAlignment(MaybeAlign(PrefAlign));
1916 // Global variables can only be aligned if they are defined in this
1917 // object (i.e. they are uniquely initialized in this object), and
1918 // over-aligning global variables that have an explicit section is
1919 // forbidden.
1920 GlobalVariable *GV;
1921 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1922 GV->getPointerAlignment(*DL) < PrefAlign &&
1923 DL->getTypeAllocSize(GV->getValueType()) >=
1924 MinSize + Offset2)
1925 GV->setAlignment(MaybeAlign(PrefAlign));
1926 }
1927 // If this is a memcpy (or similar) then we may be able to improve the
1928 // alignment
1929 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1930 unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1931 if (DestAlign > MI->getDestAlignment())
1932 MI->setDestAlignment(DestAlign);
1933 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1934 unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1935 if (SrcAlign > MTI->getSourceAlignment())
1936 MTI->setSourceAlignment(SrcAlign);
1937 }
1938 }
1939 }
1940
1941 // If we have a cold call site, try to sink addressing computation into the
1942 // cold block. This interacts with our handling for loads and stores to
1943 // ensure that we can fold all uses of a potential addressing computation
1944 // into their uses. TODO: generalize this to work over profiling data
1945 bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get());
1946 if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
1947 for (auto &Arg : CI->arg_operands()) {
1948 if (!Arg->getType()->isPointerTy())
1949 continue;
1950 unsigned AS = Arg->getType()->getPointerAddressSpace();
1951 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1952 }
1953
1954 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1955 if (II) {
1956 switch (II->getIntrinsicID()) {
1957 default: break;
1958 case Intrinsic::experimental_widenable_condition: {
1959      // Give up on future widening opportunities so that we can fold away dead
1960 // paths and merge blocks before going into block-local instruction
1961 // selection.
1962 if (II->use_empty()) {
1963 II->eraseFromParent();
1964 return true;
1965 }
1966 Constant *RetVal = ConstantInt::getTrue(II->getContext());
1967 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1968 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1969 });
1970 return true;
1971 }
1972 case Intrinsic::objectsize:
1973 llvm_unreachable("llvm.objectsize.* should have been lowered already")::llvm::llvm_unreachable_internal("llvm.objectsize.* should have been lowered already"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 1973)
;
1974 case Intrinsic::is_constant:
1975 llvm_unreachable("llvm.is.constant.* should have been lowered already")::llvm::llvm_unreachable_internal("llvm.is.constant.* should have been lowered already"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 1975)
;
1976 case Intrinsic::aarch64_stlxr:
1977 case Intrinsic::aarch64_stxr: {
1978 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1979 if (!ExtVal || !ExtVal->hasOneUse() ||
1980 ExtVal->getParent() == CI->getParent())
1981 return false;
1982 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1983 ExtVal->moveBefore(CI);
1984 // Mark this instruction as "inserted by CGP", so that other
1985 // optimizations don't touch it.
1986 InsertedInsts.insert(ExtVal);
1987 return true;
1988 }
1989
1990 case Intrinsic::launder_invariant_group:
1991 case Intrinsic::strip_invariant_group: {
1992 Value *ArgVal = II->getArgOperand(0);
1993 auto it = LargeOffsetGEPMap.find(II);
1994 if (it != LargeOffsetGEPMap.end()) {
1995 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
1996 // Make sure not to have to deal with iterator invalidation
1997 // after possibly adding ArgVal to LargeOffsetGEPMap.
1998 auto GEPs = std::move(it->second);
1999 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2000 LargeOffsetGEPMap.erase(II);
2001 }
2002
2003 II->replaceAllUsesWith(ArgVal);
2004 II->eraseFromParent();
2005 return true;
2006 }
2007 case Intrinsic::cttz:
2008 case Intrinsic::ctlz:
2009 // If counting zeros is expensive, try to avoid it.
2010 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
2011 case Intrinsic::dbg_value:
2012 return fixupDbgValue(II);
2013 }
2014
2015 if (TLI) {
2016 SmallVector<Value*, 2> PtrOps;
2017 Type *AccessTy;
2018 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2019 while (!PtrOps.empty()) {
2020 Value *PtrVal = PtrOps.pop_back_val();
2021 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2022 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2023 return true;
2024 }
2025 }
2026 }
2027
2028 // From here on out we're working with named functions.
2029 if (!CI->getCalledFunction()) return false;
2030
2031 // Lower all default uses of _chk calls. This is very similar
2032 // to what InstCombineCalls does, but here we are only lowering calls
2033 // to fortified library functions (e.g. __memcpy_chk) that have the default
2034 // "don't know" as the objectsize. Anything else should be left alone.
2035 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2036 if (Value *V = Simplifier.optimizeCall(CI)) {
2037 CI->replaceAllUsesWith(V);
2038 CI->eraseFromParent();
2039 return true;
2040 }
2041
2042 return false;
2043}
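
One of the intrinsic cases above, sketched as illustrative IR (block names invented): a widenable condition with remaining uses is pinned to true so the guarded path can be folded away before instruction selection.

  %wc = call i1 @llvm.experimental.widenable.condition()
  br i1 %wc, label %guarded, label %deopt
  ; replaceAndRecursivelySimplify(CI, true) lets this collapse to:
  br label %guarded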
2044
2045/// Look for opportunities to duplicate return instructions to the predecessor
2046/// to enable tail call optimizations. The case it is currently looking for is:
2047/// @code
2048/// bb0:
2049/// %tmp0 = tail call i32 @f0()
2050/// br label %return
2051/// bb1:
2052/// %tmp1 = tail call i32 @f1()
2053/// br label %return
2054/// bb2:
2055/// %tmp2 = tail call i32 @f2()
2056/// br label %return
2057/// return:
2058/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2059/// ret i32 %retval
2060/// @endcode
2061///
2062/// =>
2063///
2064/// @code
2065/// bb0:
2066/// %tmp0 = tail call i32 @f0()
2067/// ret i32 %tmp0
2068/// bb1:
2069/// %tmp1 = tail call i32 @f1()
2070/// ret i32 %tmp1
2071/// bb2:
2072/// %tmp2 = tail call i32 @f2()
2073/// ret i32 %tmp2
2074/// @endcode
2075bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
2076 if (!TLI)
2077 return false;
2078
2079 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2080 if (!RetI)
2081 return false;
2082
2083 PHINode *PN = nullptr;
2084 BitCastInst *BCI = nullptr;
2085 Value *V = RetI->getReturnValue();
2086 if (V) {
2087 BCI = dyn_cast<BitCastInst>(V);
2088 if (BCI)
2089 V = BCI->getOperand(0);
2090
2091 PN = dyn_cast<PHINode>(V);
2092 if (!PN)
2093 return false;
2094 }
2095
2096 if (PN && PN->getParent() != BB)
2097 return false;
2098
2099 // Make sure there are no instructions between the PHI and return, or that the
2100 // return is the first instruction in the block.
2101 if (PN) {
2102 BasicBlock::iterator BI = BB->begin();
2103 // Skip over debug and the bitcast.
2104 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
2105 if (&*BI != RetI)
2106 return false;
2107 } else {
2108 BasicBlock::iterator BI = BB->begin();
2109 while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2110 if (&*BI != RetI)
2111 return false;
2112 }
2113
2114 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2115 /// call.
2116 const Function *F = BB->getParent();
2117 SmallVector<BasicBlock*, 4> TailCallBBs;
2118 if (PN) {
2119 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2120 // Look through bitcasts.
2121 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2122 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2123 BasicBlock *PredBB = PN->getIncomingBlock(I);
2124 // Make sure the phi value is indeed produced by the tail call.
2125 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2126 TLI->mayBeEmittedAsTailCall(CI) &&
2127 attributesPermitTailCall(F, CI, RetI, *TLI))
2128 TailCallBBs.push_back(PredBB);
2129 }
2130 } else {
2131 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2132 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2133 if (!VisitedBBs.insert(*PI).second)
2134 continue;
2135
2136 BasicBlock::InstListType &InstList = (*PI)->getInstList();
2137 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2138 BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2139 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2140 if (RI == RE)
2141 continue;
2142
2143 CallInst *CI = dyn_cast<CallInst>(&*RI);
2144 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2145 attributesPermitTailCall(F, CI, RetI, *TLI))
2146 TailCallBBs.push_back(*PI);
2147 }
2148 }
2149
2150 bool Changed = false;
2151 for (auto const &TailCallBB : TailCallBBs) {
2152 // Make sure the call instruction is followed by an unconditional branch to
2153 // the return block.
2154 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2155 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2156 continue;
2157
2158 // Duplicate the return into TailCallBB.
2159 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2160 ModifiedDT = Changed = true;
2161 ++NumRetsDup;
2162 }
2163
2164 // If we eliminated all predecessors of the block, delete the block now.
2165 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2166 BB->eraseFromParent();
2167
2168 return Changed;
2169}
2170
2171//===----------------------------------------------------------------------===//
2172// Memory Optimization
2173//===----------------------------------------------------------------------===//
2174
2175namespace {
2176
2177/// This is an extended version of TargetLowering::AddrMode
2178/// which holds actual Value*'s for register values.
2179struct ExtAddrMode : public TargetLowering::AddrMode {
2180 Value *BaseReg = nullptr;
2181 Value *ScaledReg = nullptr;
2182 Value *OriginalValue = nullptr;
2183 bool InBounds = true;
2184
2185 enum FieldName {
2186 NoField = 0x00,
2187 BaseRegField = 0x01,
2188 BaseGVField = 0x02,
2189 BaseOffsField = 0x04,
2190 ScaledRegField = 0x08,
2191 ScaleField = 0x10,
2192 MultipleFields = 0xff
2193 };
2194
2195
2196 ExtAddrMode() = default;
2197
2198 void print(raw_ostream &OS) const;
2199 void dump() const;
2200
2201 FieldName compare(const ExtAddrMode &other) {
2202 // First check that the types are the same on each field, as differing types
2203 // is something we can't cope with later on.
2204 if (BaseReg && other.BaseReg &&
2205 BaseReg->getType() != other.BaseReg->getType())
2206 return MultipleFields;
2207 if (BaseGV && other.BaseGV &&
2208 BaseGV->getType() != other.BaseGV->getType())
2209 return MultipleFields;
2210 if (ScaledReg && other.ScaledReg &&
2211 ScaledReg->getType() != other.ScaledReg->getType())
2212 return MultipleFields;
2213
2214 // Conservatively reject 'inbounds' mismatches.
2215 if (InBounds != other.InBounds)
2216 return MultipleFields;
2217
2218 // Check each field to see if it differs.
2219 unsigned Result = NoField;
2220 if (BaseReg != other.BaseReg)
2221 Result |= BaseRegField;
2222 if (BaseGV != other.BaseGV)
2223 Result |= BaseGVField;
2224 if (BaseOffs != other.BaseOffs)
2225 Result |= BaseOffsField;
2226 if (ScaledReg != other.ScaledReg)
2227 Result |= ScaledRegField;
2228 // Don't count 0 as being a different scale, because that actually means
2229 // unscaled (which will already be counted by having no ScaledReg).
2230 if (Scale && other.Scale && Scale != other.Scale)
2231 Result |= ScaleField;
2232
2233 if (countPopulation(Result) > 1)
2234 return MultipleFields;
2235 else
2236 return static_cast<FieldName>(Result);
2237 }
2238
2239 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2240 // with no offset.
2241 bool isTrivial() {
2242 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2243 // trivial if at most one of these terms is nonzero, except that BaseGV and
2244 // BaseReg both being zero actually means a null pointer value, which we
2245 // consider to be 'non-zero' here.
2246 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2247 }
2248
2249 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2250 switch (Field) {
2251 default:
2252 return nullptr;
2253 case BaseRegField:
2254 return BaseReg;
2255 case BaseGVField:
2256 return BaseGV;
2257 case ScaledRegField:
2258 return ScaledReg;
2259 case BaseOffsField:
2260 return ConstantInt::get(IntPtrTy, BaseOffs);
2261 }
2262 }
2263
2264 void SetCombinedField(FieldName Field, Value *V,
2265 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2266 switch (Field) {
2267 default:
2268 llvm_unreachable("Unhandled fields are expected to be rejected earlier")::llvm::llvm_unreachable_internal("Unhandled fields are expected to be rejected earlier"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 2268)
;
2269 break;
2270 case ExtAddrMode::BaseRegField:
2271 BaseReg = V;
2272 break;
2273 case ExtAddrMode::BaseGVField:
2274 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2275 // in the BaseReg field.
2276      assert(BaseReg == nullptr);
2277 BaseReg = V;
2278 BaseGV = nullptr;
2279 break;
2280 case ExtAddrMode::ScaledRegField:
2281 ScaledReg = V;
2282 // If we have a mix of scaled and unscaled addrmodes then we want scale
2283 // to be the scale and not zero.
2284 if (!Scale)
2285 for (const ExtAddrMode &AM : AddrModes)
2286 if (AM.Scale) {
2287 Scale = AM.Scale;
2288 break;
2289 }
2290 break;
2291 case ExtAddrMode::BaseOffsField:
2292 // The offset is no longer a constant, so it goes in ScaledReg with a
2293 // scale of 1.
2294      assert(ScaledReg == nullptr);
2295 ScaledReg = V;
2296 Scale = 1;
2297 BaseOffs = 0;
2298 break;
2299 }
2300 }
2301};
2302
2303} // end anonymous namespace
2304
2305#ifndef NDEBUG
2306static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2307 AM.print(OS);
2308 return OS;
2309}
2310#endif
2311
2312#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2313void ExtAddrMode::print(raw_ostream &OS) const {
2314 bool NeedPlus = false;
2315 OS << "[";
2316 if (InBounds)
2317 OS << "inbounds ";
2318 if (BaseGV) {
2319 OS << (NeedPlus ? " + " : "")
2320 << "GV:";
2321 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2322 NeedPlus = true;
2323 }
2324
2325 if (BaseOffs) {
2326 OS << (NeedPlus ? " + " : "")
2327 << BaseOffs;
2328 NeedPlus = true;
2329 }
2330
2331 if (BaseReg) {
2332 OS << (NeedPlus ? " + " : "")
2333 << "Base:";
2334 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2335 NeedPlus = true;
2336 }
2337 if (Scale) {
2338 OS << (NeedPlus ? " + " : "")
2339 << Scale << "*";
2340 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2341 }
2342
2343 OS << ']';
2344}
2345
2346 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2347 print(dbgs());
2348 dbgs() << '\n';
2349}
2350#endif
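
As a hedged illustration of the decomposition this struct describes (value names invented, and the exact matching behavior is target-dependent), a simple scaled-index load address could be captured as:

  %ptr = getelementptr inbounds i32, i32* %base, i64 %idx
  %val = load i32, i32* %ptr
  ; Possible ExtAddrMode for the load address: BaseReg = %base, ScaledReg = %idx,
  ; Scale = 4 (the i32 stride), BaseOffs = 0, BaseGV = null, InBounds = true,
  ; printed by ExtAddrMode::print as [inbounds Base:%base + 4*%idx].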
2351
2352namespace {
2353
2354/// This class provides transaction based operation on the IR.
2355/// Every change made through this class is recorded in the internal state and
2356/// can be undone (rollback) until commit is called.
2357class TypePromotionTransaction {
2358 /// This represents the common interface of the individual transaction.
2359 /// Each class implements the logic for doing one specific modification on
2360 /// the IR via the TypePromotionTransaction.
2361 class TypePromotionAction {
2362 protected:
2363 /// The Instruction modified.
2364 Instruction *Inst;
2365
2366 public:
2367 /// Constructor of the action.
2368 /// The constructor performs the related action on the IR.
2369 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2370
2371 virtual ~TypePromotionAction() = default;
2372
2373 /// Undo the modification done by this action.
2374 /// When this method is called, the IR must be in the same state as it was
2375 /// before this action was applied.
2376 /// \pre Undoing the action works if and only if the IR is in the exact same
2377 /// state as it was directly after this action was applied.
2378 virtual void undo() = 0;
2379
2380 /// Advocate every change made by this action.
2381 /// When the results on the IR of the action are to be kept, it is important
2382 /// to call this function, otherwise hidden information may be kept forever.
2383 virtual void commit() {
2384 // Nothing to be done, this action is not doing anything.
2385 }
2386 };
2387
2388 /// Utility to remember the position of an instruction.
2389 class InsertionHandler {
2390 /// Position of an instruction.
2391 /// Either an instruction:
2392 /// - Is the first in a basic block: BB is used.
2393 /// - Has a previous instruction: PrevInst is used.
2394 union {
2395 Instruction *PrevInst;
2396 BasicBlock *BB;
2397 } Point;
2398
2399 /// Remember whether or not the instruction had a previous instruction.
2400 bool HasPrevInstruction;
2401
2402 public:
2403 /// Record the position of \p Inst.
2404 InsertionHandler(Instruction *Inst) {
2405 BasicBlock::iterator It = Inst->getIterator();
2406 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2407 if (HasPrevInstruction)
2408 Point.PrevInst = &*--It;
2409 else
2410 Point.BB = Inst->getParent();
2411 }
2412
2413 /// Insert \p Inst at the recorded position.
2414 void insert(Instruction *Inst) {
2415 if (HasPrevInstruction) {
2416 if (Inst->getParent())
2417 Inst->removeFromParent();
2418 Inst->insertAfter(Point.PrevInst);
2419 } else {
2420 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2421 if (Inst->getParent())
2422 Inst->moveBefore(Position);
2423 else
2424 Inst->insertBefore(Position);
2425 }
2426 }
2427 };
2428
2429 /// Move an instruction before another.
2430 class InstructionMoveBefore : public TypePromotionAction {
2431 /// Original position of the instruction.
2432 InsertionHandler Position;
2433
2434 public:
2435 /// Move \p Inst before \p Before.
2436 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2437 : TypePromotionAction(Inst), Position(Inst) {
2438 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Beforedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: move: " << *
Inst << "\nbefore: " << *Before << "\n"; } }
while (false)
2439 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: move: " << *
Inst << "\nbefore: " << *Before << "\n"; } }
while (false)
;
2440 Inst->moveBefore(Before);
2441 }
2442
2443 /// Move the instruction back to its original position.
2444 void undo() override {
2445 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: moveBefore: " <<
*Inst << "\n"; } } while (false)
;
2446 Position.insert(Inst);
2447 }
2448 };
2449
2450 /// Set the operand of an instruction with a new value.
2451 class OperandSetter : public TypePromotionAction {
2452 /// Original operand of the instruction.
2453 Value *Origin;
2454
2455 /// Index of the modified instruction.
2456 unsigned Idx;
2457
2458 public:
2459 /// Set \p Idx operand of \p Inst with \p NewVal.
2460 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2461 : TypePromotionAction(Inst), Idx(Idx) {
2462 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: setOperand: " <<
Idx << "\n" << "for:" << *Inst << "\n"
<< "with:" << *NewVal << "\n"; } } while (
false)
2463 << "for:" << *Inst << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: setOperand: " <<
Idx << "\n" << "for:" << *Inst << "\n"
<< "with:" << *NewVal << "\n"; } } while (
false)
2464 << "with:" << *NewVal << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: setOperand: " <<
Idx << "\n" << "for:" << *Inst << "\n"
<< "with:" << *NewVal << "\n"; } } while (
false)
;
2465 Origin = Inst->getOperand(Idx);
2466 Inst->setOperand(Idx, NewVal);
2467 }
2468
2469 /// Restore the original value of the instruction.
2470 void undo() override {
2471 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: setOperand:" <<
Idx << "\n" << "for: " << *Inst << "\n"
<< "with: " << *Origin << "\n"; } } while (
false)
2472 << "for: " << *Inst << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: setOperand:" <<
Idx << "\n" << "for: " << *Inst << "\n"
<< "with: " << *Origin << "\n"; } } while (
false)
2473 << "with: " << *Origin << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: setOperand:" <<
Idx << "\n" << "for: " << *Inst << "\n"
<< "with: " << *Origin << "\n"; } } while (
false)
;
2474 Inst->setOperand(Idx, Origin);
2475 }
2476 };
2477
2478 /// Hide the operands of an instruction.
2479 /// Do as if this instruction was not using any of its operands.
2480 class OperandsHider : public TypePromotionAction {
2481 /// The list of original operands.
2482 SmallVector<Value *, 4> OriginalValues;
2483
2484 public:
2485 /// Remove \p Inst from the uses of the operands of \p Inst.
2486 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2487 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: OperandsHider: " <<
*Inst << "\n"; } } while (false)
;
2488 unsigned NumOpnds = Inst->getNumOperands();
2489 OriginalValues.reserve(NumOpnds);
2490 for (unsigned It = 0; It < NumOpnds; ++It) {
2491 // Save the current operand.
2492 Value *Val = Inst->getOperand(It);
2493 OriginalValues.push_back(Val);
2494 // Set a dummy one.
2495 // We could use OperandSetter here, but that would imply an overhead
2496 // that we are not willing to pay.
2497 Inst->setOperand(It, UndefValue::get(Val->getType()));
2498 }
2499 }
2500
2501 /// Restore the original list of uses.
2502 void undo() override {
2503 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: OperandsHider: "
<< *Inst << "\n"; } } while (false)
;
2504 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2505 Inst->setOperand(It, OriginalValues[It]);
2506 }
2507 };
2508
2509 /// Build a truncate instruction.
2510 class TruncBuilder : public TypePromotionAction {
2511 Value *Val;
2512
2513 public:
2514 /// Build a truncate instruction of \p Opnd producing a \p Ty
2515 /// result.
2516 /// trunc Opnd to Ty.
2517 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2518 IRBuilder<> Builder(Opnd);
2519 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2520 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: TruncBuilder: " <<
*Val << "\n"; } } while (false)
;
2521 }
2522
2523 /// Get the built value.
2524 Value *getBuiltValue() { return Val; }
2525
2526 /// Remove the built instruction.
2527 void undo() override {
2528 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: TruncBuilder: " <<
*Val << "\n"; } } while (false)
;
2529 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2530 IVal->eraseFromParent();
2531 }
2532 };
2533
2534 /// Build a sign extension instruction.
2535 class SExtBuilder : public TypePromotionAction {
2536 Value *Val;
2537
2538 public:
2539 /// Build a sign extension instruction of \p Opnd producing a \p Ty
2540 /// result.
2541 /// sext Opnd to Ty.
2542 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2543 : TypePromotionAction(InsertPt) {
2544 IRBuilder<> Builder(InsertPt);
2545 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2546 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Do: SExtBuilder: " <<
*Val << "\n"; } } while (false)
;
2547 }
2548
2549 /// Get the built value.
2550 Value *getBuiltValue() { return Val; }
2551
2552 /// Remove the built instruction.
2553 void undo() override {
2554 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Undo: SExtBuilder: " <<
*Val << "\n"; } } while (false)
;
2555 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2556 IVal->eraseFromParent();
2557 }
2558 };
2559
2560 /// Build a zero extension instruction.
2561 class ZExtBuilder : public TypePromotionAction {
2562 Value *Val;
2563
2564 public:
2565 /// Build a zero extension instruction of \p Opnd producing a \p Ty
2566 /// result.
2567 /// zext Opnd to Ty.
2568 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2569 : TypePromotionAction(InsertPt) {
2570 IRBuilder<> Builder(InsertPt);
2571 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2572 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2573 }
2574
2575 /// Get the built value.
2576 Value *getBuiltValue() { return Val; }
2577
2578 /// Remove the built instruction.
2579 void undo() override {
2580 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2581 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2582 IVal->eraseFromParent();
2583 }
2584 };
2585
2586 /// Mutate an instruction to another type.
2587 class TypeMutator : public TypePromotionAction {
2588 /// Record the original type.
2589 Type *OrigTy;
2590
2591 public:
2592 /// Mutate the type of \p Inst into \p NewTy.
2593 TypeMutator(Instruction *Inst, Type *NewTy)
2594 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2595 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2596 << "\n");
2597 Inst->mutateType(NewTy);
2598 }
2599
2600 /// Mutate the instruction back to its original type.
2601 void undo() override {
2602 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2603 << "\n");
2604 Inst->mutateType(OrigTy);
2605 }
2606 };
2607
2608 /// Replace the uses of an instruction by another instruction.
2609 class UsesReplacer : public TypePromotionAction {
2610 /// Helper structure to keep track of the replaced uses.
2611 struct InstructionAndIdx {
2612 /// The instruction using the instruction.
2613 Instruction *Inst;
2614
2615 /// The index where this instruction is used for Inst.
2616 unsigned Idx;
2617
2618 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2619 : Inst(Inst), Idx(Idx) {}
2620 };
2621
2622 /// Keep track of the original uses (pair Instruction, Index).
2623 SmallVector<InstructionAndIdx, 4> OriginalUses;
2624 /// Keep track of the debug users.
2625 SmallVector<DbgValueInst *, 1> DbgValues;
2626
2627 using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2628
2629 public:
2630 /// Replace all the uses of \p Inst by \p New.
2631 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2632 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2633 << "\n");
2634 // Record the original uses.
2635 for (Use &U : Inst->uses()) {
2636 Instruction *UserI = cast<Instruction>(U.getUser());
2637 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2638 }
2639 // Record the debug uses separately. They are not in the instruction's
2640 // use list, but they are replaced by RAUW.
2641 findDbgValues(DbgValues, Inst);
2642
2643 // Now, we can replace the uses.
2644 Inst->replaceAllUsesWith(New);
2645 }
2646
2647 /// Reassign the original uses of Inst to Inst.
2648 void undo() override {
2649 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2650 for (use_iterator UseIt = OriginalUses.begin(),
2651 EndIt = OriginalUses.end();
2652 UseIt != EndIt; ++UseIt) {
2653 UseIt->Inst->setOperand(UseIt->Idx, Inst);
2654 }
2655 // RAUW has replaced all original uses with references to the new value,
2656 // including the debug uses. Since we are undoing the replacements,
2657 // the original debug uses must also be reinstated to maintain the
2658 // correctness and utility of debug value instructions.
2659 for (auto *DVI: DbgValues) {
2660 LLVMContext &Ctx = Inst->getType()->getContext();
2661 auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2662 DVI->setOperand(0, MV);
2663 }
2664 }
2665 };
2666
2667 /// Remove an instruction from the IR.
2668 class InstructionRemover : public TypePromotionAction {
2669 /// Original position of the instruction.
2670 InsertionHandler Inserter;
2671
2672 /// Helper structure to hide all the links to the instruction. In other
2673 /// words, this makes it look as if the instruction had been removed.
2674 OperandsHider Hider;
2675
2676 /// Keep track of the uses replaced, if any.
2677 UsesReplacer *Replacer = nullptr;
2678
2679 /// Keep track of instructions removed.
2680 SetOfInstrs &RemovedInsts;
2681
2682 public:
2683 /// Remove all references to \p Inst and optionally replace all its
2684 /// uses with New.
2685 /// \p RemovedInsts Keep track of the instructions removed by this Action.
2686 /// \pre If !Inst->use_empty(), then New != nullptr
2687 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2688 Value *New = nullptr)
2689 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2690 RemovedInsts(RemovedInsts) {
2691 if (New)
2692 Replacer = new UsesReplacer(Inst, New);
2693 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2694 RemovedInsts.insert(Inst);
2695 /// The instructions removed here will be freed after completing
2696 /// optimizeBlock() for all blocks as we need to keep track of the
2697 /// removed instructions during promotion.
2698 Inst->removeFromParent();
2699 }
2700
2701 ~InstructionRemover() override { delete Replacer; }
2702
2703 /// Resurrect the instruction and reassign it to the proper uses if a
2704 /// new value was provided when building this action.
2705 void undo() override {
2706 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2707 Inserter.insert(Inst);
2708 if (Replacer)
2709 Replacer->undo();
2710 Hider.undo();
2711 RemovedInsts.erase(Inst);
2712 }
2713 };
2714
2715public:
2716 /// Restoration point.
2717 /// The restoration point is a pointer to an action instead of an iterator
2718 /// because the iterator may be invalidated but not the pointer.
2719 using ConstRestorationPt = const TypePromotionAction *;
2720
2721 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2722 : RemovedInsts(RemovedInsts) {}
2723
2724 /// Commit all the changes made in this transaction.
2725 void commit();
2726
2727 /// Undo all the changes made after the given point.
2728 void rollback(ConstRestorationPt Point);
2729
2730 /// Get the current restoration point.
2731 ConstRestorationPt getRestorationPoint() const;
2732
2733 /// \name API for IR modification with state keeping to support rollback.
2734 /// @{
2735 /// Same as Instruction::setOperand.
2736 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2737
2738 /// Same as Instruction::eraseFromParent.
2739 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2740
2741 /// Same as Value::replaceAllUsesWith.
2742 void replaceAllUsesWith(Instruction *Inst, Value *New);
2743
2744 /// Same as Value::mutateType.
2745 void mutateType(Instruction *Inst, Type *NewTy);
2746
2747 /// Same as IRBuilder::createTrunc.
2748 Value *createTrunc(Instruction *Opnd, Type *Ty);
2749
2750 /// Same as IRBuilder::createSExt.
2751 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2752
2753 /// Same as IRBuilder::createZExt.
2754 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2755
2756 /// Same as Instruction::moveBefore.
2757 void moveBefore(Instruction *Inst, Instruction *Before);
2758 /// @}
2759
2760private:
2761 /// The ordered list of actions made so far.
2762 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2763
2764 using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2765
2766 SetOfInstrs &RemovedInsts;
2767};
2768
2769} // end anonymous namespace
2770
2771void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2772 Value *NewVal) {
2773 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
2774 Inst, Idx, NewVal));
2775}
2776
2777void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2778 Value *NewVal) {
2779 Actions.push_back(
2780 std::make_unique<TypePromotionTransaction::InstructionRemover>(
2781 Inst, RemovedInsts, NewVal));
2782}
2783
2784void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2785 Value *New) {
2786 Actions.push_back(
2787 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2788}
2789
2790void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2791 Actions.push_back(
2792 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2793}
2794
2795Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2796 Type *Ty) {
2797 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2798 Value *Val = Ptr->getBuiltValue();
2799 Actions.push_back(std::move(Ptr));
2800 return Val;
2801}
2802
2803Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2804 Value *Opnd, Type *Ty) {
2805 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2806 Value *Val = Ptr->getBuiltValue();
2807 Actions.push_back(std::move(Ptr));
2808 return Val;
2809}
2810
2811Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2812 Value *Opnd, Type *Ty) {
2813 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2814 Value *Val = Ptr->getBuiltValue();
2815 Actions.push_back(std::move(Ptr));
2816 return Val;
2817}
2818
2819void TypePromotionTransaction::moveBefore(Instruction *Inst,
2820 Instruction *Before) {
2821 Actions.push_back(
2822 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2823 Inst, Before));
2824}
2825
2826TypePromotionTransaction::ConstRestorationPt
2827TypePromotionTransaction::getRestorationPoint() const {
2828 return !Actions.empty() ? Actions.back().get() : nullptr;
2829}
2830
2831void TypePromotionTransaction::commit() {
2832 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2833 ++It)
2834 (*It)->commit();
2835 Actions.clear();
2836}
2837
2838void TypePromotionTransaction::rollback(
2839 TypePromotionTransaction::ConstRestorationPt Point) {
2840 while (!Actions.empty() && Point != Actions.back().get()) {
2841 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2842 Curr->undo();
2843 }
2844}
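The transaction is designed for a checkpoint/rollback pattern: record a restoration point, apply speculative rewrites through the API above, then either commit or roll back depending on profitability. A minimal sketch of that usage follows; Inst, NewTy, NewVal and NotProfitable are hypothetical names, not code from this file.

  SetOfInstrs RemovedInsts;
  TypePromotionTransaction TPT(RemovedInsts);
  TypePromotionTransaction::ConstRestorationPt CheckPoint =
      TPT.getRestorationPoint();

  // Speculative rewrites; each call records an undoable action.
  TPT.mutateType(Inst, NewTy);
  TPT.replaceAllUsesWith(Inst, NewVal);

  if (NotProfitable)
    TPT.rollback(CheckPoint);   // undo everything recorded after the checkpoint
  else
    TPT.commit();               // keep the changes and drop the undo information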
2845
2846namespace {
2847
2848/// A helper class for matching addressing modes.
2849///
2850/// This encapsulates the logic for matching the target-legal addressing modes.
2851class AddressingModeMatcher {
2852 SmallVectorImpl<Instruction*> &AddrModeInsts;
2853 const TargetLowering &TLI;
2854 const TargetRegisterInfo &TRI;
2855 const DataLayout &DL;
2856
2857 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2858 /// the memory instruction that we're computing this address for.
2859 Type *AccessTy;
2860 unsigned AddrSpace;
2861 Instruction *MemoryInst;
2862
2863 /// This is the addressing mode that we're building up. This is
2864 /// part of the return value of this addressing mode matching stuff.
2865 ExtAddrMode &AddrMode;
2866
2867 /// The instructions inserted by other CodeGenPrepare optimizations.
2868 const SetOfInstrs &InsertedInsts;
2869
2870 /// A map from the instructions to their type before promotion.
2871 InstrToOrigTy &PromotedInsts;
2872
2873 /// The ongoing transaction where every action should be registered.
2874 TypePromotionTransaction &TPT;
2875
2876 // A GEP whose offset is too large to be folded into the addressing mode.
2877 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2878
2879 /// This is set to true when we should not do profitability checks.
2880 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2881 bool IgnoreProfitability;
2882
2883 /// True if we are optimizing for size.
2884 bool OptSize;
2885
2886 ProfileSummaryInfo *PSI;
2887 BlockFrequencyInfo *BFI;
2888
2889 AddressingModeMatcher(
2890 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2891 const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2892 ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2893 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2894 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
2895 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
2896 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2897 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2898 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2899 PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
2900 OptSize(OptSize), PSI(PSI), BFI(BFI) {
2901 IgnoreProfitability = false;
2902 }
2903
2904public:
2905 /// Find the maximal addressing mode that a load/store of V can fold,
2906 /// given an access type of AccessTy. This returns a list of involved
2907 /// instructions in AddrModeInsts.
2908 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2909 /// optimizations.
2910 /// \p PromotedInsts maps the instructions to their type before promotion.
2911 /// \p TPT The ongoing transaction where every action should be registered.
2912 static ExtAddrMode
2913 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2914 SmallVectorImpl<Instruction *> &AddrModeInsts,
2915 const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2916 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2917 TypePromotionTransaction &TPT,
2918 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
2919 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
2920 ExtAddrMode Result;
2921
2922 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2923 MemoryInst, Result, InsertedInsts,
2924 PromotedInsts, TPT, LargeOffsetGEP,
2925 OptSize, PSI, BFI)
2926 .matchAddr(V, 0);
2927 (void)Success; assert(Success && "Couldn't select *anything*?");
2928 return Result;
2929 }
2930
2931private:
2932 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2933 bool matchAddr(Value *Addr, unsigned Depth);
2934 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2935 bool *MovedAway = nullptr);
2936 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2937 ExtAddrMode &AMBefore,
2938 ExtAddrMode &AMAfter);
2939 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2940 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2941 Value *PromotedOperand) const;
2942};
2943
2944class PhiNodeSet;
2945
2946/// An iterator for PhiNodeSet.
2947class PhiNodeSetIterator {
2948 PhiNodeSet * const Set;
2949 size_t CurrentIndex = 0;
2950
2951public:
2952 /// The constructor. Start should point to either a valid element, or be equal
2953 /// to the size of the underlying SmallVector of the PhiNodeSet.
2954 PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2955 PHINode * operator*() const;
2956 PhiNodeSetIterator& operator++();
2957 bool operator==(const PhiNodeSetIterator &RHS) const;
2958 bool operator!=(const PhiNodeSetIterator &RHS) const;
2959};
2960
2961/// Keeps a set of PHINodes.
2962///
2963/// This is a minimal set implementation for a specific use case:
2964/// It is very fast when there are very few elements, but also provides good
2965/// performance when there are many. It is similar to SmallPtrSet, but also
2966/// provides iteration by insertion order, which is deterministic and stable
2967 /// across runs. It is also similar to SmallSetVector, but allows removing
2968 /// elements in O(1) time. This is achieved by not actually removing the element
2969 /// from the underlying vector, which comes at the cost of using more memory, but
2970 /// that is fine, since PhiNodeSets are used as short-lived objects.
2971class PhiNodeSet {
2972 friend class PhiNodeSetIterator;
2973
2974 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
2975 using iterator = PhiNodeSetIterator;
2976
2977 /// Keeps the elements in the order of their insertion in the underlying
2978 /// vector. To achieve constant time removal, it never deletes any element.
2979 SmallVector<PHINode *, 32> NodeList;
2980
2981 /// Keeps the elements in the underlying set implementation. This (and not the
2982 /// NodeList defined above) is the source of truth on whether an element
2983 /// is actually in the collection.
2984 MapType NodeMap;
2985
2986 /// Points to the first valid (not deleted) element when the set is not empty
2987 /// and the value is not zero. Equals to the size of the underlying vector
2988 /// when the set is empty. When the value is 0, as in the beginning, the
2989 /// first element may or may not be valid.
2990 size_t FirstValidElement = 0;
2991
2992public:
2993 /// Inserts a new element to the collection.
2994 /// \returns true if the element is actually added, i.e. was not in the
2995 /// collection before the operation.
2996 bool insert(PHINode *Ptr) {
2997 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
2998 NodeList.push_back(Ptr);
2999 return true;
3000 }
3001 return false;
3002 }
3003
3004 /// Removes the element from the collection.
3005 /// \returns whether the element is actually removed, i.e. was in the
3006 /// collection before the operation.
3007 bool erase(PHINode *Ptr) {
3008 auto it = NodeMap.find(Ptr);
3009 if (it != NodeMap.end()) {
3010 NodeMap.erase(Ptr);
3011 SkipRemovedElements(FirstValidElement);
3012 return true;
3013 }
3014 return false;
3015 }
3016
3017 /// Removes all elements and clears the collection.
3018 void clear() {
3019 NodeMap.clear();
3020 NodeList.clear();
3021 FirstValidElement = 0;
3022 }
3023
3024 /// \returns an iterator that will iterate the elements in the order of
3025 /// insertion.
3026 iterator begin() {
3027 if (FirstValidElement == 0)
3028 SkipRemovedElements(FirstValidElement);
3029 return PhiNodeSetIterator(this, FirstValidElement);
3030 }
3031
3032 /// \returns an iterator that points to the end of the collection.
3033 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3034
3035 /// Returns the number of elements in the collection.
3036 size_t size() const {
3037 return NodeMap.size();
3038 }
3039
3040 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3041 size_t count(PHINode *Ptr) const {
3042 return NodeMap.count(Ptr);
3043 }
3044
3045private:
3046 /// Updates the CurrentIndex so that it will point to a valid element.
3047 ///
3048 /// If the element of NodeList at CurrentIndex is valid, it does not
3049 /// change it. If there are no more valid elements, it updates CurrentIndex
3050 /// to point to the end of the NodeList.
3051 void SkipRemovedElements(size_t &CurrentIndex) {
3052 while (CurrentIndex < NodeList.size()) {
3053 auto it = NodeMap.find(NodeList[CurrentIndex]);
3054 // If the element has been deleted and added again later, NodeMap will
3055 // point to a different index, so CurrentIndex will still be invalid.
3056 if (it != NodeMap.end() && it->second == CurrentIndex)
3057 break;
3058 ++CurrentIndex;
3059 }
3060 }
3061};
3062
3063PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3064 : Set(Set), CurrentIndex(Start) {}
3065
3066PHINode * PhiNodeSetIterator::operator*() const {
3067 assert(CurrentIndex < Set->NodeList.size() &&
3068 "PhiNodeSet access out of range");
3069 return Set->NodeList[CurrentIndex];
3070}
3071
3072PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
3073 assert(CurrentIndex < Set->NodeList.size() &&
3074 "PhiNodeSet access out of range");
3075 ++CurrentIndex;
3076 Set->SkipRemovedElements(CurrentIndex);
3077 return *this;
3078}
3079
3080bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3081 return CurrentIndex == RHS.CurrentIndex;
3082}
3083
3084bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3085 return !((*this) == RHS);
3086}
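To make the container's contract concrete (insertion-order iteration, lazy O(1) erase), here is a small hypothetical usage sketch; A, B and C stand for arbitrary PHINode pointers and are not taken from the file.

  PhiNodeSet Phis;
  Phis.insert(A);
  Phis.insert(B);
  Phis.insert(C);
  Phis.erase(B);              // removed from NodeMap only; NodeList keeps the slot
  for (PHINode *P : Phis)     // visits A then C, in insertion order
    (void)P;
  assert(Phis.size() == 2 && Phis.count(B) == 0);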
3087
3088/// Keep track of simplification of Phi nodes.
3089/// Accept the set of all phi nodes and erase phi node from this set
3090/// if it is simplified.
3091class SimplificationTracker {
3092 DenseMap<Value *, Value *> Storage;
3093 const SimplifyQuery &SQ;
3094 // Tracks newly created Phi nodes. The elements are iterated by insertion
3095 // order.
3096 PhiNodeSet AllPhiNodes;
3097 // Tracks newly created Select nodes.
3098 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3099
3100public:
3101 SimplificationTracker(const SimplifyQuery &sq)
3102 : SQ(sq) {}
3103
3104 Value *Get(Value *V) {
3105 do {
3106 auto SV = Storage.find(V);
3107 if (SV == Storage.end())
3108 return V;
3109 V = SV->second;
3110 } while (true);
3111 }
3112
3113 Value *Simplify(Value *Val) {
3114 SmallVector<Value *, 32> WorkList;
3115 SmallPtrSet<Value *, 32> Visited;
3116 WorkList.push_back(Val);
3117 while (!WorkList.empty()) {
3118 auto P = WorkList.pop_back_val();
3119 if (!Visited.insert(P).second)
3120 continue;
3121 if (auto *PI = dyn_cast<Instruction>(P))
3122 if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
3123 for (auto *U : PI->users())
3124 WorkList.push_back(cast<Value>(U));
3125 Put(PI, V);
3126 PI->replaceAllUsesWith(V);
3127 if (auto *PHI = dyn_cast<PHINode>(PI))
3128 AllPhiNodes.erase(PHI);
3129 if (auto *Select = dyn_cast<SelectInst>(PI))
3130 AllSelectNodes.erase(Select);
3131 PI->eraseFromParent();
3132 }
3133 }
3134 return Get(Val);
3135 }
3136
3137 void Put(Value *From, Value *To) {
3138 Storage.insert({ From, To });
3139 }
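Get follows the replacement chain recorded by Put until it reaches a value with no further mapping, so transitive simplifications resolve to the final value. A short hypothetical sketch, assuming Values V1, V2, V3 and a SimplifyQuery SQ are in scope:

  SimplificationTracker ST(SQ);
  ST.Put(V1, V2);             // V1 was simplified to V2
  ST.Put(V2, V3);             // V2 was later simplified to V3
  assert(ST.Get(V1) == V3);   // the chain V1 -> V2 -> V3 resolves to V3
  assert(ST.Get(V3) == V3);   // unmapped values are returned unchanged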
3140
3141 void ReplacePhi(PHINode *From, PHINode *To) {
3142 Value* OldReplacement = Get(From);
3143 while (OldReplacement != From) {
34: Assuming 'OldReplacement' is not equal to 'From'
35: Loop condition is true. Entering loop body
38: Assuming 'OldReplacement' is not equal to 'From'
39: Loop condition is true. Entering loop body
42: Assuming 'OldReplacement' is equal to 'From'
43: Loop condition is false. Execution continues on line 3148
3144 From = To;
40: Null pointer value stored to 'From'
3145 To = dyn_cast<PHINode>(OldReplacement);
36: Assuming 'OldReplacement' is not a 'PHINode'
37: Null pointer value stored to 'To'
41: Assuming 'OldReplacement' is a 'PHINode'
3146 OldReplacement = Get(From);
3147 }
3148 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
43.1: 'To' is non-null
44: Assuming the condition is true
45: '?' condition is true
3149 Put(From, To);
3150 From->replaceAllUsesWith(To);
46: Called C++ object pointer is null
3151 AllPhiNodes.erase(From);
3152 From->eraseFromParent();
3153 }
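The analyzer's path through ReplacePhi is: on the first iteration the dyn_cast fails (step 36) and stores null into 'To' (step 37); the second iteration then copies that null into 'From' (step 40) before the loop exits, so the call on line 3150 dereferences a null pointer. One way the chain walk could be hardened against that path is sketched below; this is only an illustration of the invariant, not the upstream fix.

  void ReplacePhi(PHINode *From, PHINode *To) {
    Value *OldReplacement = Get(From);
    while (OldReplacement != From) {
      // If the recorded replacement is not itself a PHI, stop walking the
      // chain instead of letting a null 'To' propagate into 'From'.
      auto *NextTo = dyn_cast<PHINode>(OldReplacement);
      if (!NextTo)
        break;
      From = To;
      To = NextTo;
      OldReplacement = Get(From);
    }
    assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
    Put(From, To);
    From->replaceAllUsesWith(To);
    AllPhiNodes.erase(From);
    From->eraseFromParent();
  }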
3154
3155 PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
3156
3157 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3158
3159 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3160
3161 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3162
3163 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3164
3165 void destroyNewNodes(Type *CommonType) {
3166 // For safe erasing, replace the uses with dummy value first.
3167 auto Dummy = UndefValue::get(CommonType);
3168 for (auto I : AllPhiNodes) {
3169 I->replaceAllUsesWith(Dummy);
3170 I->eraseFromParent();
3171 }
3172 AllPhiNodes.clear();
3173 for (auto I : AllSelectNodes) {
3174 I->replaceAllUsesWith(Dummy);
3175 I->eraseFromParent();
3176 }
3177 AllSelectNodes.clear();
3178 }
3179};
3180
3181/// A helper class for combining addressing modes.
3182class AddressingModeCombiner {
3183 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3184 typedef std::pair<PHINode *, PHINode *> PHIPair;
3185
3186private:
3187 /// The addressing modes we've collected.
3188 SmallVector<ExtAddrMode, 16> AddrModes;
3189
3190 /// The field in which the AddrModes differ, when we have more than one.
3191 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3192
3193 /// Are the AddrModes that we have all just equal to their original values?
3194 bool AllAddrModesTrivial = true;
3195
3196 /// Common Type for all different fields in addressing modes.
3197 Type *CommonType;
3198
3199 /// SimplifyQuery for simplifyInstruction utility.
3200 const SimplifyQuery &SQ;
3201
3202 /// Original Address.
3203 Value *Original;
3204
3205public:
3206 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3207 : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
3208
3209 /// Get the combined AddrMode
3210 const ExtAddrMode &getAddrMode() const {
3211 return AddrModes[0];
3212 }
3213
3214 /// Add a new AddrMode if it's compatible with the AddrModes we already
3215 /// have.
3216 /// \return True iff we succeeded in doing so.
3217 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3218 // Take note of whether we have any non-trivial AddrModes, as we need to detect
3219 // when all AddrModes are trivial, since then we would introduce a phi or select
3220 // which just duplicates what's already there.
3221 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3222
3223 // If this is the first addrmode then everything is fine.
3224 if (AddrModes.empty()) {
3225 AddrModes.emplace_back(NewAddrMode);
3226 return true;
3227 }
3228
3229 // Figure out how different this is from the other address modes, which we
3230 // can do just by comparing against the first one given that we only care
3231 // about the cumulative difference.
3232 ExtAddrMode::FieldName ThisDifferentField =
3233 AddrModes[0].compare(NewAddrMode);
3234 if (DifferentField == ExtAddrMode::NoField)
3235 DifferentField = ThisDifferentField;
3236 else if (DifferentField != ThisDifferentField)
3237 DifferentField = ExtAddrMode::MultipleFields;
3238
3239 // If NewAddrMode differs in more than one dimension we cannot handle it.
3240 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3241
3242 // If Scale Field is different then we reject.
3243 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3244
3245 // We also must reject the case when the base offset is different and the
3246 // scale reg is not null; we cannot handle this case because the merge of
3247 // the different offsets would be used as the ScaleReg.
3248 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3249 !NewAddrMode.ScaledReg);
3250
3251 // We also must reject the case when the GV is different and a BaseReg is
3252 // installed, because we want to use the base reg as a merge of the GV values.
3253 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3254 !NewAddrMode.HasBaseReg);
3255
3256 // Even if NewAddrMode is the same we still need to collect it, because the
3257 // original value is different. Later we will need all original values
3258 // as anchors when finding the common Phi node.
3259 if (CanHandle)
3260 AddrModes.emplace_back(NewAddrMode);
3261 else
3262 AddrModes.clear();
3263
3264 return CanHandle;
3265 }
3266
3267 /// Combine the addressing modes we've collected into a single
3268 /// addressing mode.
3269 /// \return True iff we successfully combined them or we only had one so
3270 /// didn't need to combine them anyway.
3271 bool combineAddrModes() {
3272 // If we have no AddrModes then they can't be combined.
3273 if (AddrModes.size() == 0)
12: Assuming the condition is false
13: Taking false branch
3274 return false;
3275
3276 // A single AddrMode can trivially be combined.
3277 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
14: Assuming the condition is false
15: Assuming field 'DifferentField' is not equal to NoField
16: Taking false branch
3278 return true;
3279
3280 // If the AddrModes we collected are all just equal to the value they are
3281 // derived from then combining them wouldn't do anything useful.
3282 if (AllAddrModesTrivial)
17: Assuming field 'AllAddrModesTrivial' is false
18: Taking false branch
3283 return false;
3284
3285 if (!addrModeCombiningAllowed())
19: Assuming the condition is false
20: Taking false branch
3286 return false;
3287
3288 // Build a map from <original value, basic block where we saw it> to
3289 // the value of the base register.
3290 // Bail out if there is no common type.
3291 FoldAddrToValueMapping Map;
3292 if (!initializeMap(Map))
21: Taking false branch
3293 return false;
3294
3295 Value *CommonValue = findCommon(Map);
22: Calling 'AddressingModeCombiner::findCommon'
3296 if (CommonValue)
3297 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3298 return CommonValue != nullptr;
3299 }
3300
3301private:
3302 /// Initialize Map with anchor values. For each address seen,
3303 /// we set the value of the differing field seen in this address.
3304 /// At the same time we find a common type for the different fields, which we
3305 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3306 /// Return false if no common type is found.
3307 bool initializeMap(FoldAddrToValueMapping &Map) {
3308 // Keep track of keys where the value is null. We will need to replace it
3309 // with constant null when we know the common type.
3310 SmallVector<Value *, 2> NullValue;
3311 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3312 for (auto &AM : AddrModes) {
3313 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3314 if (DV) {
3315 auto *Type = DV->getType();
3316 if (CommonType && CommonType != Type)
3317 return false;
3318 CommonType = Type;
3319 Map[AM.OriginalValue] = DV;
3320 } else {
3321 NullValue.push_back(AM.OriginalValue);
3322 }
3323 }
3324 assert(CommonType && "At least one non-null value must be!");
3325 for (auto *V : NullValue)
3326 Map[V] = Constant::getNullValue(CommonType);
3327 return true;
3328 }
3329
3330 /// We have a mapping between value A and another value B, where B was a field in
3331 /// the addressing mode represented by A. We also have an original value C
3332 /// representing the address we start with. Traversing from C through phis and
3333 /// selects, we ended up with the A's in the map. This utility function tries to find
3334 /// a value V which is a field in addressing mode C such that, traversing through phi
3335 /// nodes and selects, we end up in the corresponding values B in the map.
3336 /// The utility creates new Phi/Select nodes if needed.
3337 // The simple example looks as follows:
3338 // BB1:
3339 // p1 = b1 + 40
3340 // br cond BB2, BB3
3341 // BB2:
3342 // p2 = b2 + 40
3343 // br BB3
3344 // BB3:
3345 // p = phi [p1, BB1], [p2, BB2]
3346 // v = load p
3347 // Map is
3348 // p1 -> b1
3349 // p2 -> b2
3350 // Request is
3351 // p -> ?
3352 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3353 Value *findCommon(FoldAddrToValueMapping &Map) {
3354 // Tracks the simplification of newly created phi nodes. The reason we use
3355 // this mapping is that we will add newly created Phi nodes to AddrToBase.
3356 // Simplification of Phi nodes is recursive, so some Phi node may
3357 // be simplified after we add it to AddrToBase. In reality this
3358 // simplification is possible only if the original phis/selects were not
3359 // simplified yet.
3360 // Using this mapping we can find the current value in AddrToBase.
3361 SimplificationTracker ST(SQ);
3362
3363 // First step, DFS to create PHI nodes for all intermediate blocks.
3364 // Also fill traverse order for the second step.
3365 SmallVector<Value *, 32> TraverseOrder;
3366 InsertPlaceholders(Map, TraverseOrder, ST);
3367
3368 // Second Step, fill new nodes by merged values and simplify if possible.
3369 FillPlaceholders(Map, TraverseOrder, ST);
3370
3371 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
23: Assuming the condition is false
3372 ST.destroyNewNodes(CommonType);
3373 return nullptr;
3374 }
3375
3376 // Now we'd like to match the new Phi nodes to existing ones.
3377 unsigned PhiNotMatchedCount = 0;
3378 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
24: Calling 'AddressingModeCombiner::MatchPhiSet'
3379 ST.destroyNewNodes(CommonType);
3380 return nullptr;
3381 }
3382
3383 auto *Result = ST.Get(Map.find(Original)->second);
3384 if (Result) {
3385 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3386 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3387 }
3388 return Result;
3389 }
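For the b1/b2 example in the comment above findCommon, the placeholder machinery ultimately materializes a phi of the base values at the top of BB3, which then serves as the common base for the combined addressing mode. Roughly, and only as a sketch (BB1, BB2, BB3 and the base values B1, B2 are assumed to be in scope, not taken from the file):

  // What InsertPlaceholders + FillPlaceholders conceptually build for
  // "p = phi [p1, BB1], [p2, BB2]": a phi of the bases b1 and b2.
  PHINode *SunkPhi = PHINode::Create(B1->getType(), /*NumReservedValues=*/2,
                                     "sunk_phi", &*BB3->begin());
  SunkPhi->addIncoming(B1, BB1);
  SunkPhi->addIncoming(B2, BB2);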
3390
3391 /// Try to match PHI node to Candidate.
3392 /// Matcher tracks the matched Phi nodes.
3393 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3394 SmallSetVector<PHIPair, 8> &Matcher,
3395 PhiNodeSet &PhiNodesToMatch) {
3396 SmallVector<PHIPair, 8> WorkList;
3397 Matcher.insert({ PHI, Candidate });
3398 SmallSet<PHINode *, 8> MatchedPHIs;
3399 MatchedPHIs.insert(PHI);
3400 WorkList.push_back({ PHI, Candidate });
3401 SmallSet<PHIPair, 8> Visited;
3402 while (!WorkList.empty()) {
3403 auto Item = WorkList.pop_back_val();
3404 if (!Visited.insert(Item).second)
3405 continue;
3406 // We iterate over all incoming values to Phi to compare them.
3407 // If the values are different, both of them are Phis, the first one is a
3408 // Phi we added (subject to match), and both of them are in the same basic
3409 // block, then we can match our pair if the values match. So we state that
3410 // these values match and add them to the work list to verify that.
3411 for (auto B : Item.first->blocks()) {
3412 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3413 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3414 if (FirstValue == SecondValue)
3415 continue;
3416
3417 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3418 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3419
3420 // If one of them is not a Phi, or
3421 // the first one is not a Phi node from the set we'd like to match, or
3422 // the Phi nodes are from different basic blocks, then
3423 // we will not be able to match.
3424 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3425 FirstPhi->getParent() != SecondPhi->getParent())
3426 return false;
3427
3428 // If we already matched them then continue.
3429 if (Matcher.count({ FirstPhi, SecondPhi }))
3430 continue;
3431 // So the values are different and do not match. So we need them to
3432 // match. (But we register no more than one match per PHI node, so that
3433 // we won't later try to replace them twice.)
3434 if (MatchedPHIs.insert(FirstPhi).second)
3435 Matcher.insert({ FirstPhi, SecondPhi });
3436 // But we must check it.
3437 WorkList.push_back({ FirstPhi, SecondPhi });
3438 }
3439 }
3440 return true;
3441 }
3442
3443 /// For the given set of PHI nodes (in the SimplificationTracker) try
3444 /// to find their equivalents.
3445 /// Returns false if this matching fails and creation of new Phi is disabled.
3446 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3447 unsigned &PhiNotMatchedCount) {
3448 // Matched and PhiNodesToMatch iterate their elements in a deterministic
3449 // order, so the replacements (ReplacePhi) are also done in a deterministic
3450 // order.
3451 SmallSetVector<PHIPair, 8> Matched;
3452 SmallPtrSet<PHINode *, 8> WillNotMatch;
3453 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3454 while (PhiNodesToMatch.size()) {
25: Loop condition is true. Entering loop body
3455 PHINode *PHI = *PhiNodesToMatch.begin();
3456
3457 // Add ourselves; if there are no matching Phi nodes in the basic block, we do not match.
3458 WillNotMatch.clear();
3459 WillNotMatch.insert(PHI);
3460
3461 // Traverse all Phis until we find an equivalent one or fail to do so.
3462 bool IsMatched = false;
3463 for (auto &P : PHI->getParent()->phis()) {
3464 if (&P == PHI)
26: Assuming the condition is false
27: Taking false branch
3465 continue;
3466 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
28: Assuming 'IsMatched' is true
29: Taking true branch
3467 break;
30: Execution continues on line 3475
3468 // If it does not match, collect all Phi nodes from matcher.
3469 // If we end up with no match, then all these Phi nodes will not match
3470 // later.
3471 for (auto M : Matched)
3472 WillNotMatch.insert(M.first);
3473 Matched.clear();
3474 }
3475 if (IsMatched) {
30.1: 'IsMatched' is true
31: Taking true branch
3476 // Replace all matched values and erase them.
3477 for (auto MV : Matched)
32: Assuming '__begin4' is not equal to '__end4'
3478 ST.ReplacePhi(MV.first, MV.second);
33: Calling 'SimplificationTracker::ReplacePhi'
3479 Matched.clear();
3480 continue;
3481 }
3482 // If we are not allowed to create new nodes then bail out.
3483 if (!AllowNewPhiNodes)
3484 return false;
3485 // Just remove all seen values in matcher. They will not match anything.
3486 PhiNotMatchedCount += WillNotMatch.size();
3487 for (auto *P : WillNotMatch)
3488 PhiNodesToMatch.erase(P);
3489 }
3490 return true;
3491 }
3492 /// Fill the placeholders with values from predecessors and simplify them.
3493 void FillPlaceholders(FoldAddrToValueMapping &Map,
3494 SmallVectorImpl<Value *> &TraverseOrder,
3495 SimplificationTracker &ST) {
3496 while (!TraverseOrder.empty()) {
3497 Value *Current = TraverseOrder.pop_back_val();
3498 assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3499 Value *V = Map[Current];
3500
3501 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3502 // CurrentValue also must be Select.
3503 auto *CurrentSelect = cast<SelectInst>(Current);
3504 auto *TrueValue = CurrentSelect->getTrueValue();
3505 assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3506 Select->setTrueValue(ST.Get(Map[TrueValue]));
3507 auto *FalseValue = CurrentSelect->getFalseValue();
3508 assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3509 Select->setFalseValue(ST.Get(Map[FalseValue]));
3510 } else {
3511 // Must be a Phi node then.
3512 auto *PHI = cast<PHINode>(V);
3513 // Fill the Phi node with values from predecessors.
3514 for (auto B : predecessors(PHI->getParent())) {
3515 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
3516 assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3517 PHI->addIncoming(ST.Get(Map[PV]), B);
3518 }
3519 }
3520 Map[Current] = ST.Simplify(V);
3521 }
3522 }
3523
3524 /// Starting from the original value, recursively iterates over the def-use chain up
3525 /// to known ending values represented in a map. For each traversed phi/select,
3526 /// inserts a placeholder Phi or Select.
3527 /// Reports all newly created Phi/Select nodes by adding them to the set.
3528 /// Also reports the order in which the values have been traversed.
3529 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3530 SmallVectorImpl<Value *> &TraverseOrder,
3531 SimplificationTracker &ST) {
3532 SmallVector<Value *, 32> Worklist;
3533 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3534 "Address must be a Phi or Select node");
3535 auto *Dummy = UndefValue::get(CommonType);
3536 Worklist.push_back(Original);
3537 while (!Worklist.empty()) {
3538 Value *Current = Worklist.pop_back_val();
3539 // if it is already visited or it is an ending value then skip it.
3540 if (Map.find(Current) != Map.end())
3541 continue;
3542 TraverseOrder.push_back(Current);
3543
3544 // CurrentValue must be a Phi node or select. All others must be covered
3545 // by anchors.
3546 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3547 // Is it OK to get metadata from OrigSelect?!
3548 // Create a Select placeholder with dummy value.
3549 SelectInst *Select = SelectInst::Create(
3550 CurrentSelect->getCondition(), Dummy, Dummy,
3551 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3552 Map[Current] = Select;
3553 ST.insertNewSelect(Select);
3554 // We are interested in True and False values.
3555 Worklist.push_back(CurrentSelect->getTrueValue());
3556 Worklist.push_back(CurrentSelect->getFalseValue());
3557 } else {
3558 // It must be a Phi node then.
3559 PHINode *CurrentPhi = cast<PHINode>(Current);
3560 unsigned PredCount = CurrentPhi->getNumIncomingValues();
3561 PHINode *PHI =
3562 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3563 Map[Current] = PHI;
3564 ST.insertNewPhi(PHI);
3565 for (Value *P : CurrentPhi->incoming_values())
3566 Worklist.push_back(P);
3567 }
3568 }
3569 }
3570
3571 bool addrModeCombiningAllowed() {
3572 if (DisableComplexAddrModes)
3573 return false;
3574 switch (DifferentField) {
3575 default:
3576 return false;
3577 case ExtAddrMode::BaseRegField:
3578 return AddrSinkCombineBaseReg;
3579 case ExtAddrMode::BaseGVField:
3580 return AddrSinkCombineBaseGV;
3581 case ExtAddrMode::BaseOffsField:
3582 return AddrSinkCombineBaseOffs;
3583 case ExtAddrMode::ScaledRegField:
3584 return AddrSinkCombineScaledReg;
3585 }
3586 }
3587};
3588} // end anonymous namespace
3589
3590/// Try adding ScaleReg*Scale to the current addressing mode.
3591/// Return true and update AddrMode if this addr mode is legal for the target,
3592/// false if not.
3593bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3594 unsigned Depth) {
3595 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3596 // mode. Just process that directly.
3597 if (Scale == 1)
3598 return matchAddr(ScaleReg, Depth);
3599
3600 // If the scale is 0, it takes nothing to add this.
3601 if (Scale == 0)
3602 return true;
3603
3604 // If we already have a scale of this value, we can add to it, otherwise, we
3605 // need an available scale field.
3606 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3607 return false;
3608
3609 ExtAddrMode TestAddrMode = AddrMode;
3610
3611 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3612 // [A+B + A*7] -> [B+A*8].
3613 TestAddrMode.Scale += Scale;
3614 TestAddrMode.ScaledReg = ScaleReg;
3615
3616 // If the new address isn't legal, bail out.
3617 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3618 return false;
3619
3620 // It was legal, so commit it.
3621 AddrMode = TestAddrMode;
3622
3623 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3624 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3625 // X*Scale + C*Scale to addr mode.
3626 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3627 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3628 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3629 TestAddrMode.InBounds = false;
3630 TestAddrMode.ScaledReg = AddLHS;
3631 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3632
3633 // If this addressing mode is legal, commit it and remember that we folded
3634 // this instruction.
3635 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3636 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3637 AddrMode = TestAddrMode;
3638 return true;
3639 }
3640 }
3641
3642 // Otherwise, not (x+c)*scale, just return what we have.
3643 return true;
3644}
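As a worked instance of the X+C folding at the end of this function, assume ScaleReg is defined by "%s = add i64 %X, 4" and the requested Scale is 2 (illustrative values only): the test mode starts as ScaledReg = %s, Scale = 2, BaseOffs = B and is rewritten to ScaledReg = %X, Scale = 2, BaseOffs = B + 4*2 = B + 8; the rewrite is kept only if TLI.isLegalAddressingMode still accepts the resulting mode.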
3645
3646/// This is a little filter, which returns true if an addressing computation
3647/// involving I might be folded into a load/store accessing it.
3648/// This doesn't need to be perfect, but needs to accept at least
3649/// the set of instructions that MatchOperationAddr can.
3650static bool MightBeFoldableInst(Instruction *I) {
3651 switch (I->getOpcode()) {
3652 case Instruction::BitCast:
3653 case Instruction::AddrSpaceCast:
3654 // Don't touch identity bitcasts.
3655 if (I->getType() == I->getOperand(0)->getType())
3656 return false;
3657 return I->getType()->isIntOrPtrTy();
3658 case Instruction::PtrToInt:
3659 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3660 return true;
3661 case Instruction::IntToPtr:
3662 // We know the input is intptr_t, so this is foldable.
3663 return true;
3664 case Instruction::Add:
3665 return true;
3666 case Instruction::Mul:
3667 case Instruction::Shl:
3668 // Can only handle X*C and X << C.
3669 return isa<ConstantInt>(I->getOperand(1));
3670 case Instruction::GetElementPtr:
3671 return true;
3672 default:
3673 return false;
3674 }
3675}
3676
3677/// Check whether or not \p Val is a legal instruction for \p TLI.
3678/// \note \p Val is assumed to be the product of some type promotion.
3679/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3680/// to be legal, as the non-promoted value would have had the same state.
3681static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3682 const DataLayout &DL, Value *Val) {
3683 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3684 if (!PromotedInst)
3685 return false;
3686 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3687 // If the ISDOpcode is undefined, it was undefined before the promotion.
3688 if (!ISDOpcode)
3689 return true;
3690 // Otherwise, check if the promoted instruction is legal or not.
3691 return TLI.isOperationLegalOrCustom(
3692 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3693}
3694
3695namespace {
3696
3697 /// Helper class to perform type promotion.
3698class TypePromotionHelper {
3699 /// Utility function to add a promoted instruction \p ExtOpnd to
3700 /// \p PromotedInsts and record the type of extension we have seen.
3701 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3702 Instruction *ExtOpnd,
3703 bool IsSExt) {
3704 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3705 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3706 if (It != PromotedInsts.end()) {
3707 // If the new extension is same as original, the information in
3708 // PromotedInsts[ExtOpnd] is still correct.
3709 if (It->second.getInt() == ExtTy)
3710 return;
3711
3712 // Now the new extension is different from old extension, we make
3713 // the type information invalid by setting extension type to
3714 // BothExtension.
3715 ExtTy = BothExtension;
3716 }
3717 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3718 }
3719
3720 /// Utility function to query the original type of instruction \p Opnd
3721 /// with a matched extension type. If the extension doesn't match, we
3722 /// cannot use the information we had on the original type.
3723 /// BothExtension doesn't match any extension type.
3724 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3725 Instruction *Opnd,
3726 bool IsSExt) {
3727 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3728 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3729 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3730 return It->second.getPointer();
3731 return nullptr;
3732 }
3733
3734 /// Utility function to check whether or not a sign or zero extension
3735 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3736 /// either using the operands of \p Inst or promoting \p Inst.
3737 /// The type of the extension is defined by \p IsSExt.
3738 /// In other words, check if:
3739 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3740 /// #1 Promotion applies:
3741 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3742 /// #2 Operand reuses:
3743 /// ext opnd1 to ConsideredExtType.
3744 /// \p PromotedInsts maps the instructions to their type before promotion.
3745 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3746 const InstrToOrigTy &PromotedInsts, bool IsSExt);
3747
3748 /// Utility function to determine if \p OpIdx should be promoted when
3749 /// promoting \p Inst.
3750 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3751 return !(isa<SelectInst>(Inst) && OpIdx == 0);
3752 }
3753
3754 /// Utility function to promote the operand of \p Ext when this
3755 /// operand is a promotable trunc or sext or zext.
3756 /// \p PromotedInsts maps the instructions to their type before promotion.
3757 /// \p CreatedInstsCost[out] contains the cost of all instructions
3758 /// created to promote the operand of Ext.
3759 /// Newly added extensions are inserted in \p Exts.
3760 /// Newly added truncates are inserted in \p Truncs.
3761 /// Should never be called directly.
3762 /// \return The promoted value which is used instead of Ext.
3763 static Value *promoteOperandForTruncAndAnyExt(
3764 Instruction *Ext, TypePromotionTransaction &TPT,
3765 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3766 SmallVectorImpl<Instruction *> *Exts,
3767 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3768
3769 /// Utility function to promote the operand of \p Ext when this
3770 /// operand is promotable and is not a supported trunc or sext.
3771 /// \p PromotedInsts maps the instructions to their type before promotion.
3772 /// \p CreatedInstsCost[out] contains the cost of all the instructions
3773 /// created to promote the operand of Ext.
3774 /// Newly added extensions are inserted in \p Exts.
3775 /// Newly added truncates are inserted in \p Truncs.
3776 /// Should never be called directly.
3777 /// \return The promoted value which is used instead of Ext.
3778 static Value *promoteOperandForOther(Instruction *Ext,
3779 TypePromotionTransaction &TPT,
3780 InstrToOrigTy &PromotedInsts,
3781 unsigned &CreatedInstsCost,
3782 SmallVectorImpl<Instruction *> *Exts,
3783 SmallVectorImpl<Instruction *> *Truncs,
3784 const TargetLowering &TLI, bool IsSExt);
3785
3786 /// \see promoteOperandForOther.
3787 static Value *signExtendOperandForOther(
3788 Instruction *Ext, TypePromotionTransaction &TPT,
3789 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3790 SmallVectorImpl<Instruction *> *Exts,
3791 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3792 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3793 Exts, Truncs, TLI, true);
3794 }
3795
3796 /// \see promoteOperandForOther.
3797 static Value *zeroExtendOperandForOther(
3798 Instruction *Ext, TypePromotionTransaction &TPT,
3799 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3800 SmallVectorImpl<Instruction *> *Exts,
3801 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3802 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3803 Exts, Truncs, TLI, false);
3804 }
3805
3806public:
3807 /// Type for the utility function that promotes the operand of Ext.
3808 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3809 InstrToOrigTy &PromotedInsts,
3810 unsigned &CreatedInstsCost,
3811 SmallVectorImpl<Instruction *> *Exts,
3812 SmallVectorImpl<Instruction *> *Truncs,
3813 const TargetLowering &TLI);
3814
3815 /// Given a sign/zero extend instruction \p Ext, return the appropriate
3816 /// action to promote the operand of \p Ext instead of using Ext.
3817 /// \return NULL if no promotable action is possible with the current
3818 /// sign extension.
3819 /// \p InsertedInsts keeps track of all the instructions inserted by the
3820 /// other CodeGenPrepare optimizations. This information is important
3821 /// because we do not want to promote these instructions as CodeGenPrepare
3822 /// will reinsert them later. Thus creating an infinite loop: create/remove.
3823 /// \p PromotedInsts maps the instructions to their type before promotion.
3824 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3825 const TargetLowering &TLI,
3826 const InstrToOrigTy &PromotedInsts);
3827};
3828
3829} // end anonymous namespace
3830
3831bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3832 Type *ConsideredExtType,
3833 const InstrToOrigTy &PromotedInsts,
3834 bool IsSExt) {
3835 // The promotion helper does not know how to deal with vector types yet.
3836 // To be able to fix that, we would need to fix the places where we
3837 // statically extend, e.g., constants and such.
3838 if (Inst->getType()->isVectorTy())
3839 return false;
3840
3841 // We can always get through zext.
3842 if (isa<ZExtInst>(Inst))
3843 return true;
3844
3845 // sext(sext) is ok too.
3846 if (IsSExt && isa<SExtInst>(Inst))
3847 return true;
3848
3849 // We can get through binary operator, if it is legal. In other words, the
3850 // binary operator must have a nuw or nsw flag.
3851 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3852 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3853 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3854 (IsSExt && BinOp->hasNoSignedWrap())))
3855 return true;
3856
3857 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3858 if ((Inst->getOpcode() == Instruction::And ||
3859 Inst->getOpcode() == Instruction::Or))
3860 return true;
3861
3862 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3863 if (Inst->getOpcode() == Instruction::Xor) {
3864 const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3865 // Make sure it is not a NOT.
3866 if (Cst && !Cst->getValue().isAllOnesValue())
3867 return true;
3868 }
3869
3870 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3871 // It may change a poisoned value into a regular value, like
3872 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3873 // poisoned value regular value
3874 // It should be OK since undef covers valid value.
3875 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3876 return true;
3877
3878 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3879 // It may change a poisoned value into a regular value, like
3880 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3881 // poisoned value regular value
3882 // It should be OK since undef covers valid value.
3883 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3884 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
3885 if (ExtInst->hasOneUse()) {
3886 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
3887 if (AndInst && AndInst->getOpcode() == Instruction::And) {
3888 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3889 if (Cst &&
3890 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3891 return true;
3892 }
3893 }
3894 }
3895
3896 // Check if we can do the following simplification.
3897 // ext(trunc(opnd)) --> ext(opnd)
3898 if (!isa<TruncInst>(Inst))
3899 return false;
3900
3901 Value *OpndVal = Inst->getOperand(0);
3902 // Check if we can use this operand in the extension.
3903 // If the type is larger than the result type of the extension, we cannot.
3904 if (!OpndVal->getType()->isIntegerTy() ||
3905 OpndVal->getType()->getIntegerBitWidth() >
3906 ConsideredExtType->getIntegerBitWidth())
3907 return false;
3908
3909 // If the operand of the truncate is not an instruction, we will not have
3910 // any information on the dropped bits.
3911  // (Actually we could for constants, but it is not worth the extra logic.)
3912 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3913 if (!Opnd)
3914 return false;
3915
3916  // Check that the source type is narrow enough.
3917  // I.e., check that trunc just drops extended bits of the same kind as
3918 // the extension.
3919 // #1 get the type of the operand and check the kind of the extended bits.
3920 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3921 if (OpndType)
3922 ;
3923 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3924 OpndType = Opnd->getOperand(0)->getType();
3925 else
3926 return false;
3927
3928 // #2 check that the truncate just drops extended bits.
3929 return Inst->getType()->getIntegerBitWidth() >=
3930 OpndType->getIntegerBitWidth();
3931}
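// Illustrative sketch (editor addition, not part of the original
// CodeGenPrepare.cpp): an ext(trunc(opnd)) chain that canGetThrough accepts
// when IsSExt is true, because the trunc only drops bits that are copies of
// the sign bit.
//
//   %wide = sext i16 %x to i64       ; OpndType ends up as i16
//   %t    = trunc i64 %wide to i32   ; Inst: drops only sign-extended bits
//   %e    = sext i32 %t to i64       ; ConsideredExtType is i64
//
// The final check compares Inst's width (32) against OpndType's width (16),
// so the function returns true, signalling that the extension may look
// through the trunc.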
3932
3933TypePromotionHelper::Action TypePromotionHelper::getAction(
3934 Instruction *Ext, const SetOfInstrs &InsertedInsts,
3935 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3936  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3937         "Unexpected instruction type");
3938 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3939 Type *ExtTy = Ext->getType();
3940 bool IsSExt = isa<SExtInst>(Ext);
3941 // If the operand of the extension is not an instruction, we cannot
3942 // get through.
3943  // If it is an instruction, check that we can get through it.
3944 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3945 return nullptr;
3946
3947 // Do not promote if the operand has been added by codegenprepare.
3948 // Otherwise, it means we are undoing an optimization that is likely to be
3949 // redone, thus causing potential infinite loop.
3950 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3951 return nullptr;
3952
3953 // SExt or Trunc instructions.
3954 // Return the related handler.
3955 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3956 isa<ZExtInst>(ExtOpnd))
3957 return promoteOperandForTruncAndAnyExt;
3958
3959 // Regular instruction.
3960 // Abort early if we will have to insert non-free instructions.
3961 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3962 return nullptr;
3963 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3964}
3965
3966Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3967 Instruction *SExt, TypePromotionTransaction &TPT,
3968 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3969 SmallVectorImpl<Instruction *> *Exts,
3970 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3971 // By construction, the operand of SExt is an instruction. Otherwise we cannot
3972 // get through it and this method should not be called.
3973 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3974 Value *ExtVal = SExt;
3975 bool HasMergedNonFreeExt = false;
3976 if (isa<ZExtInst>(SExtOpnd)) {
3977 // Replace s|zext(zext(opnd))
3978 // => zext(opnd).
3979 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3980 Value *ZExt =
3981 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3982 TPT.replaceAllUsesWith(SExt, ZExt);
3983 TPT.eraseInstruction(SExt);
3984 ExtVal = ZExt;
3985 } else {
3986 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3987 // => z|sext(opnd).
3988 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3989 }
3990 CreatedInstsCost = 0;
3991
3992 // Remove dead code.
3993 if (SExtOpnd->use_empty())
3994 TPT.eraseInstruction(SExtOpnd);
3995
3996 // Check if the extension is still needed.
3997 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3998 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3999 if (ExtInst) {
4000 if (Exts)
4001 Exts->push_back(ExtInst);
4002 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4003 }
4004 return ExtVal;
4005 }
4006
4007 // At this point we have: ext ty opnd to ty.
4008 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4009 Value *NextVal = ExtInst->getOperand(0);
4010 TPT.eraseInstruction(ExtInst, NextVal);
4011 return NextVal;
4012}
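// Illustrative sketch (editor addition, not part of the original
// CodeGenPrepare.cpp): effect of promoteOperandForTruncAndAnyExt on a
// sext-of-zext chain (the zext branch above), assuming %opnd is an i8 value.
//
//   before:                          after:
//     %z = zext i8 %opnd to i16        %z2 = zext i8 %opnd to i32
//     %s = sext i16 %z to i32          ; every use of %s now uses %z2
//
// The sext, and the inner zext if it became dead, are erased through TPT so
// the whole rewrite can still be rolled back by the caller.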
4013
4014Value *TypePromotionHelper::promoteOperandForOther(
4015 Instruction *Ext, TypePromotionTransaction &TPT,
4016 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4017 SmallVectorImpl<Instruction *> *Exts,
4018 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4019 bool IsSExt) {
4020 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4021 // get through it and this method should not be called.
4022 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4023 CreatedInstsCost = 0;
4024 if (!ExtOpnd->hasOneUse()) {
4025 // ExtOpnd will be promoted.
4026     // All its uses, except Ext, will need to use a truncated value of the
4027 // promoted version.
4028 // Create the truncate now.
4029 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4030 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4031 // Insert it just after the definition.
4032 ITrunc->moveAfter(ExtOpnd);
4033 if (Truncs)
4034 Truncs->push_back(ITrunc);
4035 }
4036
4037 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4038 // Restore the operand of Ext (which has been replaced by the previous call
4039 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4040 TPT.setOperand(Ext, 0, ExtOpnd);
4041 }
4042
4043 // Get through the Instruction:
4044 // 1. Update its type.
4045 // 2. Replace the uses of Ext by Inst.
4046 // 3. Extend each operand that needs to be extended.
4047
4048 // Remember the original type of the instruction before promotion.
4049 // This is useful to know that the high bits are sign extended bits.
4050 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4051 // Step #1.
4052 TPT.mutateType(ExtOpnd, Ext->getType());
4053 // Step #2.
4054 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4055 // Step #3.
4056 Instruction *ExtForOpnd = Ext;
4057
4058  LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4059 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4060 ++OpIdx) {
4061    LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4062 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4063 !shouldExtOperand(ExtOpnd, OpIdx)) {
4064      LLVM_DEBUG(dbgs() << "No need to propagate\n");
4065 continue;
4066 }
4067 // Check if we can statically extend the operand.
4068 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4069 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4070      LLVM_DEBUG(dbgs() << "Statically extend\n");
4071 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4072 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4073 : Cst->getValue().zext(BitWidth);
4074 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4075 continue;
4076 }
4077    // UndefValues are typed, so we have to statically sign extend them.
4078 if (isa<UndefValue>(Opnd)) {
4079      LLVM_DEBUG(dbgs() << "Statically extend\n");
4080 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4081 continue;
4082 }
4083
4084 // Otherwise we have to explicitly sign extend the operand.
4085 // Check if Ext was reused to extend an operand.
4086 if (!ExtForOpnd) {
4087 // If yes, create a new one.
4088      LLVM_DEBUG(dbgs() << "More operands to ext\n");
4089 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
4090 : TPT.createZExt(Ext, Opnd, Ext->getType());
4091 if (!isa<Instruction>(ValForExtOpnd)) {
4092 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4093 continue;
4094 }
4095 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
4096 }
4097 if (Exts)
4098 Exts->push_back(ExtForOpnd);
4099 TPT.setOperand(ExtForOpnd, 0, Opnd);
4100
4101 // Move the sign extension before the insertion point.
4102 TPT.moveBefore(ExtForOpnd, ExtOpnd);
4103 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
4104 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
4105 // If more sext are required, new instructions will have to be created.
4106 ExtForOpnd = nullptr;
4107 }
4108 if (ExtForOpnd == Ext) {
4109    LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4110 TPT.eraseInstruction(Ext);
4111 }
4112 return ExtOpnd;
4113}
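// Illustrative sketch (editor addition, not part of the original
// CodeGenPrepare.cpp): effect of promoteOperandForOther on a sign-extended
// nsw add (steps #1-#3 above), assuming the add has no other use.
//
//   before:                          after:
//     %a = add nsw i32 %x, 1           %px = sext i32 %x to i64
//     %e = sext i32 %a to i64          %a  = add nsw i64 %px, 1
//                                      ; uses of %e now use %a; the constant
//                                      ; operand 1 was extended statically
//
// The original i32 type of %a is recorded in PromotedInsts so later queries
// still know which of its high bits are sign bits.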
4114
4115/// Check whether or not promoting an instruction to a wider type is profitable.
4116/// \p NewCost gives the cost of extension instructions created by the
4117/// promotion.
4118/// \p OldCost gives the cost of extension instructions before the promotion
4119/// plus the number of instructions that have been
4120 /// matched in the addressing mode thanks to the promotion.
4121/// \p PromotedOperand is the value that has been promoted.
4122/// \return True if the promotion is profitable, false otherwise.
4123bool AddressingModeMatcher::isPromotionProfitable(
4124 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4125  LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4126                    << '\n');
4127 // The cost of the new extensions is greater than the cost of the
4128 // old extension plus what we folded.
4129 // This is not profitable.
4130 if (NewCost > OldCost)
4131 return false;
4132 if (NewCost < OldCost)
4133 return true;
4134 // The promotion is neutral but it may help folding the sign extension in
4135 // loads for instance.
4136 // Check that we did not create an illegal instruction.
4137 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4138}
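// Illustrative example (editor addition, not part of the original
// CodeGenPrepare.cpp): if the promotion created one non-free extension
// (NewCost = 1) while the original extension cost 1 and one extra instruction
// was folded into the addressing mode (OldCost = 1 + 1 = 2), then
// NewCost < OldCost and the promotion is kept. When the costs are equal, the
// tie is broken by isPromotedInstructionLegal.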
4139
4140/// Given an instruction or constant expr, see if we can fold the operation
4141/// into the addressing mode. If so, update the addressing mode and return
4142/// true, otherwise return false without modifying AddrMode.
4143/// If \p MovedAway is not NULL, it contains the information of whether or
4144/// not AddrInst has to be folded into the addressing mode on success.
4145 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
4146/// because it has been moved away.
4147/// Thus AddrInst must not be added in the matched instructions.
4148/// This state can happen when AddrInst is a sext, since it may be moved away.
4149/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4150/// not be referenced anymore.
4151bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4152 unsigned Depth,
4153 bool *MovedAway) {
4154 // Avoid exponential behavior on extremely deep expression trees.
4155 if (Depth >= 5) return false;
4156
4157 // By default, all matched instructions stay in place.
4158 if (MovedAway)
4159 *MovedAway = false;
4160
4161 switch (Opcode) {
4162 case Instruction::PtrToInt:
4163 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4164 return matchAddr(AddrInst->getOperand(0), Depth);
4165 case Instruction::IntToPtr: {
4166 auto AS = AddrInst->getType()->getPointerAddressSpace();
4167 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4168 // This inttoptr is a no-op if the integer type is pointer sized.
4169 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4170 return matchAddr(AddrInst->getOperand(0), Depth);
4171 return false;
4172 }
4173 case Instruction::BitCast:
4174 // BitCast is always a noop, and we can handle it as long as it is
4175 // int->int or pointer->pointer (we don't want int<->fp or something).
4176 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4177 // Don't touch identity bitcasts. These were probably put here by LSR,
4178 // and we don't want to mess around with them. Assume it knows what it
4179 // is doing.
4180 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4181 return matchAddr(AddrInst->getOperand(0), Depth);
4182 return false;
4183 case Instruction::AddrSpaceCast: {
4184 unsigned SrcAS
4185 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4186 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4187 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
4188 return matchAddr(AddrInst->getOperand(0), Depth);
4189 return false;
4190 }
4191 case Instruction::Add: {
4192 // Check to see if we can merge in the RHS then the LHS. If so, we win.
4193 ExtAddrMode BackupAddrMode = AddrMode;
4194 unsigned OldSize = AddrModeInsts.size();
4195 // Start a transaction at this point.
4196 // The LHS may match but not the RHS.
4197 // Therefore, we need a higher level restoration point to undo partially
4198 // matched operation.
4199 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4200 TPT.getRestorationPoint();
4201
4202 AddrMode.InBounds = false;
4203 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
4204 matchAddr(AddrInst->getOperand(0), Depth+1))
4205 return true;
4206
4207 // Restore the old addr mode info.
4208 AddrMode = BackupAddrMode;
4209 AddrModeInsts.resize(OldSize);
4210 TPT.rollback(LastKnownGood);
4211
4212 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
4213 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
4214 matchAddr(AddrInst->getOperand(1), Depth+1))
4215 return true;
4216
4217 // Otherwise we definitely can't merge the ADD in.
4218 AddrMode = BackupAddrMode;
4219 AddrModeInsts.resize(OldSize);
4220 TPT.rollback(LastKnownGood);
4221 break;
4222 }
4223 //case Instruction::Or:
4224 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4225 //break;
4226 case Instruction::Mul:
4227 case Instruction::Shl: {
4228 // Can only handle X*C and X << C.
4229 AddrMode.InBounds = false;
4230 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4231 if (!RHS || RHS->getBitWidth() > 64)
4232 return false;
4233 int64_t Scale = RHS->getSExtValue();
4234 if (Opcode == Instruction::Shl)
4235 Scale = 1LL << Scale;
4236
4237 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4238 }
4239 case Instruction::GetElementPtr: {
4240    // Scan the GEP. We check whether it contains constant offsets and at most
4241 // one variable offset.
4242 int VariableOperand = -1;
4243 unsigned VariableScale = 0;
4244
4245 int64_t ConstantOffset = 0;
4246 gep_type_iterator GTI = gep_type_begin(AddrInst);
4247 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4248 if (StructType *STy = GTI.getStructTypeOrNull()) {
4249 const StructLayout *SL = DL.getStructLayout(STy);
4250 unsigned Idx =
4251 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4252 ConstantOffset += SL->getElementOffset(Idx);
4253 } else {
4254 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
4255 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4256 const APInt &CVal = CI->getValue();
4257 if (CVal.getMinSignedBits() <= 64) {
4258 ConstantOffset += CVal.getSExtValue() * TypeSize;
4259 continue;
4260 }
4261 }
4262 if (TypeSize) { // Scales of zero don't do anything.
4263 // We only allow one variable index at the moment.
4264 if (VariableOperand != -1)
4265 return false;
4266
4267 // Remember the variable index.
4268 VariableOperand = i;
4269 VariableScale = TypeSize;
4270 }
4271 }
4272 }
4273
4274 // A common case is for the GEP to only do a constant offset. In this case,
4275 // just add it to the disp field and check validity.
4276 if (VariableOperand == -1) {
4277 AddrMode.BaseOffs += ConstantOffset;
4278 if (ConstantOffset == 0 ||
4279 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4280 // Check to see if we can fold the base pointer in too.
4281 if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4282 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4283 AddrMode.InBounds = false;
4284 return true;
4285 }
4286 } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4287 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4288 ConstantOffset > 0) {
4289 // Record GEPs with non-zero offsets as candidates for splitting in the
4290 // event that the offset cannot fit into the r+i addressing mode.
4291 // Simple and common case that only one GEP is used in calculating the
4292 // address for the memory access.
4293 Value *Base = AddrInst->getOperand(0);
4294 auto *BaseI = dyn_cast<Instruction>(Base);
4295 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4296 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4297 (BaseI && !isa<CastInst>(BaseI) &&
4298 !isa<GetElementPtrInst>(BaseI))) {
4299 // Make sure the parent block allows inserting non-PHI instructions
4300 // before the terminator.
4301 BasicBlock *Parent =
4302 BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4303 if (!Parent->getTerminator()->isEHPad())
4304 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4305 }
4306 }
4307 AddrMode.BaseOffs -= ConstantOffset;
4308 return false;
4309 }
4310
4311 // Save the valid addressing mode in case we can't match.
4312 ExtAddrMode BackupAddrMode = AddrMode;
4313 unsigned OldSize = AddrModeInsts.size();
4314
4315 // See if the scale and offset amount is valid for this target.
4316 AddrMode.BaseOffs += ConstantOffset;
4317 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4318 AddrMode.InBounds = false;
4319
4320 // Match the base operand of the GEP.
4321 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4322 // If it couldn't be matched, just stuff the value in a register.
4323 if (AddrMode.HasBaseReg) {
4324 AddrMode = BackupAddrMode;
4325 AddrModeInsts.resize(OldSize);
4326 return false;
4327 }
4328 AddrMode.HasBaseReg = true;
4329 AddrMode.BaseReg = AddrInst->getOperand(0);
4330 }
4331
4332 // Match the remaining variable portion of the GEP.
4333 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4334 Depth)) {
4335 // If it couldn't be matched, try stuffing the base into a register
4336 // instead of matching it, and retrying the match of the scale.
4337 AddrMode = BackupAddrMode;
4338 AddrModeInsts.resize(OldSize);
4339 if (AddrMode.HasBaseReg)
4340 return false;
4341 AddrMode.HasBaseReg = true;
4342 AddrMode.BaseReg = AddrInst->getOperand(0);
4343 AddrMode.BaseOffs += ConstantOffset;
4344 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4345 VariableScale, Depth)) {
4346 // If even that didn't work, bail.
4347 AddrMode = BackupAddrMode;
4348 AddrModeInsts.resize(OldSize);
4349 return false;
4350 }
4351 }
4352
4353 return true;
4354 }
4355 case Instruction::SExt:
4356 case Instruction::ZExt: {
4357 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4358 if (!Ext)
4359 return false;
4360
4361 // Try to move this ext out of the way of the addressing mode.
4362 // Ask for a method for doing so.
4363 TypePromotionHelper::Action TPH =
4364 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4365 if (!TPH)
4366 return false;
4367
4368 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4369 TPT.getRestorationPoint();
4370 unsigned CreatedInstsCost = 0;
4371 unsigned ExtCost = !TLI.isExtFree(Ext);
4372 Value *PromotedOperand =
4373 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4374 // SExt has been moved away.
4375 // Thus either it will be rematched later in the recursive calls or it is
4376 // gone. Anyway, we must not fold it into the addressing mode at this point.
4377 // E.g.,
4378 // op = add opnd, 1
4379 // idx = ext op
4380 // addr = gep base, idx
4381 // is now:
4382 // promotedOpnd = ext opnd <- no match here
4383 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4384 // addr = gep base, op <- match
4385 if (MovedAway)
4386 *MovedAway = true;
4387
4388    assert(PromotedOperand &&
4389           "TypePromotionHelper should have filtered out those cases");
4390
4391 ExtAddrMode BackupAddrMode = AddrMode;
4392 unsigned OldSize = AddrModeInsts.size();
4393
4394 if (!matchAddr(PromotedOperand, Depth) ||
4395 // The total of the new cost is equal to the cost of the created
4396 // instructions.
4397 // The total of the old cost is equal to the cost of the extension plus
4398 // what we have saved in the addressing mode.
4399 !isPromotionProfitable(CreatedInstsCost,
4400 ExtCost + (AddrModeInsts.size() - OldSize),
4401 PromotedOperand)) {
4402 AddrMode = BackupAddrMode;
4403 AddrModeInsts.resize(OldSize);
4404      LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4405 TPT.rollback(LastKnownGood);
4406 return false;
4407 }
4408 return true;
4409 }
4410 }
4411 return false;
4412}
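// Illustrative sketch (editor addition, not part of the original
// CodeGenPrepare.cpp): the GetElementPtr case above with only constant
// offsets, assuming %p points to { i32, i32 } and i32 is 4 bytes under the
// target's DataLayout.
//
//   %f = getelementptr { i32, i32 }, { i32, i32 }* %p, i64 1, i32 1
//
// ConstantOffset accumulates 8 (one whole struct) + 4 (second field) = 12,
// VariableOperand stays -1, so the matcher adds 12 to AddrMode.BaseOffs and
// then tries to fold %p itself via matchAddr.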
4413
4414/// If we can, try to add the value of 'Addr' into the current addressing mode.
4415/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4416/// unmodified. This assumes that Addr is either a pointer type or intptr_t
4417/// for the target.
4418///
4419bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4420 // Start a transaction at this point that we will rollback if the matching
4421 // fails.
4422 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4423 TPT.getRestorationPoint();
4424 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4425 // Fold in immediates if legal for the target.
4426 AddrMode.BaseOffs += CI->getSExtValue();
4427 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4428 return true;
4429 AddrMode.BaseOffs -= CI->getSExtValue();
4430 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4431 // If this is a global variable, try to fold it into the addressing mode.
4432 if (!AddrMode.BaseGV) {
4433 AddrMode.BaseGV = GV;
4434 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4435 return true;
4436 AddrMode.BaseGV = nullptr;
4437 }
4438 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4439 ExtAddrMode BackupAddrMode = AddrMode;
4440 unsigned OldSize = AddrModeInsts.size();
4441
4442 // Check to see if it is possible to fold this operation.
4443 bool MovedAway = false;
4444 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4445 // This instruction may have been moved away. If so, there is nothing
4446 // to check here.
4447 if (MovedAway)
4448 return true;
4449 // Okay, it's possible to fold this. Check to see if it is actually
4450 // *profitable* to do so. We use a simple cost model to avoid increasing
4451 // register pressure too much.
4452 if (I->hasOneUse() ||
4453 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4454 AddrModeInsts.push_back(I);
4455 return true;
4456 }
4457
4458 // It isn't profitable to do this, roll back.
4459 //cerr << "NOT FOLDING: " << *I;
4460 AddrMode = BackupAddrMode;
4461 AddrModeInsts.resize(OldSize);
4462 TPT.rollback(LastKnownGood);
4463 }
4464 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4465 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4466 return true;
4467 TPT.rollback(LastKnownGood);
4468 } else if (isa<ConstantPointerNull>(Addr)) {
4469 // Null pointer gets folded without affecting the addressing mode.
4470 return true;
4471 }
4472
4473  // Worst case, the target should support [reg] addressing modes. :)
4474 if (!AddrMode.HasBaseReg) {
4475 AddrMode.HasBaseReg = true;
4476 AddrMode.BaseReg = Addr;
4477 // Still check for legality in case the target supports [imm] but not [i+r].
4478 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4479 return true;
4480 AddrMode.HasBaseReg = false;
4481 AddrMode.BaseReg = nullptr;
4482 }
4483
4484 // If the base register is already taken, see if we can do [r+r].
4485 if (AddrMode.Scale == 0) {
4486 AddrMode.Scale = 1;
4487 AddrMode.ScaledReg = Addr;
4488 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4489 return true;
4490 AddrMode.Scale = 0;
4491 AddrMode.ScaledReg = nullptr;
4492 }
4493 // Couldn't match.
4494 TPT.rollback(LastKnownGood);
4495 return false;
4496}
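// Illustrative sketch (editor addition, not part of the original
// CodeGenPrepare.cpp): fallback order in matchAddr for a value %p that cannot
// be folded any further, starting from an empty addressing mode.
//
//   matchAddr(%p, 0) with AddrMode = {}
//     1. try { HasBaseReg = true, BaseReg = %p }            // [reg]
//     2. otherwise, if Scale == 0,
//        try { Scale = 1, ScaledReg = %p }                  // [r + r]
//
// Each candidate is kept only if TLI.isLegalAddressingMode accepts it;
// otherwise the transaction is rolled back and false is returned.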
4497
4498/// Check to see if all uses of OpVal by the specified inline asm call are due
4499/// to memory operands. If so, return true, otherwise return false.
4500static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4501 const TargetLowering &TLI,
4502 const TargetRegisterInfo &TRI) {
4503 const Function *F = CI->getFunction();
4504 TargetLowering::AsmOperandInfoVector TargetConstraints =
4505 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4506 ImmutableCallSite(CI));
4507
4508 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4509 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4510
4511 // Compute the constraint code and ConstraintType to use.
4512 TLI.ComputeConstraintToUse(OpInfo, SDValue());
4513
4514 // If this asm operand is our Value*, and if it isn't an indirect memory
4515 // operand, we can't fold it!
4516 if (OpInfo.CallOperandVal == OpVal &&
4517 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4518 !OpInfo.isIndirect))
4519 return false;
4520 }
4521
4522 return true;
4523}
4524
4525// Max number of memory uses to look at before aborting the search to conserve
4526// compile time.
4527static constexpr int MaxMemoryUsesToScan = 20;
4528
4529/// Recursively walk all the uses of I until we find a memory use.
4530/// If we find an obviously non-foldable instruction, return true.
4531/// Add the ultimately found memory instructions to MemoryUses.
4532static bool FindAllMemoryUses(
4533 Instruction *I,
4534 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4535 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4536 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
4537 BlockFrequencyInfo *BFI, int SeenInsts = 0) {
4538 // If we already considered this instruction, we're done.
4539 if (!ConsideredInsts.insert(I).second)
4540 return false;
4541
4542 // If this is an obviously unfoldable instruction, bail out.
4543 if (!MightBeFoldableInst(I))
4544 return true;
4545
4546 // Loop over all the uses, recursively processing them.
4547 for (Use &U : I->uses()) {
4548 // Conservatively return true if we're seeing a large number or a deep chain
4549 // of users. This avoids excessive compilation times in pathological cases.
4550 if (SeenInsts++ >= MaxMemoryUsesToScan)
4551 return true;
4552
4553 Instruction *UserI = cast<Instruction>(U.getUser());
4554 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4555 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4556 continue;
4557 }
4558
4559 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4560 unsigned opNo = U.getOperandNo();
4561 if (opNo != StoreInst::getPointerOperandIndex())
4562 return true; // Storing addr, not into addr.
4563 MemoryUses.push_back(std::make_pair(SI, opNo));
4564 continue;
4565 }
4566
4567 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4568 unsigned opNo = U.getOperandNo();
4569 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4570 return true; // Storing addr, not into addr.
4571 MemoryUses.push_back(std::make_pair(RMW, opNo));
4572 continue;
4573 }
4574
4575 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4576 unsigned opNo = U.getOperandNo();
4577 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4578 return true; // Storing addr, not into addr.
4579 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4580 continue;
4581 }
4582
4583 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4584 // If this is a cold call, we can sink the addressing calculation into
4585 // the cold path. See optimizeCallInst
4586 bool OptForSize = OptSize ||
4587 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
4588 if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
4589 continue;
4590
4591 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4592 if (!IA) return true;
4593
4594 // If this is a memory operand, we're cool, otherwise bail out.
4595 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4596 return true;
4597 continue;
4598 }
4599
4600 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
4601 PSI, BFI, SeenInsts))
4602 return true;
4603 }
4604
4605 return false;
4606}
4607
4608/// Return true if Val is already known to be live at the use site that we're
4609/// folding it into. If so, there is no cost to include it in the addressing
4610/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4611/// instruction already.
4612bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4613 Value *KnownLive2) {
4614 // If Val is either of the known-live values, we know it is live!
4615 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4616 return true;
4617
4618 // All values other than instructions and arguments (e.g. constants) are live.
4619 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4620
4621 // If Val is a constant sized alloca in the entry block, it is live, this is
4622 // true because it is just a reference to the stack/frame pointer, which is
4623 // live for the whole function.
4624 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4625 if (AI->isStaticAlloca())
4626 return true;
4627
4628 // Check to see if this value is already used in the memory instruction's
4629 // block. If so, it's already live into the block at the very least, so we
4630 // can reasonably fold it.
4631 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4632}
4633
4634/// It is possible for the addressing mode of the machine to fold the specified
4635/// instruction into a load or store that ultimately uses it.
4636/// However, the specified instruction has multiple uses.
4637/// Given this, it may actually increase register pressure to fold it
4638/// into the load. For example, consider this code:
4639///
4640/// X = ...
4641/// Y = X+1
4642/// use(Y) -> nonload/store
4643/// Z = Y+1
4644/// load Z
4645///
4646/// In this case, Y has multiple uses, and can be folded into the load of Z
4647/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4648/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4649/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4650/// number of computations either.
4651///
4652/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4653/// X was live across 'load Z' for other reasons, we actually *would* want to
4654/// fold the addressing mode in the Z case. This would make Y die earlier.
4655bool AddressingModeMatcher::
4656isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4657 ExtAddrMode &AMAfter) {
4658 if (IgnoreProfitability) return true;
4659
4660 // AMBefore is the addressing mode before this instruction was folded into it,
4661 // and AMAfter is the addressing mode after the instruction was folded. Get
4662 // the set of registers referenced by AMAfter and subtract out those
4663 // referenced by AMBefore: this is the set of values which folding in this
4664 // address extends the lifetime of.
4665 //
4666 // Note that there are only two potential values being referenced here,
4667 // BaseReg and ScaleReg (global addresses are always available, as are any
4668 // folded immediates).
4669 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4670
4671 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4672 // lifetime wasn't extended by adding this instruction.
4673 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4674 BaseReg = nullptr;
4675 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4676 ScaledReg = nullptr;
4677
4678 // If folding this instruction (and it's subexprs) didn't extend any live
4679 // ranges, we're ok with it.
4680 if (!BaseReg && !ScaledReg)
4681 return true;
4682
4683 // If all uses of this instruction can have the address mode sunk into them,
4684 // we can remove the addressing mode and effectively trade one live register
4685 // for another (at worst.) In this context, folding an addressing mode into
4686 // the use is just a particularly nice way of sinking it.
4687 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4688 SmallPtrSet<Instruction*, 16> ConsideredInsts;
4689 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
4690 PSI, BFI))
4691 return false; // Has a non-memory, non-foldable use!
4692
4693 // Now that we know that all uses of this instruction are part of a chain of
4694 // computation involving only operations that could theoretically be folded
4695 // into a memory use, loop over each of these memory operation uses and see
4696 // if they could *actually* fold the instruction. The assumption is that
4697 // addressing modes are cheap and that duplicating the computation involved
4698 // many times is worthwhile, even on a fastpath. For sinking candidates
4699 // (i.e. cold call sites), this serves as a way to prevent excessive code
4700 // growth since most architectures have some reasonable small and fast way to
4701 // compute an effective address. (i.e LEA on x86)
4702 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4703 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4704 Instruction *User = MemoryUses[i].first;
4705 unsigned OpNo = MemoryUses[i].second;
4706
4707 // Get the access type of this use. If the use isn't a pointer, we don't
4708 // know what it accesses.
4709 Value *Address = User->getOperand(OpNo);
4710 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4711 if (!AddrTy)
4712 return false;
4713 Type *AddressAccessTy = AddrTy->getElementType();
4714 unsigned AS = AddrTy->getAddressSpace();
4715
4716 // Do a match against the root of this address, ignoring profitability. This
4717 // will tell us if the addressing mode for the memory operation will
4718 // *actually* cover the shared instruction.
4719 ExtAddrMode Result;
4720 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4721 0);
4722 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4723 TPT.getRestorationPoint();
4724 AddressingModeMatcher Matcher(
4725 MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4726 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
4727 Matcher.IgnoreProfitability = true;
4728 bool Success = Matcher.matchAddr(Address, 0);
4729    (void)Success; assert(Success && "Couldn't select *anything*?");
4730
4731 // The match was to check the profitability, the changes made are not
4732 // part of the original matcher. Therefore, they should be dropped
4733 // otherwise the original matcher will not present the right state.
4734 TPT.rollback(LastKnownGood);
4735
4736 // If the match didn't cover I, then it won't be shared by it.
4737 if (!is_contained(MatchedAddrModeInsts, I))
4738 return false;
4739
4740 MatchedAddrModeInsts.clear();
4741 }
4742
4743 return true;
4744}
4745
4746/// Return true if the specified values are defined in a
4747/// different basic block than BB.
4748static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4749 if (Instruction *I = dyn_cast<Instruction>(V))
4750 return I->getParent() != BB;
4751 return false;
4752}
4753
4754 /// Sink addressing mode computation immediately before MemoryInst if doing so
4755/// can be done without increasing register pressure. The need for the
4756/// register pressure constraint means this can end up being an all or nothing
4757/// decision for all uses of the same addressing computation.
4758///
4759/// Load and Store Instructions often have addressing modes that can do
4760/// significant amounts of computation. As such, instruction selection will try
4761/// to get the load or store to do as much computation as possible for the
4762/// program. The problem is that isel can only see within a single block. As
4763/// such, we sink as much legal addressing mode work into the block as possible.
4764///
4765/// This method is used to optimize both load/store and inline asms with memory
4766/// operands. It's also used to sink addressing computations feeding into cold
4767/// call sites into their (cold) basic block.
4768///
4769/// The motivation for handling sinking into cold blocks is that doing so can
4770/// both enable other address mode sinking (by satisfying the register pressure
4771/// constraint above), and reduce register pressure globally (by removing the
4772/// addressing mode computation from the fast path entirely.).
4773bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4774 Type *AccessTy, unsigned AddrSpace) {
4775 Value *Repl = Addr;
4776
4777 // Try to collapse single-value PHI nodes. This is necessary to undo
4778 // unprofitable PRE transformations.
4779 SmallVector<Value*, 8> worklist;
4780 SmallPtrSet<Value*, 16> Visited;
4781 worklist.push_back(Addr);
4782
4783 // Use a worklist to iteratively look through PHI and select nodes, and
4784 // ensure that the addressing mode obtained from the non-PHI/select roots of
4785 // the graph are compatible.
4786 bool PhiOrSelectSeen = false;
4787 SmallVector<Instruction*, 16> AddrModeInsts;
4788 const SimplifyQuery SQ(*DL, TLInfo);
4789 AddressingModeCombiner AddrModes(SQ, Addr);
4790 TypePromotionTransaction TPT(RemovedInsts);
4791 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4792 TPT.getRestorationPoint();
4793 while (!worklist.empty()) {
    (1) Loop condition is true. Entering loop body
4794 Value *V = worklist.back();
4795 worklist.pop_back();
4796
4797 // We allow traversing cyclic Phi nodes.
4798 // In case of success after this loop we ensure that traversing through
4799 // Phi nodes ends up with all cases to compute address of the form
4800 // BaseGV + Base + Scale * Index + Offset
4801    // where Scale and Offset are constants and BaseGV, Base and Index
4802 // are exactly the same Values in all cases.
4803 // It means that BaseGV, Scale and Offset dominate our memory instruction
4804 // and have the same value as they had in address computation represented
4805 // as Phi. So we can safely sink address computation to memory instruction.
4806 if (!Visited.insert(V).second)
    (2) Assuming field 'second' is true
    (3) Taking false branch
4807 continue;
4808
4809 // For a PHI node, push all of its incoming values.
4810    if (PHINode *P = dyn_cast<PHINode>(V)) {
    (4) Assuming 'V' is not a 'PHINode'
    (4.1) 'P' is null
    (5) Taking false branch
4811 for (Value *IncValue : P->incoming_values())
4812 worklist.push_back(IncValue);
4813 PhiOrSelectSeen = true;
4814 continue;
4815 }
4816 // Similar for select.
4817    if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
    (6) Assuming 'V' is not a 'SelectInst'
    (6.1) 'SI' is null
    (7) Taking false branch
4818 worklist.push_back(SI->getFalseValue());
4819 worklist.push_back(SI->getTrueValue());
4820 PhiOrSelectSeen = true;
4821 continue;
4822 }
4823
4824 // For non-PHIs, determine the addressing mode being computed. Note that
4825 // the result may differ depending on what other uses our candidate
4826 // addressing instructions might have.
4827 AddrModeInsts.clear();
4828 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4829 0);
4830 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4831 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4832 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
4833 BFI.get());
4834
4835 GetElementPtrInst *GEP = LargeOffsetGEP.first;
4836    if (GEP && !NewGEPBases.count(GEP)) {
    (7.1) 'GEP' is null
    (8) Taking false branch
4837 // If splitting the underlying data structure can reduce the offset of a
4838 // GEP, collect the GEP. Skip the GEPs that are the new bases of
4839 // previously split data structures.
4840 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4841 if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4842 LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4843 }
4844
4845 NewAddrMode.OriginalValue = V;
4846 if (!AddrModes.addNewAddrMode(NewAddrMode))
    (9) Taking true branch
4847      break;
    (10) Execution continues on line 4853
4848 }
4849
4850 // Try to combine the AddrModes we've collected. If we couldn't collect any,
4851 // or we have multiple but either couldn't combine them or combining them
4852 // wouldn't do anything useful, bail out now.
4853 if (!AddrModes.combineAddrModes()) {
    (11) Calling 'AddressingModeCombiner::combineAddrModes'
4854 TPT.rollback(LastKnownGood);
4855 return false;
4856 }
4857 TPT.commit();
4858
4859 // Get the combined AddrMode (or the only AddrMode, if we only had one).
4860 ExtAddrMode AddrMode = AddrModes.getAddrMode();
4861
4862 // If all the instructions matched are already in this BB, don't do anything.
4863 // If we saw a Phi node then it is not local definitely, and if we saw a select
4864 // then we want to push the address calculation past it even if it's already
4865 // in this BB.
4866 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4867 return IsNonLocalValue(V, MemoryInst->getParent());
4868 })) {
4869    LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4870                      << "\n");
4871 return false;
4872 }
4873
4874 // Insert this computation right after this user. Since our caller is
4875  // scanning from the top of the BB to the bottom, reuses of the expr are
4876 // guaranteed to happen later.
4877 IRBuilder<> Builder(MemoryInst);
4878
4879  // Now that we've determined the addressing expression we want to use and know
4880  // that we have to sink it into this block, check to see if we have already
4881 // done this for some other load/store instr in this block. If so, reuse
4882 // the computation. Before attempting reuse, check if the address is valid
4883 // as it may have been erased.
4884
4885 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4886
4887 Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4888 if (SunkAddr) {
4889    LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4890                      << " for " << *MemoryInst << "\n");
4891 if (SunkAddr->getType() != Addr->getType())
4892 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4893 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
4894 TM && SubtargetInfo->addrSinkUsingGEPs())) {
4895 // By default, we use the GEP-based method when AA is used later. This
4896 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4897    LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4898                      << " for " << *MemoryInst << "\n");
4899 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4900 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4901
4902 // First, find the pointer.
4903 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4904 ResultPtr = AddrMode.BaseReg;
4905 AddrMode.BaseReg = nullptr;
4906 }
4907
4908 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4909 // We can't add more than one pointer together, nor can we scale a
4910 // pointer (both of which seem meaningless).
4911 if (ResultPtr || AddrMode.Scale != 1)
4912 return false;
4913
4914 ResultPtr = AddrMode.ScaledReg;
4915 AddrMode.Scale = 0;
4916 }
4917
4918 // It is only safe to sign extend the BaseReg if we know that the math
4919 // required to create it did not overflow before we extend it. Since
4920 // the original IR value was tossed in favor of a constant back when
4921 // the AddrMode was created we need to bail out gracefully if widths
4922 // do not match instead of extending it.
4923 //
4924 // (See below for code to add the scale.)
4925 if (AddrMode.Scale) {
4926 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4927 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4928 cast<IntegerType>(ScaledRegTy)->getBitWidth())
4929 return false;
4930 }
4931
4932 if (AddrMode.BaseGV) {
4933 if (ResultPtr)
4934 return false;
4935
4936 ResultPtr = AddrMode.BaseGV;
4937 }
4938
4939 // If the real base value actually came from an inttoptr, then the matcher
4940 // will look through it and provide only the integer value. In that case,
4941 // use it here.
4942 if (!DL->isNonIntegralPointerType(Addr->getType())) {
4943 if (!ResultPtr && AddrMode.BaseReg) {
4944 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4945 "sunkaddr");
4946 AddrMode.BaseReg = nullptr;
4947 } else if (!ResultPtr && AddrMode.Scale == 1) {
4948 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4949 "sunkaddr");
4950 AddrMode.Scale = 0;
4951 }
4952 }
4953
4954 if (!ResultPtr &&
4955 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4956 SunkAddr = Constant::getNullValue(Addr->getType());
4957 } else if (!ResultPtr) {
4958 return false;
4959 } else {
4960 Type *I8PtrTy =
4961 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4962 Type *I8Ty = Builder.getInt8Ty();
4963
4964 // Start with the base register. Do this first so that subsequent address
4965 // matching finds it last, which will prevent it from trying to match it
4966 // as the scaled value in case it happens to be a mul. That would be
4967 // problematic if we've sunk a different mul for the scale, because then
4968 // we'd end up sinking both muls.
4969 if (AddrMode.BaseReg) {
4970 Value *V = AddrMode.BaseReg;
4971 if (V->getType() != IntPtrTy)
4972 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4973
4974 ResultIndex = V;
4975 }
4976
4977 // Add the scale value.
4978 if (AddrMode.Scale) {
4979 Value *V = AddrMode.ScaledReg;
4980 if (V->getType() == IntPtrTy) {
4981 // done.
4982 } else {
4983          assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4984                     cast<IntegerType>(V->getType())->getBitWidth() &&
4985                 "We can't transform if ScaledReg is too narrow");
4986 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4987 }
4988
4989 if (AddrMode.Scale != 1)
4990 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4991 "sunkaddr");
4992 if (ResultIndex)
4993 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4994 else
4995 ResultIndex = V;
4996 }
4997
4998 // Add in the Base Offset if present.
4999 if (AddrMode.BaseOffs) {
5000 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5001 if (ResultIndex) {
5002 // We need to add this separately from the scale above to help with
5003 // SDAG consecutive load/store merging.
5004 if (ResultPtr->getType() != I8PtrTy)
5005 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5006 ResultPtr =
5007 AddrMode.InBounds
5008 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5009 "sunkaddr")
5010 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5011 }
5012
5013 ResultIndex = V;
5014 }
5015
5016 if (!ResultIndex) {
5017 SunkAddr = ResultPtr;
5018 } else {
5019 if (ResultPtr->getType() != I8PtrTy)
5020 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5021 SunkAddr =
5022 AddrMode.InBounds
5023 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5024 "sunkaddr")
5025 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5026 }
5027
5028 if (SunkAddr->getType() != Addr->getType())
5029 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5030 }
5031 } else {
5032 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5033 // non-integral pointers, so in that case bail out now.
5034 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5035 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5036 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5037 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5038 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5039 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5040 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5041 (AddrMode.BaseGV &&
5042 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5043 return false;
5044
5045    LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5046                      << " for " << *MemoryInst << "\n");
5047 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5048 Value *Result = nullptr;
5049
5050 // Start with the base register. Do this first so that subsequent address
5051 // matching finds it last, which will prevent it from trying to match it
5052 // as the scaled value in case it happens to be a mul. That would be
5053 // problematic if we've sunk a different mul for the scale, because then
5054 // we'd end up sinking both muls.
5055 if (AddrMode.BaseReg) {
5056 Value *V = AddrMode.BaseReg;
5057 if (V->getType()->isPointerTy())
5058 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5059 if (V->getType() != IntPtrTy)
5060 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5061 Result = V;
5062 }
5063
5064 // Add the scale value.
5065 if (AddrMode.Scale) {
5066 Value *V = AddrMode.ScaledReg;
5067 if (V->getType() == IntPtrTy) {
5068 // done.
5069 } else if (V->getType()->isPointerTy()) {
5070 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5071 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5072 cast<IntegerType>(V->getType())->getBitWidth()) {
5073 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5074 } else {
5075 // It is only safe to sign extend the BaseReg if we know that the math
5076 // required to create it did not overflow before we extend it. Since
5077 // the original IR value was tossed in favor of a constant back when
5078 // the AddrMode was created we need to bail out gracefully if widths
5079 // do not match instead of extending it.
5080 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5081 if (I && (Result != AddrMode.BaseReg))
5082 I->eraseFromParent();
5083 return false;
5084 }
5085 if (AddrMode.Scale != 1)
5086 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5087 "sunkaddr");
5088 if (Result)
5089 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5090 else
5091 Result = V;
5092 }
5093
5094 // Add in the BaseGV if present.
5095 if (AddrMode.BaseGV) {
5096 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
5097 if (Result)
5098 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5099 else
5100 Result = V;
5101 }
5102
5103 // Add in the Base Offset if present.
5104 if (AddrMode.BaseOffs) {
5105 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5106 if (Result)
5107 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5108 else
5109 Result = V;
5110 }
5111
5112 if (!Result)
5113 SunkAddr = Constant::getNullValue(Addr->getType());
5114 else
5115 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5116 }
5117
5118 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5119 // Store the newly computed address into the cache. In the case we reused a
5120 // value, this should be idempotent.
5121 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5122
5123 // If we have no uses, recursively delete the value and all dead instructions
5124 // using it.
5125 if (Repl->use_empty()) {
5126 // This can cause recursive deletion, which can invalidate our iterator.
5127 // Use a WeakTrackingVH to hold onto it in case this happens.
5128 Value *CurValue = &*CurInstIterator;
5129 WeakTrackingVH IterHandle(CurValue);
5130 BasicBlock *BB = CurInstIterator->getParent();
5131
5132 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
5133
5134 if (IterHandle != CurValue) {
5135 // If the iterator instruction was recursively deleted, start over at the
5136 // start of the block.
5137 CurInstIterator = BB->begin();
5138 SunkAddrs.clear();
5139 }
5140 }
5141 ++NumMemoryInsts;
5142 return true;
5143}
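// Illustrative sketch (mine, not part of the analyzed source): the kind of
// rewrite the function above performs when it sinks an address computation
// next to its memory use. The block and value names (entry, use, %a, %ld) are
// hypothetical.
//
// Before (address computed in a block other than the one using it):
//   entry:
//     %a = getelementptr inbounds i32, i32* %base, i64 %idx
//     br label %use
//   use:
//     %ld = load i32, i32* %a
//
// After (an equivalent "sunkaddr" chain is rebuilt next to the load, so the
// original %a can become dead and be recursively deleted):
//   use:
//     %sunkaddr = getelementptr inbounds i32, i32* %base, i64 %idx
//     %ld = load i32, i32* %sunkaddr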
5144
5145/// If there are any memory operands, use optimizeMemoryInst to sink their
5146/// address computation into the block when possible / profitable.
5147bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5148 bool MadeChange = false;
5149
5150 const TargetRegisterInfo *TRI =
5151 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5152 TargetLowering::AsmOperandInfoVector TargetConstraints =
5153 TLI->ParseConstraints(*DL, TRI, CS);
5154 unsigned ArgNo = 0;
5155 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5156 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
5157
5158 // Compute the constraint code and ConstraintType to use.
5159 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5160
5161 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5162 OpInfo.isIndirect) {
5163 Value *OpVal = CS->getArgOperand(ArgNo++);
5164 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5165 } else if (OpInfo.Type == InlineAsm::isInput)
5166 ArgNo++;
5167 }
5168
5169 return MadeChange;
5170}
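// Hedged example (mine, not from the report): an inline-asm call with an
// indirect memory constraint whose pointer operand is the kind of value the
// loop above hands to optimizeMemoryInst. %buf, %i and %p are hypothetical.
//
//   %p = getelementptr inbounds [16 x i32], [16 x i32]* %buf, i64 0, i64 %i
//   call void asm sideeffect "prefetcht0 $0", "*m"(i32* %p)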
5171
5172/// Check if all the uses of \p Val are equivalent (or free) zero or
5173/// sign extensions.
5174static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5175 assert(!Val->use_empty() && "Input must have at least one use");
5176 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5177 bool IsSExt = isa<SExtInst>(FirstUser);
5178 Type *ExtTy = FirstUser->getType();
5179 for (const User *U : Val->users()) {
5180 const Instruction *UI = cast<Instruction>(U);
5181 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5182 return false;
5183 Type *CurTy = UI->getType();
5184 // Same input and output types: Same instruction after CSE.
5185 if (CurTy == ExtTy)
5186 continue;
5187
5188 // If IsSExt is true, we are in this situation:
5189 // a = Val
5190 // b = sext ty1 a to ty2
5191 // c = sext ty1 a to ty3
5192 // Assuming ty2 is shorter than ty3, this could be turned into:
5193 // a = Val
5194 // b = sext ty1 a to ty2
5195 // c = sext ty2 b to ty3
5196 // However, the last sext is not free.
5197 if (IsSExt)
5198 return false;
5199
5200 // This is a ZExt, maybe this is free to extend from one type to another.
5201 // In that case, we would not account for a different use.
5202 Type *NarrowTy;
5203 Type *LargeTy;
5204 if (ExtTy->getScalarType()->getIntegerBitWidth() >
5205 CurTy->getScalarType()->getIntegerBitWidth()) {
5206 NarrowTy = CurTy;
5207 LargeTy = ExtTy;
5208 } else {
5209 NarrowTy = ExtTy;
5210 LargeTy = CurTy;
5211 }
5212
5213 if (!TLI.isZExtFree(NarrowTy, LargeTy))
5214 return false;
5215 }
5216 // All uses are the same or can be derived from one another for free.
5217 return true;
5218}
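// Small illustration (hypothetical IR) of what hasSameExtUse accepts: every
// user of %v must be the same kind of extension, e.g.
//   %z1 = zext i8 %v to i32
//   %z2 = zext i8 %v to i64   ; fine if the target reports zext i32 -> i64 free
// whereas mixing a sext with a zext, or using two sexts to different types,
// makes it return false.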
5219
5220/// Try to speculatively promote extensions in \p Exts and continue
5221/// promoting through newly promoted operands recursively as far as doing so is
5222/// profitable. Save extensions profitably moved up in \p ProfitablyMovedExts.
5223/// When some promotion happened, \p TPT contains the proper state to revert
5224/// them.
5225///
5226/// \return true if some promotion happened, false otherwise.
5227bool CodeGenPrepare::tryToPromoteExts(
5228 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
5229 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
5230 unsigned CreatedInstsCost) {
5231 bool Promoted = false;
5232
5233 // Iterate over all the extensions to try to promote them.
5234 for (auto I : Exts) {
5235 // Early check if we directly have ext(load).
5236 if (isa<LoadInst>(I->getOperand(0))) {
5237 ProfitablyMovedExts.push_back(I);
5238 continue;
5239 }
5240
5241 // Check whether or not we want to do any promotion. The reason we have
5242 // this check inside the for loop is to catch the case where an extension
5243 // is directly fed by a load, because in that case the extension can be moved
5244 // up without any promotion on its operands.
5245 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
5246 return false;
5247
5248 // Get the action to perform the promotion.
5249 TypePromotionHelper::Action TPH =
5250 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
5251 // Check if we can promote.
5252 if (!TPH) {
5253 // Save the current extension as we cannot move up through its operand.
5254 ProfitablyMovedExts.push_back(I);
5255 continue;
5256 }
5257
5258 // Save the current state.
5259 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5260 TPT.getRestorationPoint();
5261 SmallVector<Instruction *, 4> NewExts;
5262 unsigned NewCreatedInstsCost = 0;
5263 unsigned ExtCost = !TLI->isExtFree(I);
5264 // Promote.
5265 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
5266 &NewExts, nullptr, *TLI);
5267 assert(PromotedVal &&
5268 "TypePromotionHelper should have filtered out those cases");
5269
5270 // We can merge only one extension into a load.
5271 // Therefore, if we have more than one new extension we heuristically
5272 // cut this search path, because it means we degrade the code quality.
5273 // With exactly 2, the transformation is neutral, because we will merge
5274 // one extension but leave one. However, we optimistically keep going,
5275 // because the new extension may be removed too.
5276 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5277 // FIXME: It would be possible to propagate a negative value instead of
5278 // conservatively ceiling it to 0.
5279 TotalCreatedInstsCost =
5280 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5281 if (!StressExtLdPromotion &&
5282 (TotalCreatedInstsCost > 1 ||
5283 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5284 // This promotion is not profitable, rollback to the previous state, and
5285 // save the current extension in ProfitablyMovedExts as the latest
5286 // speculative promotion turned out to be unprofitable.
5287 TPT.rollback(LastKnownGood);
5288 ProfitablyMovedExts.push_back(I);
5289 continue;
5290 }
5291 // Continue promoting NewExts as far as doing so is profitable.
5292 SmallVector<Instruction *, 2> NewlyMovedExts;
5293 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5294 bool NewPromoted = false;
5295 for (auto ExtInst : NewlyMovedExts) {
5296 Instruction *MovedExt = cast<Instruction>(ExtInst);
5297 Value *ExtOperand = MovedExt->getOperand(0);
5298 // If we have reached a load, we need this extra profitability check
5299 // as it could potentially be merged into an ext(load).
5300 if (isa<LoadInst>(ExtOperand) &&
5301 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5302 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5303 continue;
5304
5305 ProfitablyMovedExts.push_back(MovedExt);
5306 NewPromoted = true;
5307 }
5308
5309 // If none of the speculative promotions for NewExts is profitable, roll back
5310 // and save the current extension (I) as the last profitable extension.
5311 if (!NewPromoted) {
5312 TPT.rollback(LastKnownGood);
5313 ProfitablyMovedExts.push_back(I);
5314 continue;
5315 }
5316 // The promotion is profitable.
5317 Promoted = true;
5318 }
5319 return Promoted;
5320}
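// Worked example of the cost heuristic above (numbers are hypothetical): with
// CreatedInstsCost == 0, promoting one ext that creates one new extension
// (NewCreatedInstsCost == 1) while the original ext was not free
// (ExtCost == 1) gives TotalCreatedInstsCost = max(0, 0 + 1 - 1) = 0, so the
// search continues. Once the running total exceeds 1, or the promoted value is
// not legal for the target (and StressExtLdPromotion is off), the transaction
// is rolled back to LastKnownGood and the unpromoted ext is recorded instead.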
5321
5322/// Merge redundant sexts when one dominates the other.
5323bool CodeGenPrepare::mergeSExts(Function &F) {
5324 bool Changed = false;
5325 for (auto &Entry : ValToSExtendedUses) {
5326 SExts &Insts = Entry.second;
5327 SExts CurPts;
5328 for (Instruction *Inst : Insts) {
5329 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5330 Inst->getOperand(0) != Entry.first)
5331 continue;
5332 bool inserted = false;
5333 for (auto &Pt : CurPts) {
5334 if (getDT(F).dominates(Inst, Pt)) {
5335 Pt->replaceAllUsesWith(Inst);
5336 RemovedInsts.insert(Pt);
5337 Pt->removeFromParent();
5338 Pt = Inst;
5339 inserted = true;
5340 Changed = true;
5341 break;
5342 }
5343 if (!getDT(F).dominates(Pt, Inst))
5344 // Give up if we need to merge in a common dominator as the
5345 // experiments show it is not profitable.
5346 continue;
5347 Inst->replaceAllUsesWith(Pt);
5348 RemovedInsts.insert(Inst);
5349 Inst->removeFromParent();
5350 inserted = true;
5351 Changed = true;
5352 break;
5353 }
5354 if (!inserted)
5355 CurPts.push_back(Inst);
5356 }
5357 }
5358 return Changed;
5359}
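// Illustrative sketch (hypothetical IR) of the merge performed above: two
// sexts of the same value where one dominates the other are collapsed onto
// the dominating one, and the dominated copy is removed:
//   bb1:
//     %s1 = sext i32 %v to i64
//     br label %bb2
//   bb2:
//     %s2 = sext i32 %v to i64   ; uses rewritten to %s1, %s2 removed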
5360
5361// Splitting large data structures so that the GEPs accessing them can have
5362// smaller offsets so that they can be sunk to the same blocks as their users.
5363// For example, a large struct starting from %base is split into two parts
5364// where the second part starts from %new_base.
5365//
5366// Before:
5367// BB0:
5368// %base =
5369//
5370// BB1:
5371// %gep0 = gep %base, off0
5372// %gep1 = gep %base, off1
5373// %gep2 = gep %base, off2
5374//
5375// BB2:
5376// %load1 = load %gep0
5377// %load2 = load %gep1
5378// %load3 = load %gep2
5379//
5380// After:
5381// BB0:
5382// %base =
5383// %new_base = gep %base, off0
5384//
5385// BB1:
5386// %new_gep0 = %new_base
5387// %new_gep1 = gep %new_base, off1 - off0
5388// %new_gep2 = gep %new_base, off2 - off0
5389//
5390// BB2:
5391// %load1 = load i32, i32* %new_gep0
5392// %load2 = load i32, i32* %new_gep1
5393// %load3 = load i32, i32* %new_gep2
5394//
5395// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
5396// their offsets are small enough to fit into the addressing mode.
5397bool CodeGenPrepare::splitLargeGEPOffsets() {
5398 bool Changed = false;
5399 for (auto &Entry : LargeOffsetGEPMap) {
5400 Value *OldBase = Entry.first;
5401 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5402 &LargeOffsetGEPs = Entry.second;
5403 auto compareGEPOffset =
5404 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5405 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5406 if (LHS.first == RHS.first)
5407 return false;
5408 if (LHS.second != RHS.second)
5409 return LHS.second < RHS.second;
5410 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5411 };
5412 // Sort all the GEPs of the same data structure based on their offsets.
5413 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5414 LargeOffsetGEPs.erase(
5415 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5416 LargeOffsetGEPs.end());
5417 // Skip if all the GEPs have the same offsets.
5418 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5419 continue;
5420 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5421 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5422 Value *NewBaseGEP = nullptr;
5423
5424 auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5425 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5426 GetElementPtrInst *GEP = LargeOffsetGEP->first;
5427 int64_t Offset = LargeOffsetGEP->second;
5428 if (Offset != BaseOffset) {
5429 TargetLowering::AddrMode AddrMode;
5430 AddrMode.BaseOffs = Offset - BaseOffset;
5431 // The result type of the GEP might not be the type of the memory
5432 // access.
5433 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5434 GEP->getResultElementType(),
5435 GEP->getAddressSpace())) {
5436 // We need to create a new base if the offset to the current base is
5437 // too large to fit into the addressing mode. So, a very large struct
5438 // may be split into several parts.
5439 BaseGEP = GEP;
5440 BaseOffset = Offset;
5441 NewBaseGEP = nullptr;
5442 }
5443 }
5444
5445 // Generate a new GEP to replace the current one.
5446 LLVMContext &Ctx = GEP->getContext();
5447 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5448 Type *I8PtrTy =
5449 Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5450 Type *I8Ty = Type::getInt8Ty(Ctx);
5451
5452 if (!NewBaseGEP) {
5453 // Create a new base if we don't have one yet. Find the insertion
5454 // pointer for the new base first.
5455 BasicBlock::iterator NewBaseInsertPt;
5456 BasicBlock *NewBaseInsertBB;
5457 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5458 // If the base of the struct is an instruction, the new base will be
5459 // inserted close to it.
5460 NewBaseInsertBB = BaseI->getParent();
5461 if (isa<PHINode>(BaseI))
5462 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5463 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5464 NewBaseInsertBB =
5465 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5466 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5467 } else
5468 NewBaseInsertPt = std::next(BaseI->getIterator());
5469 } else {
5470 // If the current base is an argument or global value, the new base
5471 // will be inserted to the entry block.
5472 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5473 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5474 }
5475 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5476 // Create a new base.
5477 Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5478 NewBaseGEP = OldBase;
5479 if (NewBaseGEP->getType() != I8PtrTy)
5480 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5481 NewBaseGEP =
5482 NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5483 NewGEPBases.insert(NewBaseGEP);
5484 }
5485
5486 IRBuilder<> Builder(GEP);
5487 Value *NewGEP = NewBaseGEP;
5488 if (Offset == BaseOffset) {
5489 if (GEP->getType() != I8PtrTy)
5490 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5491 } else {
5492 // Calculate the new offset for the new GEP.
5493 Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5494 NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5495
5496 if (GEP->getType() != I8PtrTy)
5497 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5498 }
5499 GEP->replaceAllUsesWith(NewGEP);
5500 LargeOffsetGEPID.erase(GEP);
5501 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5502 GEP->eraseFromParent();
5503 Changed = true;
5504 }
5505 }
5506 return Changed;
5507}
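// Note on the emitted form (an illustration, not lifted from the source; the
// offsets 4096 and 4104 are made up): the new base and the rebased GEPs from
// the example above are materialized as byte-addressed i8 GEPs plus pointer
// casts, roughly
//   %splitgep = getelementptr i8, i8* %base.i8, i64 4096
//   %new_gep1 = getelementptr i8, i8* %splitgep, i64 8    ; 4104 - 4096
// followed by a pointer cast back to each original GEP's result type; the
// "splitgep" name matches the one passed to CreateGEP above.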
5508
5509/// Return true if an ext(load) can be formed from an extension in
5510/// \p MovedExts.
5511bool CodeGenPrepare::canFormExtLd(
5512 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5513 Instruction *&Inst, bool HasPromoted) {
5514 for (auto *MovedExtInst : MovedExts) {
5515 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5516 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5517 Inst = MovedExtInst;
5518 break;
5519 }
5520 }
5521 if (!LI)
5522 return false;
5523
5524 // If they're already in the same block, there's nothing to do.
5525 // Make the cheap checks first if we did not promote.
5526 // If we promoted, we need to check if it is indeed profitable.
5527 if (!HasPromoted && LI->getParent() == Inst->getParent())
5528 return false;
5529
5530 return TLI->isExtLoad(LI, Inst, *DL);
5531}
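// Example of the shape canFormExtLd looks for (hypothetical names):
//   %ld  = load i16, i16* %p
//   %ext = sext i16 %ld to i32    ; LI = %ld, Inst = %ext
// subject to TLI->isExtLoad deciding that the target can fold the pair into a
// single extending load.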
5532
5533/// Move a zext or sext fed by a load into the same basic block as the load,
5534/// unless conditions are unfavorable. This allows SelectionDAG to fold the
5535/// extend into the load.
5536///
5537/// E.g.,
5538/// \code
5539/// %ld = load i32* %addr
5540/// %add = add nuw i32 %ld, 4
5541/// %zext = zext i32 %add to i64
5542/// \endcode
5543/// =>
5544/// \code
5545/// %ld = load i32* %addr
5546/// %zext = zext i32 %ld to i64
5547/// %add = add nuw i64 %zext, 4
5548/// \endcode
5549/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5550/// allows us to match zext(load i32*) to i64.
5551///
5552/// Also, try to promote the computations used to obtain a sign-extended
5553/// value used in memory accesses.
5554/// E.g.,
5555/// \code
5556/// a = add nsw i32 b, 3
5557/// d = sext i32 a to i64
5558/// e = getelementptr ..., i64 d
5559/// \endcode
5560/// =>
5561/// \code
5562/// f = sext i32 b to i64
5563/// a = add nsw i64 f, 3
5564/// e = getelementptr ..., i64 a
5565/// \endcode
5566///
5567/// \p Inst[in/out] the extension may be modified during the process if some
5568/// promotions apply.
5569bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5570 // ExtLoad formation and address type promotion infrastructure requires TLI to
5571 // be effective.
5572 if (!TLI)
5573 return false;
5574
5575 bool AllowPromotionWithoutCommonHeader = false;
5576 /// See if this is an interesting sext operation for address type
5577 /// promotion before trying to promote it, e.g., one with the right
5578 /// type that is used in memory accesses.
5579 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5580 *Inst, AllowPromotionWithoutCommonHeader);
5581 TypePromotionTransaction TPT(RemovedInsts);
5582 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5583 TPT.getRestorationPoint();
5584 SmallVector<Instruction *, 1> Exts;
5585 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5586 Exts.push_back(Inst);
5587
5588 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5589
5590 // Look for a load being extended.
5591 LoadInst *LI = nullptr;
5592 Instruction *ExtFedByLoad;
5593
5594 // Try to promote a chain of computation if it allows us to form an extended
5595 // load.
5596 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5597 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5598 TPT.commit();
5599 // Move the extend into the same block as the load
5600 ExtFedByLoad->moveAfter(LI);
5601 // CGP does not check if the zext would be speculatively executed when moved
5602 // to the same basic block as the load. Preserving its original location
5603 // would pessimize the debugging experience, as well as negatively impact
5604 // the quality of sample pgo. We don't want to use "line 0" as that has a
5605 // size cost in the line-table section and logically the zext can be seen as
5606 // part of the load. Therefore we conservatively reuse the same debug
5607 // location for the load and the zext.
5608 ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5609 ++NumExtsMoved;
5610 Inst = ExtFedByLoad;
5611 return true;
5612 }
5613
5614 // Continue promoting SExts if known as considerable depending on targets.
5615 if (ATPConsiderable &&
5616 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5617 HasPromoted, TPT, SpeculativelyMovedExts))
5618 return true;
5619
5620 TPT.rollback(LastKnownGood);
5621 return false;
5622}
5623
5624// Perform address type promotion if doing so is profitable.
5625// If AllowPromotionWithoutCommonHeader == false, we should find other sext
5626// instructions that sign extended the same initial value. However, if
5627// AllowPromotionWithoutCommonHeader == true, we expect promoting the
5628// extension to be profitable on its own.
5629bool CodeGenPrepare::performAddressTypePromotion(
5630 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5631 bool HasPromoted, TypePromotionTransaction &TPT,
5632 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5633 bool Promoted = false;
5634 SmallPtrSet<Instruction *, 1> UnhandledExts;
5635 bool AllSeenFirst = true;
5636 for (auto I : SpeculativelyMovedExts) {
5637 Value *HeadOfChain = I->getOperand(0);
5638 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5639 SeenChainsForSExt.find(HeadOfChain);
5640 // If there is an unhandled SExt which has the same header, try to promote
5641 // it as well.
5642 if (AlreadySeen != SeenChainsForSExt.end()) {
5643 if (AlreadySeen->second != nullptr)
5644 UnhandledExts.insert(AlreadySeen->second);
5645 AllSeenFirst = false;
5646 }
5647 }
5648
5649 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5650 SpeculativelyMovedExts.size() == 1)) {
5651 TPT.commit();
5652 if (HasPromoted)
5653 Promoted = true;
5654 for (auto I : SpeculativelyMovedExts) {
5655 Value *HeadOfChain = I->getOperand(0);
5656 SeenChainsForSExt[HeadOfChain] = nullptr;
5657 ValToSExtendedUses[HeadOfChain].push_back(I);
5658 }
5659 // Update Inst as promotion happened.
5660 Inst = SpeculativelyMovedExts.pop_back_val();
5661 } else {
5662 // This is the first chain visited from this header; keep the current chain
5663 // as unhandled. Defer promoting it until we encounter another SExt
5664 // chain derived from the same header.
5665 for (auto I : SpeculativelyMovedExts) {
5666 Value *HeadOfChain = I->getOperand(0);
5667 SeenChainsForSExt[HeadOfChain] = Inst;
5668 }
5669 return false;
5670 }
5671
5672 if (!AllSeenFirst && !UnhandledExts.empty())
5673 for (auto VisitedSExt : UnhandledExts) {
5674 if (RemovedInsts.count(VisitedSExt))
5675 continue;
5676 TypePromotionTransaction TPT(RemovedInsts);
5677 SmallVector<Instruction *, 1> Exts;
5678 SmallVector<Instruction *, 2> Chains;
5679 Exts.push_back(VisitedSExt);
5680 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5681 TPT.commit();
5682 if (HasPromoted)
5683 Promoted = true;
5684 for (auto I : Chains) {
5685 Value *HeadOfChain = I->getOperand(0);
5686 // Mark this as handled.
5687 SeenChainsForSExt[HeadOfChain] = nullptr;
5688 ValToSExtendedUses[HeadOfChain].push_back(I);
5689 }
5690 }
5691 return Promoted;
5692}
5693
5694bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5695 BasicBlock *DefBB = I->getParent();
5696
5697 // If the result of a {s|z}ext and its source are both live out, rewrite all
5698 // other uses of the source with result of extension.
5699 Value *Src = I->getOperand(0);
5700 if (Src->hasOneUse())
5701 return false;
5702
5703 // Only do this xform if truncating is free.
5704 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5705 return false;
5706
5707 // Only safe to perform the optimization if the source is also defined in
5708 // this block.
5709 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5710 return false;
5711
5712 bool DefIsLiveOut = false;
5713 for (User *U : I->users()) {
5714 Instruction *UI = cast<Instruction>(U);
5715
5716 // Figure out which BB this ext is used in.
5717 BasicBlock *UserBB = UI->getParent();
5718 if (UserBB == DefBB) continue;
5719 DefIsLiveOut = true;
5720 break;
5721 }
5722 if (!DefIsLiveOut)
5723 return false;
5724
5725 // Make sure none of the uses are PHI nodes.
5726 for (User *U : Src->users()) {
5727 Instruction *UI = cast<Instruction>(U);
5728 BasicBlock *UserBB = UI->getParent();
5729 if (UserBB == DefBB) continue;
5730 // Be conservative. We don't want this xform to end up introducing
5731 // reloads just before load / store instructions.
5732 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5733 return false;
5734 }
5735
5736 // InsertedTruncs - Only insert one trunc in each block.
5737 DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5738
5739 bool MadeChange = false;
5740 for (Use &U : Src->uses()) {
5741 Instruction *User = cast<Instruction>(U.getUser());
5742
5743 // Figure out which BB this ext is used in.
5744 BasicBlock *UserBB = User->getParent();
5745 if (UserBB == DefBB) continue;
5746
5747 // Both src and def are live in this block. Rewrite the use.
5748 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5749
5750 if (!InsertedTrunc) {
5751 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5752 assert(InsertPt != UserBB->end());
5753 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5754 InsertedInsts.insert(InsertedTrunc);
5755 }
5756
5757 // Replace a use of the {s|z}ext source with a use of the result.
5758 U = InsertedTrunc;
5759 ++NumExtUses;
5760 MadeChange = true;
5761 }
5762
5763 return MadeChange;
5764}
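// Sketch of the rewrite above (hypothetical IR): when both %src and its
// extension are live out of the defining block, uses of %src in other blocks
// are routed through a trunc of the extension, so only one of the two values
// stays live across the block boundary:
//   defbb:
//     %src = add i32 %a, %b
//     %z   = zext i32 %src to i64
//     br label %userbb
//   userbb:
//     %t   = trunc i64 %z to i32   ; inserted; replaces uses of %src here
//     %use = add i32 %t, 1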
5765
5766// Find loads whose uses only use some of the loaded value's bits. Add an "and"
5767// just after the load if the target can fold this into one extload instruction,
5768// with the hope of eliminating some of the other later "and" instructions using
5769// the loaded value. "and"s that are made trivially redundant by the insertion