Bug Summary

File: llvm/lib/CodeGen/CodeGenPrepare.cpp
Warning: line 3131, column 5
Called C++ object pointer is null
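
The defect class reported here is a member-function call made through a pointer that is null on at least one feasible path. Below is a minimal hypothetical sketch of that pattern (an illustration only, not the code flagged at line 3131, which lies outside this excerpt). It mirrors how several target hooks in the listing, such as TLI (line 251), are default-initialized to nullptr and only assigned when a TargetPassConfig is available (lines 425-430), so a later call that is not guarded by a null check can be flagged. All names in the sketch (TargetHooks, GlobalHooks, prepare, HaveConfig) are invented for illustration.

    // Hypothetical reduction of a "Called C++ object pointer is null" finding.
    struct TargetHooks {
      bool isSlowDivBypassed() const { return false; }
    };

    static TargetHooks GlobalHooks;

    bool prepare(bool HaveConfig) {
      const TargetHooks *Hooks = nullptr; // default-initialized, like TLI in the listing
      if (HaveConfig)
        Hooks = &GlobalHooks;             // only assigned when a config is available
      // When HaveConfig is false, Hooks is still null here, so the analyzer
      // reports the call below as "Called C++ object pointer is null".
      return Hooks->isSlowDivBypassed();  // missing a "Hooks && ..." guard
    }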

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CodeGenPrepare.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-07-102640-14763-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/CodeGen/CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryBuiltins.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Transforms/Utils/Local.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/Analysis.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/SelectionDAGNodes.h"
39#include "llvm/CodeGen/TargetLowering.h"
40#include "llvm/CodeGen/TargetPassConfig.h"
41#include "llvm/CodeGen/TargetSubtargetInfo.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/CallSite.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Dominators.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/GlobalVariable.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/InlineAsm.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
61#include "llvm/IR/Instructions.h"
62#include "llvm/IR/IntrinsicInst.h"
63#include "llvm/IR/Intrinsics.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/MDBuilder.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/Operator.h"
68#include "llvm/IR/PatternMatch.h"
69#include "llvm/IR/Statepoint.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
75#include "llvm/IR/ValueMap.h"
76#include "llvm/Pass.h"
77#include "llvm/Support/BlockFrequency.h"
78#include "llvm/Support/BranchProbability.h"
79#include "llvm/Support/Casting.h"
80#include "llvm/Support/CommandLine.h"
81#include "llvm/Support/Compiler.h"
82#include "llvm/Support/Debug.h"
83#include "llvm/Support/ErrorHandling.h"
84#include "llvm/Support/MachineValueType.h"
85#include "llvm/Support/MathExtras.h"
86#include "llvm/Support/raw_ostream.h"
87#include "llvm/Target/TargetMachine.h"
88#include "llvm/Target/TargetOptions.h"
89#include "llvm/Transforms/Utils/BasicBlockUtils.h"
90#include "llvm/Transforms/Utils/BypassSlowDivision.h"
91#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <limits>
97#include <memory>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102using namespace llvm::PatternMatch;
103
104#define DEBUG_TYPE "codegenprepare"
105
106STATISTIC(NumBlocksElim, "Number of blocks eliminated");
107STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
108STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
109STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
110 "sunken Cmps");
111STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
112 "of sunken Casts");
113STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
114 "computations were sunk");
115STATISTIC(NumMemoryInstsPhiCreated,
116 "Number of phis created when address "
117 "computations were sunk to memory instructions");
118STATISTIC(NumMemoryInstsSelectCreated,
119 "Number of select created when address "
120 "computations were sunk to memory instructions");
121STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
122STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
123STATISTIC(NumAndsAdded,
124 "Number of and mask instructions added to form ext loads");
125STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
126STATISTIC(NumRetsDup, "Number of return instructions duplicated");
127STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
128STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
129STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
130
131static cl::opt<bool> DisableBranchOpts(
132 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
133 cl::desc("Disable branch optimizations in CodeGenPrepare"));
134
135static cl::opt<bool>
136 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
137 cl::desc("Disable GC optimizations in CodeGenPrepare"));
138
139static cl::opt<bool> DisableSelectToBranch(
140 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
141 cl::desc("Disable select to branch conversion."));
142
143static cl::opt<bool> AddrSinkUsingGEPs(
144 "addr-sink-using-gep", cl::Hidden, cl::init(true),
145 cl::desc("Address sinking in CGP using GEPs."));
146
147static cl::opt<bool> EnableAndCmpSinking(
148 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
149 cl::desc("Enable sinking and/cmp into branches."));
150
151static cl::opt<bool> DisableStoreExtract(
152 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
153 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
154
155static cl::opt<bool> StressStoreExtract(
156 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
157 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
158
159static cl::opt<bool> DisableExtLdPromotion(
160 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
161 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
162 "CodeGenPrepare"));
163
164static cl::opt<bool> StressExtLdPromotion(
165 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
166 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
167 "optimization in CodeGenPrepare"));
168
169static cl::opt<bool> DisablePreheaderProtect(
170 "disable-preheader-prot", cl::Hidden, cl::init(false),
171 cl::desc("Disable protection against removing loop preheaders"));
172
173static cl::opt<bool> ProfileGuidedSectionPrefix(
174 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
175 cl::desc("Use profile info to add section prefix for hot/cold functions"));
176
177static cl::opt<unsigned> FreqRatioToSkipMerge(
178 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
179 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
180 "(frequency of destination block) is greater than this ratio"));
181
182static cl::opt<bool> ForceSplitStore(
183 "force-split-store", cl::Hidden, cl::init(false),
184 cl::desc("Force store splitting no matter what the target query says."));
185
186static cl::opt<bool>
187EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
188 cl::desc("Enable merging of redundant sexts when one is dominating"
189 " the other."), cl::init(true));
190
191static cl::opt<bool> DisableComplexAddrModes(
192 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
193 cl::desc("Disables combining addressing modes with different parts "
194 "in optimizeMemoryInst."));
195
196static cl::opt<bool>
197AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
198 cl::desc("Allow creation of Phis in Address sinking."));
199
200static cl::opt<bool>
201AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
202 cl::desc("Allow creation of selects in Address sinking."));
203
204static cl::opt<bool> AddrSinkCombineBaseReg(
205 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
206 cl::desc("Allow combining of BaseReg field in Address sinking."));
207
208static cl::opt<bool> AddrSinkCombineBaseGV(
209 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
210 cl::desc("Allow combining of BaseGV field in Address sinking."));
211
212static cl::opt<bool> AddrSinkCombineBaseOffs(
213 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
214 cl::desc("Allow combining of BaseOffs field in Address sinking."));
215
216static cl::opt<bool> AddrSinkCombineScaledReg(
217 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
218 cl::desc("Allow combining of ScaledReg field in Address sinking."));
219
220static cl::opt<bool>
221 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
222 cl::init(true),
223 cl::desc("Enable splitting large offset of GEP."));
224
225static cl::opt<bool> EnableICMP_EQToICMP_ST(
226 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
227 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
228
229namespace {
230
231enum ExtType {
232 ZeroExtension, // Zero extension has been seen.
233 SignExtension, // Sign extension has been seen.
234 BothExtension // This extension type is used if we saw sext after
235 // ZeroExtension had been set, or if we saw zext after
236 // SignExtension had been set. It makes the type
237 // information of a promoted instruction invalid.
238};
239
240using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
241using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
242using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
243using SExts = SmallVector<Instruction *, 16>;
244using ValueToSExts = DenseMap<Value *, SExts>;
245
246class TypePromotionTransaction;
247
248 class CodeGenPrepare : public FunctionPass {
249 const TargetMachine *TM = nullptr;
250 const TargetSubtargetInfo *SubtargetInfo;
251 const TargetLowering *TLI = nullptr;
252 const TargetRegisterInfo *TRI;
253 const TargetTransformInfo *TTI = nullptr;
254 const TargetLibraryInfo *TLInfo;
255 const LoopInfo *LI;
256 std::unique_ptr<BlockFrequencyInfo> BFI;
257 std::unique_ptr<BranchProbabilityInfo> BPI;
258
259 /// As we scan instructions optimizing them, this is the next instruction
260 /// to optimize. Transforms that can invalidate this should update it.
261 BasicBlock::iterator CurInstIterator;
262
263 /// Keeps track of non-local addresses that have been sunk into a block.
264 /// This allows us to avoid inserting duplicate code for blocks with
265 /// multiple load/stores of the same address. The usage of WeakTrackingVH
266 /// enables SunkAddrs to be treated as a cache whose entries can be
267 /// invalidated if a sunken address computation has been erased.
268 ValueMap<Value*, WeakTrackingVH> SunkAddrs;
269
270 /// Keeps track of all instructions inserted for the current function.
271 SetOfInstrs InsertedInsts;
272
273 /// Keeps track of the type of the related instruction before their
274 /// promotion for the current function.
275 InstrToOrigTy PromotedInsts;
276
277 /// Keep track of instructions removed during promotion.
278 SetOfInstrs RemovedInsts;
279
280 /// Keep track of sext chains based on their initial value.
281 DenseMap<Value *, Instruction *> SeenChainsForSExt;
282
283 /// Keep track of GEPs accessing the same data structures such as structs or
284 /// arrays that are candidates to be split later because of their large
285 /// size.
286 MapVector<
287 AssertingVH<Value>,
288 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
289 LargeOffsetGEPMap;
290
291 /// Keep track of new GEP base after splitting the GEPs having large offset.
292 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
293
294 /// Map serial numbers to Large offset GEPs.
295 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
296
297 /// Keep track of SExt promoted.
298 ValueToSExts ValToSExtendedUses;
299
300 /// True if optimizing for size.
301 bool OptSize;
302
303 /// DataLayout for the Function being processed.
304 const DataLayout *DL = nullptr;
305
306 /// Building the dominator tree can be expensive, so we only build it
307 /// lazily and update it when required.
308 std::unique_ptr<DominatorTree> DT;
309
310 public:
311 static char ID; // Pass identification, replacement for typeid
312
313 CodeGenPrepare() : FunctionPass(ID) {
314 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
315 }
316
317 bool runOnFunction(Function &F) override;
318
319 StringRef getPassName() const override { return "CodeGen Prepare"; }
320
321 void getAnalysisUsage(AnalysisUsage &AU) const override {
322 // FIXME: When we can selectively preserve passes, preserve the domtree.
323 AU.addRequired<ProfileSummaryInfoWrapperPass>();
324 AU.addRequired<TargetLibraryInfoWrapperPass>();
325 AU.addRequired<TargetTransformInfoWrapperPass>();
326 AU.addRequired<LoopInfoWrapperPass>();
327 }
328
329 private:
330 template <typename F>
331 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
332 // Substituting can cause recursive simplifications, which can invalidate
333 // our iterator. Use a WeakTrackingVH to hold onto it in case this
334 // happens.
335 Value *CurValue = &*CurInstIterator;
336 WeakTrackingVH IterHandle(CurValue);
337
338 f();
339
340 // If the iterator instruction was recursively deleted, start over at the
341 // start of the block.
342 if (IterHandle != CurValue) {
343 CurInstIterator = BB->begin();
344 SunkAddrs.clear();
345 }
346 }
347
348 // Get the DominatorTree, building if necessary.
349 DominatorTree &getDT(Function &F) {
350 if (!DT)
351 DT = std::make_unique<DominatorTree>(F);
352 return *DT;
353 }
354
355 bool eliminateFallThrough(Function &F);
356 bool eliminateMostlyEmptyBlocks(Function &F);
357 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
358 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
359 void eliminateMostlyEmptyBlock(BasicBlock *BB);
360 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
361 bool isPreheader);
362 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
363 bool optimizeInst(Instruction *I, bool &ModifiedDT);
364 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
365 Type *AccessTy, unsigned AddrSpace);
366 bool optimizeInlineAsmInst(CallInst *CS);
367 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
368 bool optimizeExt(Instruction *&I);
369 bool optimizeExtUses(Instruction *I);
370 bool optimizeLoadExt(LoadInst *Load);
371 bool optimizeShiftInst(BinaryOperator *BO);
372 bool optimizeSelectInst(SelectInst *SI);
373 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
374 bool optimizeSwitchInst(SwitchInst *SI);
375 bool optimizeExtractElementInst(Instruction *Inst);
376 bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
377 bool placeDbgValues(Function &F);
378 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
379 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
380 bool tryToPromoteExts(TypePromotionTransaction &TPT,
381 const SmallVectorImpl<Instruction *> &Exts,
382 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
383 unsigned CreatedInstsCost = 0);
384 bool mergeSExts(Function &F);
385 bool splitLargeGEPOffsets();
386 bool performAddressTypePromotion(
387 Instruction *&Inst,
388 bool AllowPromotionWithoutCommonHeader,
389 bool HasPromoted, TypePromotionTransaction &TPT,
390 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
391 bool splitBranchCondition(Function &F, bool &ModifiedDT);
392 bool simplifyOffsetableRelocate(Instruction &I);
393
394 bool tryToSinkFreeOperands(Instruction *I);
395 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
396 Intrinsic::ID IID);
397 bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
398 bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
399 bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
400 };
401
402} // end anonymous namespace
403
404char CodeGenPrepare::ID = 0;
405
406INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
407 "Optimize for code generation", false, false)
408INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
409INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
410 "Optimize for code generation", false, false)
411
412FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
413
414bool CodeGenPrepare::runOnFunction(Function &F) {
415 if (skipFunction(F))
416 return false;
417
418 DL = &F.getParent()->getDataLayout();
419
420 bool EverMadeChange = false;
421 // Clear per function information.
422 InsertedInsts.clear();
423 PromotedInsts.clear();
424
425 if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
426 TM = &TPC->getTM<TargetMachine>();
427 SubtargetInfo = TM->getSubtargetImpl(F);
428 TLI = SubtargetInfo->getTargetLowering();
429 TRI = SubtargetInfo->getRegisterInfo();
430 }
431 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
432 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
433 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
434 BPI.reset(new BranchProbabilityInfo(F, *LI));
435 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
436 OptSize = F.hasOptSize();
437
438 ProfileSummaryInfo *PSI =
439 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
440 if (ProfileGuidedSectionPrefix) {
441 if (PSI->isFunctionHotInCallGraph(&F, *BFI))
442 F.setSectionPrefix(".hot");
443 else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
444 F.setSectionPrefix(".unlikely");
445 }
446
447 /// This optimization identifies DIV instructions that can be
448 /// profitably bypassed and carried out with a shorter, faster divide.
449 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
450 TLI->isSlowDivBypassed()) {
451 const DenseMap<unsigned int, unsigned int> &BypassWidths =
452 TLI->getBypassSlowDivWidths();
453 BasicBlock* BB = &*F.begin();
454 while (BB != nullptr) {
455 // bypassSlowDivision may create new BBs, but we don't want to reapply the
456 // optimization to those blocks.
457 BasicBlock* Next = BB->getNextNode();
458 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
459 BB = Next;
460 }
461 }
462
463 // Eliminate blocks that contain only PHI nodes and an
464 // unconditional branch.
465 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
466
467 bool ModifiedDT = false;
468 if (!DisableBranchOpts)
469 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
470
471 // Split some critical edges where one of the sources is an indirect branch,
472 // to help generate sane code for PHIs involving such edges.
473 EverMadeChange |= SplitIndirectBrCriticalEdges(F);
474
475 bool MadeChange = true;
476 while (MadeChange) {
477 MadeChange = false;
478 DT.reset();
479 for (Function::iterator I = F.begin(); I != F.end(); ) {
480 BasicBlock *BB = &*I++;
481 bool ModifiedDTOnIteration = false;
482 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
483
484 // Restart BB iteration if the dominator tree of the Function was changed
485 if (ModifiedDTOnIteration)
486 break;
487 }
488 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
489 MadeChange |= mergeSExts(F);
490 if (!LargeOffsetGEPMap.empty())
491 MadeChange |= splitLargeGEPOffsets();
492
493 // Really free removed instructions during promotion.
494 for (Instruction *I : RemovedInsts)
495 I->deleteValue();
496
497 EverMadeChange |= MadeChange;
498 SeenChainsForSExt.clear();
499 ValToSExtendedUses.clear();
500 RemovedInsts.clear();
501 LargeOffsetGEPMap.clear();
502 LargeOffsetGEPID.clear();
503 }
504
505 SunkAddrs.clear();
506
507 if (!DisableBranchOpts) {
508 MadeChange = false;
509 // Use a set vector to get deterministic iteration order. The order the
510 // blocks are removed may affect whether or not PHI nodes in successors
511 // are removed.
512 SmallSetVector<BasicBlock*, 8> WorkList;
513 for (BasicBlock &BB : F) {
514 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
515 MadeChange |= ConstantFoldTerminator(&BB, true);
516 if (!MadeChange) continue;
517
518 for (SmallVectorImpl<BasicBlock*>::iterator
519 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
520 if (pred_begin(*II) == pred_end(*II))
521 WorkList.insert(*II);
522 }
523
524 // Delete the dead blocks and any of their dead successors.
525 MadeChange |= !WorkList.empty();
526 while (!WorkList.empty()) {
527 BasicBlock *BB = WorkList.pop_back_val();
528 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
529
530 DeleteDeadBlock(BB);
531
532 for (SmallVectorImpl<BasicBlock*>::iterator
533 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
534 if (pred_begin(*II) == pred_end(*II))
535 WorkList.insert(*II);
536 }
537
538 // Merge pairs of basic blocks with unconditional branches, connected by
539 // a single edge.
540 if (EverMadeChange || MadeChange)
541 MadeChange |= eliminateFallThrough(F);
542
543 EverMadeChange |= MadeChange;
544 }
545
546 if (!DisableGCOpts) {
547 SmallVector<Instruction *, 2> Statepoints;
548 for (BasicBlock &BB : F)
549 for (Instruction &I : BB)
550 if (isStatepoint(I))
551 Statepoints.push_back(&I);
552 for (auto &I : Statepoints)
553 EverMadeChange |= simplifyOffsetableRelocate(*I);
554 }
555
556 // Do this last to clean up use-before-def scenarios introduced by other
557 // preparatory transforms.
558 EverMadeChange |= placeDbgValues(F);
559
560 return EverMadeChange;
561}
562
563/// Merge basic blocks which are connected by a single edge, where one of the
564/// basic blocks has a single successor pointing to the other basic block,
565/// which has a single predecessor.
566bool CodeGenPrepare::eliminateFallThrough(Function &F) {
567 bool Changed = false;
568 // Scan all of the blocks in the function, except for the entry block.
569 // Use a temporary array to avoid iterator being invalidated when
570 // deleting blocks.
571 SmallVector<WeakTrackingVH, 16> Blocks;
572 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
573 Blocks.push_back(&Block);
574
575 for (auto &Block : Blocks) {
576 auto *BB = cast_or_null<BasicBlock>(Block);
577 if (!BB)
578 continue;
579 // If the destination block has a single pred, then this is a trivial
580 // edge, just collapse it.
581 BasicBlock *SinglePred = BB->getSinglePredecessor();
582
583 // Don't merge if BB's address is taken.
584 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
585
586 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
587 if (Term && !Term->isConditional()) {
588 Changed = true;
589 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
590
591 // Merge BB into SinglePred and delete it.
592 MergeBlockIntoPredecessor(BB);
593 }
594 }
595 return Changed;
596}
597
598/// Find a destination block from BB if BB is mergeable empty block.
599BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
600 // If this block doesn't end with an uncond branch, ignore it.
601 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
602 if (!BI || !BI->isUnconditional())
603 return nullptr;
604
605 // If the instruction before the branch (skipping debug info) isn't a phi
606 // node, then other stuff is happening here.
607 BasicBlock::iterator BBI = BI->getIterator();
608 if (BBI != BB->begin()) {
609 --BBI;
610 while (isa<DbgInfoIntrinsic>(BBI)) {
611 if (BBI == BB->begin())
612 break;
613 --BBI;
614 }
615 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
616 return nullptr;
617 }
618
619 // Do not break infinite loops.
620 BasicBlock *DestBB = BI->getSuccessor(0);
621 if (DestBB == BB)
622 return nullptr;
623
624 if (!canMergeBlocks(BB, DestBB))
625 DestBB = nullptr;
626
627 return DestBB;
628}
629
630/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
631/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
632/// edges in ways that are non-optimal for isel. Start by eliminating these
633/// blocks so we can split them the way we want them.
634bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
635 SmallPtrSet<BasicBlock *, 16> Preheaders;
636 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
637 while (!LoopList.empty()) {
638 Loop *L = LoopList.pop_back_val();
639 LoopList.insert(LoopList.end(), L->begin(), L->end());
640 if (BasicBlock *Preheader = L->getLoopPreheader())
641 Preheaders.insert(Preheader);
642 }
643
644 bool MadeChange = false;
645 // Copy blocks into a temporary array to avoid iterator invalidation issues
646 // as we remove them.
647 // Note that this intentionally skips the entry block.
648 SmallVector<WeakTrackingVH, 16> Blocks;
649 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
650 Blocks.push_back(&Block);
651
652 for (auto &Block : Blocks) {
653 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
654 if (!BB)
655 continue;
656 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
657 if (!DestBB ||
658 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
659 continue;
660
661 eliminateMostlyEmptyBlock(BB);
662 MadeChange = true;
663 }
664 return MadeChange;
665}
666
667bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
668 BasicBlock *DestBB,
669 bool isPreheader) {
670 // Do not delete loop preheaders if doing so would create a critical edge.
671 // Loop preheaders can be good locations to spill registers. If the
672 // preheader is deleted and we create a critical edge, registers may be
673 // spilled in the loop body instead.
674 if (!DisablePreheaderProtect && isPreheader &&
675 !(BB->getSinglePredecessor() &&
676 BB->getSinglePredecessor()->getSingleSuccessor()))
677 return false;
678
679 // Skip merging if the block's successor is also a successor to any callbr
680 // that leads to this block.
681 // FIXME: Is this really needed? Is this a correctness issue?
682 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
683 if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
684 for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
685 if (DestBB == CBI->getSuccessor(i))
686 return false;
687 }
688
689 // Try to skip merging if the unique predecessor of BB is terminated by a
690 // switch or indirect branch instruction, and BB is used as an incoming block
691 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
692 // add COPY instructions in the predecessor of BB instead of BB (if it is not
693 merged). Note that the critical edge created by merging such blocks won't be
694 // split in MachineSink because the jump table is not analyzable. By keeping
695 // such empty block (BB), ISel will place COPY instructions in BB, not in the
696 // predecessor of BB.
697 BasicBlock *Pred = BB->getUniquePredecessor();
698 if (!Pred ||
699 !(isa<SwitchInst>(Pred->getTerminator()) ||
700 isa<IndirectBrInst>(Pred->getTerminator())))
701 return true;
702
703 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
704 return true;
705
706 // We use a simple cost heuristic which determines that skipping merging is
707 // profitable if the cost of skipping merging is less than the cost of
708 // merging : Cost(skipping merging) < Cost(merging BB), where the
709 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
710 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
711 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
712 // Freq(Pred) / Freq(BB) > 2.
713 // Note that if there are multiple empty blocks sharing the same incoming
714 // value for the PHIs in the DestBB, we consider them together. In such
715 // case, Cost(merging BB) will be the sum of their frequencies.
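 // Editorial worked example (not part of the original source), using the
 // default cgp-freq-ratio-to-skip-merge value of 2 (line 178): if
 // Freq(Pred) = 600 and Freq(BB) = 200, then 600 > 200 * 2, the comparison
 // at line 751 below returns false, and the merge is skipped; if instead
 // Freq(Pred) = 300, then 300 <= 400 holds, so merging BB into DestBB is
 // considered profitable.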
716
717 if (!isa<PHINode>(DestBB->begin()))
718 return true;
719
720 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
721
722 // Find all other incoming blocks from which incoming values of all PHIs in
723 // DestBB are the same as the ones from BB.
724 for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
725 ++PI) {
726 BasicBlock *DestBBPred = *PI;
727 if (DestBBPred == BB)
728 continue;
729
730 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
731 return DestPN.getIncomingValueForBlock(BB) ==
732 DestPN.getIncomingValueForBlock(DestBBPred);
733 }))
734 SameIncomingValueBBs.insert(DestBBPred);
735 }
736
737 // See if all of BB's incoming values are the same as the value from Pred. In this
738 // case, there is no reason to skip merging because COPYs are expected to be placed in
739 // Pred already.
740 if (SameIncomingValueBBs.count(Pred))
741 return true;
742
743 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
744 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
745
746 for (auto SameValueBB : SameIncomingValueBBs)
747 if (SameValueBB->getUniquePredecessor() == Pred &&
748 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
749 BBFreq += BFI->getBlockFreq(SameValueBB);
750
751 return PredFreq.getFrequency() <=
752 BBFreq.getFrequency() * FreqRatioToSkipMerge;
753}
754
755/// Return true if we can merge BB into DestBB if there is a single
756/// unconditional branch between them, and BB contains no other non-phi
757/// instructions.
758bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
759 const BasicBlock *DestBB) const {
760 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
761 // the successor. If there are more complex condition (e.g. preheaders),
762 // don't mess around with them.
763 for (const PHINode &PN : BB->phis()) {
764 for (const User *U : PN.users()) {
765 const Instruction *UI = cast<Instruction>(U);
766 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
767 return false;
768 // If User is inside DestBB block and it is a PHINode then check
769 // incoming value. If incoming value is not from BB then this is
770 // a complex condition (e.g. preheaders) we want to avoid here.
771 if (UI->getParent() == DestBB) {
772 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
773 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
774 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
775 if (Insn && Insn->getParent() == BB &&
776 Insn->getParent() != UPN->getIncomingBlock(I))
777 return false;
778 }
779 }
780 }
781 }
782
783 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
784 // and DestBB may have conflicting incoming values for the block. If so, we
785 // can't merge the block.
786 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
787 if (!DestBBPN) return true; // no conflict.
788
789 // Collect the preds of BB.
790 SmallPtrSet<const BasicBlock*, 16> BBPreds;
791 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
792 // It is faster to get preds from a PHI than with pred_iterator.
793 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
794 BBPreds.insert(BBPN->getIncomingBlock(i));
795 } else {
796 BBPreds.insert(pred_begin(BB), pred_end(BB));
797 }
798
799 // Walk the preds of DestBB.
800 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
801 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
802 if (BBPreds.count(Pred)) { // Common predecessor?
803 for (const PHINode &PN : DestBB->phis()) {
804 const Value *V1 = PN.getIncomingValueForBlock(Pred);
805 const Value *V2 = PN.getIncomingValueForBlock(BB);
806
807 // If V2 is a phi node in BB, look up what the mapped value will be.
808 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
809 if (V2PN->getParent() == BB)
810 V2 = V2PN->getIncomingValueForBlock(Pred);
811
812 // If there is a conflict, bail out.
813 if (V1 != V2) return false;
814 }
815 }
816 }
817
818 return true;
819}
820
821/// Eliminate a basic block that has only phi's and an unconditional branch in
822/// it.
823void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
824 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
825 BasicBlock *DestBB = BI->getSuccessor(0);
826
827 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
828 << *BB << *DestBB);
829
830 // If the destination block has a single pred, then this is a trivial edge,
831 // just collapse it.
832 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
833 if (SinglePred != DestBB) {
834 assert(SinglePred == BB &&
835 "Single predecessor not the same as predecessor");
836 // Merge DestBB into SinglePred/BB and delete it.
837 MergeBlockIntoPredecessor(DestBB);
838 // Note: BB(=SinglePred) will not be deleted on this path.
839 // DestBB(=its single successor) is the one that was deleted.
840 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
841 return;
842 }
843 }
844
845 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
846 // to handle the new incoming edges it is about to have.
847 for (PHINode &PN : DestBB->phis()) {
848 // Remove the incoming value for BB, and remember it.
849 Value *InVal = PN.removeIncomingValue(BB, false);
850
851 // Two options: either the InVal is a phi node defined in BB or it is some
852 // value that dominates BB.
853 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
854 if (InValPhi && InValPhi->getParent() == BB) {
855 // Add all of the input values of the input PHI as inputs of this phi.
856 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
857 PN.addIncoming(InValPhi->getIncomingValue(i),
858 InValPhi->getIncomingBlock(i));
859 } else {
860 // Otherwise, add one instance of the dominating value for each edge that
861 // we will be adding.
862 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
863 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
864 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
865 } else {
866 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
867 PN.addIncoming(InVal, *PI);
868 }
869 }
870 }
871
872 // The PHIs are now updated, change everything that refers to BB to use
873 // DestBB and remove BB.
874 BB->replaceAllUsesWith(DestBB);
875 BB->eraseFromParent();
876 ++NumBlocksElim;
877
878 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
879}
880
881// Computes a map of base pointer relocation instructions to corresponding
882// derived pointer relocation instructions given a vector of all relocate calls
883static void computeBaseDerivedRelocateMap(
884 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
885 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
886 &RelocateInstMap) {
887 // Collect information in two maps: one primarily for locating the base object
888 // while filling the second map; the second map is the final structure holding
889 // a mapping between Base and corresponding Derived relocate calls
890 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
891 for (auto *ThisRelocate : AllRelocateCalls) {
892 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
893 ThisRelocate->getDerivedPtrIndex());
894 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
895 }
896 for (auto &Item : RelocateIdxMap) {
897 std::pair<unsigned, unsigned> Key = Item.first;
898 if (Key.first == Key.second)
899 // Base relocation: nothing to insert
900 continue;
901
902 GCRelocateInst *I = Item.second;
903 auto BaseKey = std::make_pair(Key.first, Key.first);
904
905 // We're iterating over RelocateIdxMap so we cannot modify it.
906 auto MaybeBase = RelocateIdxMap.find(BaseKey);
907 if (MaybeBase == RelocateIdxMap.end())
908 // TODO: We might want to insert a new base object relocate and gep off
909 // that, if there are enough derived object relocates.
910 continue;
911
912 RelocateInstMap[MaybeBase->second].push_back(I);
913 }
914}
915
916// Accepts a GEP and extracts the operands into a vector provided they're all
917// small integer constants
918static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
919 SmallVectorImpl<Value *> &OffsetV) {
920 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
921 // Only accept small constant integer operands
922 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
923 if (!Op || Op->getZExtValue() > 20)
924 return false;
925 }
926
927 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
928 OffsetV.push_back(GEP->getOperand(i));
929 return true;
930}
931
932// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
933// replace, computes a replacement, and affects it.
934static bool
935simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
936 const SmallVectorImpl<GCRelocateInst *> &Targets) {
937 bool MadeChange = false;
938 // We must ensure the relocation of derived pointer is defined after
939 // relocation of base pointer. If we find a relocation corresponding to base
940 // defined earlier than relocation of base then we move relocation of base
941 // right before found relocation. We consider only relocation in the same
942 // basic block as relocation of base. Relocations from other basic block will
943 // be skipped by optimization and we do not care about them.
944 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
945 &*R != RelocatedBase; ++R)
946 if (auto RI = dyn_cast<GCRelocateInst>(R))
947 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
948 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
949 RelocatedBase->moveBefore(RI);
950 break;
951 }
952
953 for (GCRelocateInst *ToReplace : Targets) {
954 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
955 "Not relocating a derived object of the original base object");
956 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
957 // A duplicate relocate call. TODO: coalesce duplicates.
958 continue;
959 }
960
961 if (RelocatedBase->getParent() != ToReplace->getParent()) {
962 // Base and derived relocates are in different basic blocks.
963 // In this case transform is only valid when base dominates derived
964 // relocate. However it would be too expensive to check dominance
965 // for each such relocate, so we skip the whole transformation.
966 continue;
967 }
968
969 Value *Base = ToReplace->getBasePtr();
970 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
971 if (!Derived || Derived->getPointerOperand() != Base)
972 continue;
973
974 SmallVector<Value *, 2> OffsetV;
975 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
976 continue;
977
978 // Create a Builder and replace the target callsite with a gep
979 assert(RelocatedBase->getNextNode() &&
980 "Should always have one since it's not a terminator");
981
982 // Insert after RelocatedBase
983 IRBuilder<> Builder(RelocatedBase->getNextNode());
984 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
985
986 // If gc_relocate does not match the actual type, cast it to the right type.
987 // In theory, there must be a bitcast after gc_relocate if the type does not
988 // match, and we should reuse it to get the derived pointer. But it could be
989 // cases like this:
990 // bb1:
991 // ...
992 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
993 // br label %merge
994 //
995 // bb2:
996 // ...
997 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
998 // br label %merge
999 //
1000 // merge:
1001 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1002 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1003 //
1004 // In this case, we cannot find the bitcast any more. So we insert a new bitcast
1005 // no matter whether there is already one or not. In this way, we can handle all cases, and
1006 // the extra bitcast should be optimized away in later passes.
1007 Value *ActualRelocatedBase = RelocatedBase;
1008 if (RelocatedBase->getType() != Base->getType()) {
1009 ActualRelocatedBase =
1010 Builder.CreateBitCast(RelocatedBase, Base->getType());
1011 }
1012 Value *Replacement = Builder.CreateGEP(
1013 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1014 Replacement->takeName(ToReplace);
1015 // If the newly generated derived pointer's type does not match the original derived
1016 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1017 Value *ActualReplacement = Replacement;
1018 if (Replacement->getType() != ToReplace->getType()) {
1019 ActualReplacement =
1020 Builder.CreateBitCast(Replacement, ToReplace->getType());
1021 }
1022 ToReplace->replaceAllUsesWith(ActualReplacement);
1023 ToReplace->eraseFromParent();
1024
1025 MadeChange = true;
1026 }
1027 return MadeChange;
1028}
1029
1030// Turns this:
1031//
1032// %base = ...
1033// %ptr = gep %base + 15
1034// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1035// %base' = relocate(%tok, i32 4, i32 4)
1036// %ptr' = relocate(%tok, i32 4, i32 5)
1037// %val = load %ptr'
1038//
1039// into this:
1040//
1041// %base = ...
1042// %ptr = gep %base + 15
1043// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1044// %base' = gc.relocate(%tok, i32 4, i32 4)
1045// %ptr' = gep %base' + 15
1046// %val = load %ptr'
1047bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1048 bool MadeChange = false;
1049 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1050
1051 for (auto *U : I.users())
1052 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1053 // Collect all the relocate calls associated with a statepoint
1054 AllRelocateCalls.push_back(Relocate);
1055
1056 // We need at least one base pointer relocation + one derived pointer
1057 // relocation to mangle
1058 if (AllRelocateCalls.size() < 2)
1059 return false;
1060
1061 // RelocateInstMap is a mapping from the base relocate instruction to the
1062 // corresponding derived relocate instructions
1063 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1064 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1065 if (RelocateInstMap.empty())
1066 return false;
1067
1068 for (auto &Item : RelocateInstMap)
1069 // Item.first is the RelocatedBase to offset against
1070 // Item.second is the vector of Targets to replace
1071 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1072 return MadeChange;
1073}
1074
1075/// Sink the specified cast instruction into its user blocks.
1076static bool SinkCast(CastInst *CI) {
1077 BasicBlock *DefBB = CI->getParent();
1078
1079 /// InsertedCasts - Only insert a cast in each block once.
1080 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1081
1082 bool MadeChange = false;
1083 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1084 UI != E; ) {
1085 Use &TheUse = UI.getUse();
1086 Instruction *User = cast<Instruction>(*UI);
1087
1088 // Figure out which BB this cast is used in. For PHI's this is the
1089 // appropriate predecessor block.
1090 BasicBlock *UserBB = User->getParent();
1091 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1092 UserBB = PN->getIncomingBlock(TheUse);
1093 }
1094
1095 // Preincrement use iterator so we don't invalidate it.
1096 ++UI;
1097
1098 // The first insertion point of a block containing an EH pad is after the
1099 // pad. If the pad is the user, we cannot sink the cast past the pad.
1100 if (User->isEHPad())
1101 continue;
1102
1103 // If the block selected to receive the cast is an EH pad that does not
1104 // allow non-PHI instructions before the terminator, we can't sink the
1105 // cast.
1106 if (UserBB->getTerminator()->isEHPad())
1107 continue;
1108
1109 // If this user is in the same block as the cast, don't change the cast.
1110 if (UserBB == DefBB) continue;
1111
1112 // If we have already inserted a cast into this block, use it.
1113 CastInst *&InsertedCast = InsertedCasts[UserBB];
1114
1115 if (!InsertedCast) {
1116 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1117 assert(InsertPt != UserBB->end());
1118 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1119 CI->getType(), "", &*InsertPt);
1120 InsertedCast->setDebugLoc(CI->getDebugLoc());
1121 }
1122
1123 // Replace a use of the cast with a use of the new cast.
1124 TheUse = InsertedCast;
1125 MadeChange = true;
1126 ++NumCastUses;
1127 }
1128
1129 // If we removed all uses, nuke the cast.
1130 if (CI->use_empty()) {
1131 salvageDebugInfo(*CI);
1132 CI->eraseFromParent();
1133 MadeChange = true;
1134 }
1135
1136 return MadeChange;
1137}
1138
1139/// If the specified cast instruction is a noop copy (e.g. it's casting from
1140/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1141/// reduce the number of virtual registers that must be created and coalesced.
1142///
1143/// Return true if any changes are made.
1144static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1145 const DataLayout &DL) {
1146 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1147 // than sinking only nop casts, but is helpful on some platforms.
1148 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1149 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1150 ASC->getDestAddressSpace()))
1151 return false;
1152 }
1153
1154 // If this is a noop copy,
1155 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1156 EVT DstVT = TLI.getValueType(DL, CI->getType());
1157
1158 // This is an fp<->int conversion?
1159 if (SrcVT.isInteger() != DstVT.isInteger())
1160 return false;
1161
1162 // If this is an extension, it will be a zero or sign extension, which
1163 // isn't a noop.
1164 if (SrcVT.bitsLT(DstVT)) return false;
1165
1166 // If these values will be promoted, find out what they will be promoted
1167 // to. This helps us consider truncates on PPC as noop copies when they
1168 // are.
1169 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1170 TargetLowering::TypePromoteInteger)
1171 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1172 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1173 TargetLowering::TypePromoteInteger)
1174 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1175
1176 // If, after promotion, these are the same types, this is a noop copy.
1177 if (SrcVT != DstVT)
1178 return false;
1179
1180 return SinkCast(CI);
1181}
1182
1183bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1184 CmpInst *Cmp,
1185 Intrinsic::ID IID) {
1186 if (BO->getParent() != Cmp->getParent()) {
1187 // We used to use a dominator tree here to allow multi-block optimization.
1188 // But that was problematic because:
1189 // 1. It could cause a perf regression by hoisting the math op into the
1190 // critical path.
1191 // 2. It could cause a perf regression by creating a value that was live
1192 // across multiple blocks and increasing register pressure.
1193 // 3. Use of a dominator tree could cause large compile-time regression.
1194 // This is because we recompute the DT on every change in the main CGP
1195 // run-loop. The recomputing is probably unnecessary in many cases, so if
1196 // that was fixed, using a DT here would be ok.
1197 return false;
1198 }
1199
1200 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
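 // For example (illustrative constants): 'sub X, 42' is canonicalized to
 // 'add X, -42', so when IID == usub_with_overflow we negate the constant
 // again and form usubo(X, 42) below.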
1201 Value *Arg0 = BO->getOperand(0);
1202 Value *Arg1 = BO->getOperand(1);
1203 if (BO->getOpcode() == Instruction::Add &&
1204 IID == Intrinsic::usub_with_overflow) {
1205 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1206 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1207 }
1208
1209 // Insert at the first instruction of the pair.
1210 Instruction *InsertPt = nullptr;
1211 for (Instruction &Iter : *Cmp->getParent()) {
1212 if (&Iter == BO || &Iter == Cmp) {
1213 InsertPt = &Iter;
1214 break;
1215 }
1216 }
1217 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1218
1219 IRBuilder<> Builder(InsertPt);
1220 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1221 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1222 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1223 BO->replaceAllUsesWith(Math);
1224 Cmp->replaceAllUsesWith(OV);
1225 BO->eraseFromParent();
1226 Cmp->eraseFromParent();
1227 return true;
1228}
1229
1230/// Match special-case patterns that check for unsigned add overflow.
1231static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1232 BinaryOperator *&Add) {
1233 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1234 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1235 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1236
1237 // We are not expecting non-canonical/degenerate code. Just bail out.
1238 if (isa<Constant>(A))
1239 return false;
1240
1241 ICmpInst::Predicate Pred = Cmp->getPredicate();
1242 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1243 B = ConstantInt::get(B->getType(), 1);
1244 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1245 B = ConstantInt::get(B->getType(), -1);
1246 else
1247 return false;
1248
1249 // Check the users of the variable operand of the compare looking for an add
1250 // with the adjusted constant.
1251 for (User *U : A->users()) {
1252 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1253 Add = cast<BinaryOperator>(U);
1254 return true;
1255 }
1256 }
1257 return false;
1258}
1259
1260/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1261/// intrinsic. Return true if any changes were made.
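/// A minimal sketch of the rewrite (illustrative IR; value names are
/// hypothetical):
///
///   %add = add i32 %a, %b
///   %ov  = icmp ult i32 %add, %a     ; classic unsigned-overflow check
/// ==>
///   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %add = extractvalue { i32, i1 } %res, 0
///   %ov  = extractvalue { i32, i1 } %res, 1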
1262bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1263 bool &ModifiedDT) {
1264 Value *A, *B;
1265 BinaryOperator *Add;
1266 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
1267 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1268 return false;
1269
1270 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1271 TLI->getValueType(*DL, Add->getType())))
1272 return false;
1273
1274 // We don't want to move around uses of condition values this late, so we
1275 // check if it is legal to create the call to the intrinsic in the basic
1276 // block containing the icmp.
1277 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1278 return false;
1279
1280 if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
1281 return false;
1282
1283 // Reset callers - do not crash by iterating over a dead instruction.
1284 ModifiedDT = true;
1285 return true;
1286}
1287
1288bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1289 bool &ModifiedDT) {
1290 // We are not expecting non-canonical/degenerate code. Just bail out.
1291 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1292 if (isa<Constant>(A) && isa<Constant>(B))
1293 return false;
1294
1295 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1296 ICmpInst::Predicate Pred = Cmp->getPredicate();
1297 if (Pred == ICmpInst::ICMP_UGT) {
1298 std::swap(A, B);
1299 Pred = ICmpInst::ICMP_ULT;
1300 }
1301 // Convert special-case: (A == 0) is the same as (A u< 1).
1302 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1303 B = ConstantInt::get(B->getType(), 1);
1304 Pred = ICmpInst::ICMP_ULT;
1305 }
1306 // Convert special-case: (A != 0) is the same as (0 u< A).
1307 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1308 std::swap(A, B);
1309 Pred = ICmpInst::ICMP_ULT;
1310 }
1311 if (Pred != ICmpInst::ICMP_ULT)
1312 return false;
1313
1314 // Walk the users of a variable operand of a compare looking for a subtract or
1315 // add with that same operand. Also match the 2nd operand of the compare to
1316 // the add/sub, but that may be a negated constant operand of an add.
1317 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1318 BinaryOperator *Sub = nullptr;
1319 for (User *U : CmpVariableOperand->users()) {
1320 // A - B, A u< B --> usubo(A, B)
1321 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1322 Sub = cast<BinaryOperator>(U);
1323 break;
1324 }
1325
1326 // A + (-C), A u< C (canonicalized form of (sub A, C))
1327 const APInt *CmpC, *AddC;
1328 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1329 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1330 Sub = cast<BinaryOperator>(U);
1331 break;
1332 }
1333 }
1334 if (!Sub)
1335 return false;
1336
1337 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1338 TLI->getValueType(*DL, Sub->getType())))
1339 return false;
1340
1341 if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
1342 return false;
1343
1344 // Reset callers - do not crash by iterating over a dead instruction.
1345 ModifiedDT = true;
1346 return true;
1347}
1348
1349/// Sink the given CmpInst into user blocks to reduce the number of virtual
1350/// registers that must be created and coalesced. This is a clear win except on
1351/// targets with multiple condition code registers (PowerPC), where it might
1352/// lose; some adjustment may be wanted there.
1353///
1354/// Return true if any changes are made.
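/// For example (illustrative IR; names are hypothetical), a cmp whose only use
/// is a branch in another block is re-created in that block:
///
///   DefBB:
///     %c = icmp eq i32 %x, 0
///     ...
///   UseBB:
///     br i1 %c, label %T, label %F
/// ==>
///   UseBB:
///     %c.sunk = icmp eq i32 %x, 0
///     br i1 %c.sunk, label %T, label %F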
1355static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1356 if (TLI.hasMultipleConditionRegisters())
1357 return false;
1358
1359 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1360 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1361 return false;
1362
1363 // Only insert a cmp in each block once.
1364 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1365
1366 bool MadeChange = false;
1367 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1368 UI != E; ) {
1369 Use &TheUse = UI.getUse();
1370 Instruction *User = cast<Instruction>(*UI);
1371
1372 // Preincrement use iterator so we don't invalidate it.
1373 ++UI;
1374
1375 // Don't bother for PHI nodes.
1376 if (isa<PHINode>(User))
1377 continue;
1378
1379 // Figure out which BB this cmp is used in.
1380 BasicBlock *UserBB = User->getParent();
1381 BasicBlock *DefBB = Cmp->getParent();
1382
1383 // If this user is in the same block as the cmp, don't change the cmp.
1384 if (UserBB == DefBB) continue;
1385
1386 // If we have already inserted a cmp into this block, use it.
1387 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1388
1389 if (!InsertedCmp) {
1390 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1391 assert(InsertPt != UserBB->end());
1392 InsertedCmp =
1393 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1394 Cmp->getOperand(0), Cmp->getOperand(1), "",
1395 &*InsertPt);
1396 // Propagate the debug info.
1397 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1398 }
1399
1400 // Replace a use of the cmp with a use of the new cmp.
1401 TheUse = InsertedCmp;
1402 MadeChange = true;
1403 ++NumCmpUses;
1404 }
1405
1406 // If we removed all uses, nuke the cmp.
1407 if (Cmp->use_empty()) {
1408 Cmp->eraseFromParent();
1409 MadeChange = true;
1410 }
1411
1412 return MadeChange;
1413}
1414
1415/// For pattern like:
1416///
1417/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1418/// ...
1419/// DomBB:
1420/// ...
1421/// br DomCond, TrueBB, CmpBB
1422/// CmpBB: (with DomBB being the single predecessor)
1423/// ...
1424/// Cmp = icmp eq CmpOp0, CmpOp1
1425/// ...
1426///
1427/// On targets where the lowering of icmp sgt/slt differs from the lowering of
1428/// icmp eq (e.g. PowerPC), this pattern needs two comparisons. This function
1429/// tries to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1430/// After that, DomCond and Cmp can share the same comparison, saving one
1431/// comparison.
1432///
1433/// Return true if any changes are made.
1434static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1435 const TargetLowering &TLI) {
1436 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1437 return false;
1438
1439 ICmpInst::Predicate Pred = Cmp->getPredicate();
1440 if (Pred != ICmpInst::ICMP_EQ)
1441 return false;
1442
1443 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1444 // icmp slt/sgt would introduce more redundant LLVM IR.
1445 for (User *U : Cmp->users()) {
1446 if (isa<BranchInst>(U))
1447 continue;
1448 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1449 continue;
1450 return false;
1451 }
1452
1453 // This is a cheap/incomplete check for dominance - just match a single
1454 // predecessor with a conditional branch.
1455 BasicBlock *CmpBB = Cmp->getParent();
1456 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1457 if (!DomBB)
1458 return false;
1459
1460 // We want to ensure that the only way control gets to the comparison of
1461 // interest is that a less/greater than comparison on the same operands is
1462 // false.
1463 Value *DomCond;
1464 BasicBlock *TrueBB, *FalseBB;
1465 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1466 return false;
1467 if (CmpBB != FalseBB)
1468 return false;
1469
1470 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1471 ICmpInst::Predicate DomPred;
1472 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1473 return false;
1474 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1475 return false;
1476
1477 // Convert the equality comparison to the opposite of the dominating
1478 // comparison and swap the direction for all branch/select users.
1479 // We have conceptually converted:
1480 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1481 // to
1482 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1483 // And similarly for branches.
1484 for (User *U : Cmp->users()) {
1485 if (auto *BI = dyn_cast<BranchInst>(U)) {
1486 assert(BI->isConditional() && "Must be conditional");
1487 BI->swapSuccessors();
1488 continue;
1489 }
1490 if (auto *SI = dyn_cast<SelectInst>(U)) {
1491 // Swap operands
1492 SI->swapValues();
1493 SI->swapProfMetadata();
1494 continue;
1495 }
1496 llvm_unreachable("Must be a branch or a select");
1497 }
1498 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1499 return true;
1500}
1501
1502bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
1503 if (sinkCmpExpression(Cmp, *TLI))
1504 return true;
1505
1506 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1507 return true;
1508
1509 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1510 return true;
1511
1512 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1513 return true;
1514
1515 return false;
1516}
1517
1518/// Duplicate and sink the given 'and' instruction into user blocks where it is
1519/// used in a compare to allow isel to generate better code for targets where
1520/// this operation can be combined.
1521///
1522/// Return true if any changes are made.
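/// For example (illustrative IR; names are hypothetical), an 'and' used only by
/// 'icmp eq ..., 0' in another block is duplicated next to that compare:
///
///   DefBB:
///     %m = and i64 %x, 255
///   UseBB:
///     %c = icmp eq i64 %m, 0
/// ==>
///   UseBB:
///     %m.sunk = and i64 %x, 255
///     %c = icmp eq i64 %m.sunk, 0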
1523static bool sinkAndCmp0Expression(Instruction *AndI,
1524 const TargetLowering &TLI,
1525 SetOfInstrs &InsertedInsts) {
1526 // Double-check that we're not trying to optimize an instruction that was
1527 // already optimized by some other part of this pass.
1528 assert(!InsertedInsts.count(AndI) &&
1529 "Attempting to optimize already optimized and instruction");
1530 (void) InsertedInsts;
1531
1532 // Nothing to do for single use in same basic block.
1533 if (AndI->hasOneUse() &&
1534 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1535 return false;
1536
1537 // Try to avoid cases where sinking/duplicating is likely to increase register
1538 // pressure.
1539 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1540 !isa<ConstantInt>(AndI->getOperand(1)) &&
1541 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1542 return false;
1543
1544 for (auto *U : AndI->users()) {
1545 Instruction *User = cast<Instruction>(U);
1546
1547 // Only sink 'and' feeding icmp with 0.
1548 if (!isa<ICmpInst>(User))
1549 return false;
1550
1551 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1552 if (!CmpC || !CmpC->isZero())
1553 return false;
1554 }
1555
1556 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1557 return false;
1558
1559 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1560 LLVM_DEBUG(AndI->getParent()->dump());
1561
1562 // Push the 'and' into the same block as the icmp 0. There should only be
1563 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1564 // others, so we don't need to keep track of which BBs we insert into.
1565 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1566 UI != E; ) {
1567 Use &TheUse = UI.getUse();
1568 Instruction *User = cast<Instruction>(*UI);
1569
1570 // Preincrement use iterator so we don't invalidate it.
1571 ++UI;
1572
1573 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1574
1575 // Keep the 'and' in the same place if the use is already in the same block.
1576 Instruction *InsertPt =
1577 User->getParent() == AndI->getParent() ? AndI : User;
1578 Instruction *InsertedAnd =
1579 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1580 AndI->getOperand(1), "", InsertPt);
1581 // Propagate the debug info.
1582 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1583
1584 // Replace a use of the 'and' with a use of the new 'and'.
1585 TheUse = InsertedAnd;
1586 ++NumAndUses;
1587 LLVM_DEBUG(User->getParent()->dump());
1588 }
1589
1590 // We removed all uses, nuke the and.
1591 AndI->eraseFromParent();
1592 return true;
1593}
1594
1595/// Check if the candidates could be combined with a shift instruction, which
1596/// includes:
1597/// 1. Truncate instruction
1598/// 2. And instruction where the immediate is a mask of the low bits:
1599/// imm & (imm+1) == 0
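/// For example, 0x00ff qualifies (0x00ff & 0x0100 == 0) while 0x00f0 does not
/// (0x00f0 & 0x00f1 == 0x00f0).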
1600static bool isExtractBitsCandidateUse(Instruction *User) {
1601 if (!isa<TruncInst>(User)) {
1602 if (User->getOpcode() != Instruction::And ||
1603 !isa<ConstantInt>(User->getOperand(1)))
1604 return false;
1605
1606 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1607
1608 if ((Cimm & (Cimm + 1)).getBoolValue())
1609 return false;
1610 }
1611 return true;
1612}
1613
1614/// Sink both shift and truncate instruction to the use of truncate's BB.
1615static bool
1616SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1617 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1618 const TargetLowering &TLI, const DataLayout &DL) {
1619 BasicBlock *UserBB = User->getParent();
1620 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1621 auto *TruncI = cast<TruncInst>(User);
1622 bool MadeChange = false;
1623
1624 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1625 TruncE = TruncI->user_end();
1626 TruncUI != TruncE;) {
1627
1628 Use &TruncTheUse = TruncUI.getUse();
1629 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1630 // Preincrement use iterator so we don't invalidate it.
1631
1632 ++TruncUI;
1633
1634 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1635 if (!ISDOpcode)
1636 continue;
1637
1638 // If the use is actually a legal node, there will not be an
1639 // implicit truncate.
1640 // FIXME: always querying the result type is just an
1641 // approximation; some nodes' legality is determined by the
1642 // operand or other means. There's no good way to find out though.
1643 if (TLI.isOperationLegalOrCustom(
1644 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1645 continue;
1646
1647 // Don't bother for PHI nodes.
1648 if (isa<PHINode>(TruncUser))
1649 continue;
1650
1651 BasicBlock *TruncUserBB = TruncUser->getParent();
1652
1653 if (UserBB == TruncUserBB)
1654 continue;
1655
1656 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1657 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1658
1659 if (!InsertedShift && !InsertedTrunc) {
1660 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1661 assert(InsertPt != TruncUserBB->end());
1662 // Sink the shift
1663 if (ShiftI->getOpcode() == Instruction::AShr)
1664 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1665 "", &*InsertPt);
1666 else
1667 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1668 "", &*InsertPt);
1669 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1670
1671 // Sink the trunc
1672 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1673 TruncInsertPt++;
1674 assert(TruncInsertPt != TruncUserBB->end());
1675
1676 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1677 TruncI->getType(), "", &*TruncInsertPt);
1678 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1679
1680 MadeChange = true;
1681
1682 TruncTheUse = InsertedTrunc;
1683 }
1684 }
1685 return MadeChange;
1686}
1687
1688/// Sink the shift *right* instruction into user blocks if the uses could
1689/// potentially be combined with this shift instruction to generate a BitExtract
1690/// instruction. It is only applied if the architecture supports BitExtract
1691/// instructions. Here is an example:
1692/// BB1:
1693/// %x.extract.shift = lshr i64 %arg1, 32
1694/// BB2:
1695/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1696/// ==>
1697///
1698/// BB2:
1699/// %x.extract.shift.1 = lshr i64 %arg1, 32
1700/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1701///
1702/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1703/// instruction.
1704/// Return true if any changes are made.
1705static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1706 const TargetLowering &TLI,
1707 const DataLayout &DL) {
1708 BasicBlock *DefBB = ShiftI->getParent();
1709
1710 /// Only insert instructions in each block once.
1711 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1712
1713 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1714
1715 bool MadeChange = false;
1716 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1717 UI != E;) {
1718 Use &TheUse = UI.getUse();
1719 Instruction *User = cast<Instruction>(*UI);
1720 // Preincrement use iterator so we don't invalidate it.
1721 ++UI;
1722
1723 // Don't bother for PHI nodes.
1724 if (isa<PHINode>(User))
1725 continue;
1726
1727 if (!isExtractBitsCandidateUse(User))
1728 continue;
1729
1730 BasicBlock *UserBB = User->getParent();
1731
1732 if (UserBB == DefBB) {
1733 // The shift and truncate instructions are in the same BB. The use of
1734 // the truncate (TruncUse) may still introduce another truncate if its
1735 // type is not legal. In this case, we would like to sink both the shift
1736 // and the truncate instruction into the BB of TruncUse.
1737 // for example:
1738 // BB1:
1739 // i64 shift.result = lshr i64 opnd, imm
1740 // trunc.result = trunc shift.result to i16
1741 //
1742 // BB2:
1743 // ----> We will have an implicit truncate here if the architecture does
1744 // not have i16 compare.
1745 // cmp i16 trunc.result, opnd2
1746 //
1747 if (isa<TruncInst>(User) && shiftIsLegal
1748 // If the type of the truncate is legal, no truncate will be
1749 // introduced in other basic blocks.
1750 &&
1751 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1752 MadeChange =
1753 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1754
1755 continue;
1756 }
1757 // If we have already inserted a shift into this block, use it.
1758 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1759
1760 if (!InsertedShift) {
1761 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1762 assert(InsertPt != UserBB->end());
1763
1764 if (ShiftI->getOpcode() == Instruction::AShr)
1765 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1766 "", &*InsertPt);
1767 else
1768 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1769 "", &*InsertPt);
1770 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1771
1772 MadeChange = true;
1773 }
1774
1775 // Replace a use of the shift with a use of the new shift.
1776 TheUse = InsertedShift;
1777 }
1778
1779 // If we removed all uses, or there are none, nuke the shift.
1780 if (ShiftI->use_empty()) {
1781 salvageDebugInfo(*ShiftI);
1782 ShiftI->eraseFromParent();
1783 MadeChange = true;
1784 }
1785
1786 return MadeChange;
1787}
1788
1789/// If counting leading or trailing zeros is an expensive operation and a zero
1790/// input is defined, add a check for zero to avoid calling the intrinsic.
1791///
1792/// We want to transform:
1793/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1794///
1795/// into:
1796/// entry:
1797/// %cmpz = icmp eq i64 %A, 0
1798/// br i1 %cmpz, label %cond.end, label %cond.false
1799/// cond.false:
1800/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1801/// br label %cond.end
1802/// cond.end:
1803/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1804///
1805/// If the transform is performed, return true and set ModifiedDT to true.
1806static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1807 const TargetLowering *TLI,
1808 const DataLayout *DL,
1809 bool &ModifiedDT) {
1810 if (!TLI || !DL)
1811 return false;
1812
1813 // If a zero input is undefined, it doesn't make sense to despeculate that.
1814 if (match(CountZeros->getOperand(1), m_One()))
1815 return false;
1816
1817 // If it's cheap to speculate, there's nothing to do.
1818 auto IntrinsicID = CountZeros->getIntrinsicID();
1819 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1820 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1821 return false;
1822
1823 // Only handle legal scalar cases. Anything else requires too much work.
1824 Type *Ty = CountZeros->getType();
1825 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1826 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1827 return false;
1828
1829 // The intrinsic will be sunk behind a compare against zero and branch.
1830 BasicBlock *StartBlock = CountZeros->getParent();
1831 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1832
1833 // Create another block after the count zero intrinsic. A PHI will be added
1834 // in this block to select the result of the intrinsic or the bit-width
1835 // constant if the input to the intrinsic is zero.
1836 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1837 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1838
1839 // Set up a builder to create a compare, conditional branch, and PHI.
1840 IRBuilder<> Builder(CountZeros->getContext());
1841 Builder.SetInsertPoint(StartBlock->getTerminator());
1842 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1843
1844 // Replace the unconditional branch that was created by the first split with
1845 // a compare against zero and a conditional branch.
1846 Value *Zero = Constant::getNullValue(Ty);
1847 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1848 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1849 StartBlock->getTerminator()->eraseFromParent();
1850
1851 // Create a PHI in the end block to select either the output of the intrinsic
1852 // or the bit width of the operand.
1853 Builder.SetInsertPoint(&EndBlock->front());
1854 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1855 CountZeros->replaceAllUsesWith(PN);
1856 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1857 PN->addIncoming(BitWidth, StartBlock);
1858 PN->addIncoming(CountZeros, CallBlock);
1859
1860 // We are explicitly handling the zero case, so we can set the intrinsic's
1861 // undefined zero argument to 'true'. This will also prevent reprocessing the
1862 // intrinsic; we only despeculate when a zero input is defined.
1863 CountZeros->setArgOperand(1, Builder.getTrue());
1864 ModifiedDT = true;
1865 return true;
1866}
1867
1868bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1869 BasicBlock *BB = CI->getParent();
1870
1871 // Lower inline assembly if we can.
1872 // If we found an inline asm expression, and if the target knows how to
1873 // lower it to normal LLVM code, do so now.
1874 if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1875 if (TLI->ExpandInlineAsm(CI)) {
1876 // Avoid invalidating the iterator.
1877 CurInstIterator = BB->begin();
1878 // Avoid processing instructions out of order, which could cause
1879 // reuse before a value is defined.
1880 SunkAddrs.clear();
1881 return true;
1882 }
1883 // Sink address computing for memory operands into the block.
1884 if (optimizeInlineAsmInst(CI))
1885 return true;
1886 }
1887
1888 // Align the pointer arguments to this call if the target thinks it's a good
1889 // idea
1890 unsigned MinSize, PrefAlign;
1891 if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1892 for (auto &Arg : CI->arg_operands()) {
1893 // We want to align both objects whose address is used directly and
1894 // objects whose address is used in casts and GEPs, though it only makes
1895 // sense for GEPs if the offset is a multiple of the desired alignment and
1896 // if size - offset meets the size threshold.
1897 if (!Arg->getType()->isPointerTy())
1898 continue;
1899 APInt Offset(DL->getIndexSizeInBits(
1900 cast<PointerType>(Arg->getType())->getAddressSpace()),
1901 0);
1902 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1903 uint64_t Offset2 = Offset.getLimitedValue();
1904 if ((Offset2 & (PrefAlign-1)) != 0)
1905 continue;
1906 AllocaInst *AI;
1907 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1908 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1909 AI->setAlignment(MaybeAlign(PrefAlign));
1910 // Global variables can only be aligned if they are defined in this
1911 // object (i.e. they are uniquely initialized in this object), and
1912 // over-aligning global variables that have an explicit section is
1913 // forbidden.
1914 GlobalVariable *GV;
1915 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1916 GV->getPointerAlignment(*DL) < PrefAlign &&
1917 DL->getTypeAllocSize(GV->getValueType()) >=
1918 MinSize + Offset2)
1919 GV->setAlignment(MaybeAlign(PrefAlign));
1920 }
1921 // If this is a memcpy (or similar) then we may be able to improve the
1922 // alignment
1923 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1924 unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1925 if (DestAlign > MI->getDestAlignment())
1926 MI->setDestAlignment(DestAlign);
1927 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1928 unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1929 if (SrcAlign > MTI->getSourceAlignment())
1930 MTI->setSourceAlignment(SrcAlign);
1931 }
1932 }
1933 }
1934
1935 // If we have a cold call site, try to sink addressing computation into the
1936 // cold block. This interacts with our handling for loads and stores to
1937 // ensure that we can fold all uses of a potential addressing computation
1938 // into their uses. TODO: generalize this to work over profiling data
1939 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1940 for (auto &Arg : CI->arg_operands()) {
1941 if (!Arg->getType()->isPointerTy())
1942 continue;
1943 unsigned AS = Arg->getType()->getPointerAddressSpace();
1944 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1945 }
1946
1947 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1948 if (II) {
1949 switch (II->getIntrinsicID()) {
1950 default: break;
1951 case Intrinsic::experimental_widenable_condition: {
1952 // Give up on future widening opportunities so that we can fold away dead
1953 // paths and merge blocks before going into block-local instruction
1954 // selection.
1955 if (II->use_empty()) {
1956 II->eraseFromParent();
1957 return true;
1958 }
1959 Constant *RetVal = ConstantInt::getTrue(II->getContext());
1960 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1961 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1962 });
1963 return true;
1964 }
1965 case Intrinsic::objectsize:
1966 llvm_unreachable("llvm.objectsize.* should have been lowered already");
1967 case Intrinsic::is_constant:
1968 llvm_unreachable("llvm.is.constant.* should have been lowered already");
1969 case Intrinsic::aarch64_stlxr:
1970 case Intrinsic::aarch64_stxr: {
1971 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1972 if (!ExtVal || !ExtVal->hasOneUse() ||
1973 ExtVal->getParent() == CI->getParent())
1974 return false;
1975 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1976 ExtVal->moveBefore(CI);
1977 // Mark this instruction as "inserted by CGP", so that other
1978 // optimizations don't touch it.
1979 InsertedInsts.insert(ExtVal);
1980 return true;
1981 }
1982
1983 case Intrinsic::launder_invariant_group:
1984 case Intrinsic::strip_invariant_group: {
1985 Value *ArgVal = II->getArgOperand(0);
1986 auto it = LargeOffsetGEPMap.find(II);
1987 if (it != LargeOffsetGEPMap.end()) {
1988 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
1989 // Make sure not to have to deal with iterator invalidation
1990 // after possibly adding ArgVal to LargeOffsetGEPMap.
1991 auto GEPs = std::move(it->second);
1992 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
1993 LargeOffsetGEPMap.erase(II);
1994 }
1995
1996 II->replaceAllUsesWith(ArgVal);
1997 II->eraseFromParent();
1998 return true;
1999 }
2000 case Intrinsic::cttz:
2001 case Intrinsic::ctlz:
2002 // If counting zeros is expensive, try to avoid it.
2003 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
2004 }
2005
2006 if (TLI) {
2007 SmallVector<Value*, 2> PtrOps;
2008 Type *AccessTy;
2009 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2010 while (!PtrOps.empty()) {
2011 Value *PtrVal = PtrOps.pop_back_val();
2012 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2013 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2014 return true;
2015 }
2016 }
2017 }
2018
2019 // From here on out we're working with named functions.
2020 if (!CI->getCalledFunction()) return false;
2021
2022 // Lower all default uses of _chk calls. This is very similar
2023 // to what InstCombineCalls does, but here we are only lowering calls
2024 // to fortified library functions (e.g. __memcpy_chk) that have the default
2025 // "don't know" as the objectsize. Anything else should be left alone.
2026 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2027 if (Value *V = Simplifier.optimizeCall(CI)) {
2028 CI->replaceAllUsesWith(V);
2029 CI->eraseFromParent();
2030 return true;
2031 }
2032
2033 return false;
2034}
2035
2036/// Look for opportunities to duplicate return instructions to the predecessor
2037/// to enable tail call optimizations. The case it is currently looking for is:
2038/// @code
2039/// bb0:
2040/// %tmp0 = tail call i32 @f0()
2041/// br label %return
2042/// bb1:
2043/// %tmp1 = tail call i32 @f1()
2044/// br label %return
2045/// bb2:
2046/// %tmp2 = tail call i32 @f2()
2047/// br label %return
2048/// return:
2049/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2050/// ret i32 %retval
2051/// @endcode
2052///
2053/// =>
2054///
2055/// @code
2056/// bb0:
2057/// %tmp0 = tail call i32 @f0()
2058/// ret i32 %tmp0
2059/// bb1:
2060/// %tmp1 = tail call i32 @f1()
2061/// ret i32 %tmp1
2062/// bb2:
2063/// %tmp2 = tail call i32 @f2()
2064/// ret i32 %tmp2
2065/// @endcode
2066bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
2067 if (!TLI)
2068 return false;
2069
2070 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2071 if (!RetI)
2072 return false;
2073
2074 PHINode *PN = nullptr;
2075 BitCastInst *BCI = nullptr;
2076 Value *V = RetI->getReturnValue();
2077 if (V) {
2078 BCI = dyn_cast<BitCastInst>(V);
2079 if (BCI)
2080 V = BCI->getOperand(0);
2081
2082 PN = dyn_cast<PHINode>(V);
2083 if (!PN)
2084 return false;
2085 }
2086
2087 if (PN && PN->getParent() != BB)
2088 return false;
2089
2090 // Make sure there are no instructions between the PHI and return, or that the
2091 // return is the first instruction in the block.
2092 if (PN) {
2093 BasicBlock::iterator BI = BB->begin();
2094 // Skip over debug and the bitcast.
2095 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
2096 if (&*BI != RetI)
2097 return false;
2098 } else {
2099 BasicBlock::iterator BI = BB->begin();
2100 while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2101 if (&*BI != RetI)
2102 return false;
2103 }
2104
2105 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2106 /// call.
2107 const Function *F = BB->getParent();
2108 SmallVector<BasicBlock*, 4> TailCallBBs;
2109 if (PN) {
2110 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2111 // Look through bitcasts.
2112 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2113 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2114 BasicBlock *PredBB = PN->getIncomingBlock(I);
2115 // Make sure the phi value is indeed produced by the tail call.
2116 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2117 TLI->mayBeEmittedAsTailCall(CI) &&
2118 attributesPermitTailCall(F, CI, RetI, *TLI))
2119 TailCallBBs.push_back(PredBB);
2120 }
2121 } else {
2122 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2123 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2124 if (!VisitedBBs.insert(*PI).second)
2125 continue;
2126
2127 BasicBlock::InstListType &InstList = (*PI)->getInstList();
2128 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2129 BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2130 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2131 if (RI == RE)
2132 continue;
2133
2134 CallInst *CI = dyn_cast<CallInst>(&*RI);
2135 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2136 attributesPermitTailCall(F, CI, RetI, *TLI))
2137 TailCallBBs.push_back(*PI);
2138 }
2139 }
2140
2141 bool Changed = false;
2142 for (auto const &TailCallBB : TailCallBBs) {
2143 // Make sure the call instruction is followed by an unconditional branch to
2144 // the return block.
2145 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2146 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2147 continue;
2148
2149 // Duplicate the return into TailCallBB.
2150 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2151 ModifiedDT = Changed = true;
2152 ++NumRetsDup;
2153 }
2154
2155 // If we eliminated all predecessors of the block, delete the block now.
2156 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2157 BB->eraseFromParent();
2158
2159 return Changed;
2160}
2161
2162//===----------------------------------------------------------------------===//
2163// Memory Optimization
2164//===----------------------------------------------------------------------===//
2165
2166namespace {
2167
2168/// This is an extended version of TargetLowering::AddrMode
2169/// which holds actual Value*'s for register values.
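/// For example (illustrative), an address of the form 'GV + %base + 4*%idx + 8'
/// would be held as BaseGV = GV, BaseReg = %base, ScaledReg = %idx, Scale = 4
/// and BaseOffs = 8.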
2170struct ExtAddrMode : public TargetLowering::AddrMode {
2171 Value *BaseReg = nullptr;
2172 Value *ScaledReg = nullptr;
2173 Value *OriginalValue = nullptr;
2174 bool InBounds = true;
2175
2176 enum FieldName {
2177 NoField = 0x00,
2178 BaseRegField = 0x01,
2179 BaseGVField = 0x02,
2180 BaseOffsField = 0x04,
2181 ScaledRegField = 0x08,
2182 ScaleField = 0x10,
2183 MultipleFields = 0xff
2184 };
2185
2186
2187 ExtAddrMode() = default;
2188
2189 void print(raw_ostream &OS) const;
2190 void dump() const;
2191
2192 FieldName compare(const ExtAddrMode &other) {
2193 // First check that the types are the same on each field, as differing types
2194 // are something we can't cope with later on.
2195 if (BaseReg && other.BaseReg &&
2196 BaseReg->getType() != other.BaseReg->getType())
2197 return MultipleFields;
2198 if (BaseGV && other.BaseGV &&
2199 BaseGV->getType() != other.BaseGV->getType())
2200 return MultipleFields;
2201 if (ScaledReg && other.ScaledReg &&
2202 ScaledReg->getType() != other.ScaledReg->getType())
2203 return MultipleFields;
2204
2205 // Conservatively reject 'inbounds' mismatches.
2206 if (InBounds != other.InBounds)
2207 return MultipleFields;
2208
2209 // Check each field to see if it differs.
2210 unsigned Result = NoField;
2211 if (BaseReg != other.BaseReg)
2212 Result |= BaseRegField;
2213 if (BaseGV != other.BaseGV)
2214 Result |= BaseGVField;
2215 if (BaseOffs != other.BaseOffs)
2216 Result |= BaseOffsField;
2217 if (ScaledReg != other.ScaledReg)
2218 Result |= ScaledRegField;
2219 // Don't count 0 as being a different scale, because that actually means
2220 // unscaled (which will already be counted by having no ScaledReg).
2221 if (Scale && other.Scale && Scale != other.Scale)
2222 Result |= ScaleField;
2223
2224 if (countPopulation(Result) > 1)
2225 return MultipleFields;
2226 else
2227 return static_cast<FieldName>(Result);
2228 }
2229
2230 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2231 // with no offset.
2232 bool isTrivial() {
2233 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2234 // trivial if at most one of these terms is nonzero, except that BaseGV and
2235 // BaseReg both being zero actually means a null pointer value, which we
2236 // consider to be 'non-zero' here.
2237 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2238 }
2239
2240 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2241 switch (Field) {
2242 default:
2243 return nullptr;
2244 case BaseRegField:
2245 return BaseReg;
2246 case BaseGVField:
2247 return BaseGV;
2248 case ScaledRegField:
2249 return ScaledReg;
2250 case BaseOffsField:
2251 return ConstantInt::get(IntPtrTy, BaseOffs);
2252 }
2253 }
2254
2255 void SetCombinedField(FieldName Field, Value *V,
2256 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2257 switch (Field) {
2258 default:
2259 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2260 break;
2261 case ExtAddrMode::BaseRegField:
2262 BaseReg = V;
2263 break;
2264 case ExtAddrMode::BaseGVField:
2265 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2266 // in the BaseReg field.
2267 assert(BaseReg == nullptr);
2268 BaseReg = V;
2269 BaseGV = nullptr;
2270 break;
2271 case ExtAddrMode::ScaledRegField:
2272 ScaledReg = V;
2273 // If we have a mix of scaled and unscaled addrmodes then we want scale
2274 // to be the scale and not zero.
2275 if (!Scale)
2276 for (const ExtAddrMode &AM : AddrModes)
2277 if (AM.Scale) {
2278 Scale = AM.Scale;
2279 break;
2280 }
2281 break;
2282 case ExtAddrMode::BaseOffsField:
2283 // The offset is no longer a constant, so it goes in ScaledReg with a
2284 // scale of 1.
2285 assert(ScaledReg == nullptr);
2286 ScaledReg = V;
2287 Scale = 1;
2288 BaseOffs = 0;
2289 break;
2290 }
2291 }
2292};
2293
2294} // end anonymous namespace
2295
2296#ifndef NDEBUG
2297static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2298 AM.print(OS);
2299 return OS;
2300}
2301#endif
2302
2303#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2304void ExtAddrMode::print(raw_ostream &OS) const {
2305 bool NeedPlus = false;
2306 OS << "[";
2307 if (InBounds)
2308 OS << "inbounds ";
2309 if (BaseGV) {
2310 OS << (NeedPlus ? " + " : "")
2311 << "GV:";
2312 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2313 NeedPlus = true;
2314 }
2315
2316 if (BaseOffs) {
2317 OS << (NeedPlus ? " + " : "")
2318 << BaseOffs;
2319 NeedPlus = true;
2320 }
2321
2322 if (BaseReg) {
2323 OS << (NeedPlus ? " + " : "")
2324 << "Base:";
2325 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2326 NeedPlus = true;
2327 }
2328 if (Scale) {
2329 OS << (NeedPlus ? " + " : "")
2330 << Scale << "*";
2331 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2332 }
2333
2334 OS << ']';
2335}
2336
2337LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2338 print(dbgs());
2339 dbgs() << '\n';
2340}
2341#endif
2342
2343namespace {
2344
2345/// This class provides transaction-based operations on the IR.
2346/// Every change made through this class is recorded in the internal state and
2347/// can be undone (rollback) until commit is called.
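/// A conceptual usage sketch (illustrative; the exact method names and
/// constructor arguments are assumed, only the undo/commit semantics are
/// documented by this class):
///
///   TypePromotionTransaction TPT(...);
///   auto RestorePoint = TPT.getRestorationPoint();
///   // ... speculatively rewrite the IR through TPT ...
///   if (!Profitable)
///     TPT.rollback(RestorePoint);  // undo every recorded action
///   else
///     TPT.commit();                // keep the changes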
2348class TypePromotionTransaction {
2349 /// This represents the common interface of the individual transaction.
2350 /// Each class implements the logic for doing one specific modification on
2351 /// the IR via the TypePromotionTransaction.
2352 class TypePromotionAction {
2353 protected:
2354 /// The Instruction modified.
2355 Instruction *Inst;
2356
2357 public:
2358 /// Constructor of the action.
2359 /// The constructor performs the related action on the IR.
2360 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2361
2362 virtual ~TypePromotionAction() = default;
2363
2364 /// Undo the modification done by this action.
2365 /// When this method is called, the IR must be in the same state as it was
2366 /// before this action was applied.
2367 /// \pre Undoing the action works if and only if the IR is in the exact same
2368 /// state as it was directly after this action was applied.
2369 virtual void undo() = 0;
2370
2371 /// Advocate every change made by this action.
2372 /// When the results on the IR of the action are to be kept, it is important
2373 /// to call this function, otherwise hidden information may be kept forever.
2374 virtual void commit() {
2375 // Nothing to be done, this action is not doing anything.
2376 }
2377 };
2378
2379 /// Utility to remember the position of an instruction.
2380 class InsertionHandler {
2381 /// Position of an instruction.
2382 /// Either the instruction:
2383 /// - is the first in a basic block: BB is used.
2384 /// - has a previous instruction: PrevInst is used.
2385 union {
2386 Instruction *PrevInst;
2387 BasicBlock *BB;
2388 } Point;
2389
2390 /// Remember whether or not the instruction had a previous instruction.
2391 bool HasPrevInstruction;
2392
2393 public:
2394 /// Record the position of \p Inst.
2395 InsertionHandler(Instruction *Inst) {
2396 BasicBlock::iterator It = Inst->getIterator();
2397 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2398 if (HasPrevInstruction)
2399 Point.PrevInst = &*--It;
2400 else
2401 Point.BB = Inst->getParent();
2402 }
2403
2404 /// Insert \p Inst at the recorded position.
2405 void insert(Instruction *Inst) {
2406 if (HasPrevInstruction) {
2407 if (Inst->getParent())
2408 Inst->removeFromParent();
2409 Inst->insertAfter(Point.PrevInst);
2410 } else {
2411 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2412 if (Inst->getParent())
2413 Inst->moveBefore(Position);
2414 else
2415 Inst->insertBefore(Position);
2416 }
2417 }
2418 };
2419
2420 /// Move an instruction before another.
2421 class InstructionMoveBefore : public TypePromotionAction {
2422 /// Original position of the instruction.
2423 InsertionHandler Position;
2424
2425 public:
2426 /// Move \p Inst before \p Before.
2427 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2428 : TypePromotionAction(Inst), Position(Inst) {
2429 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2430 << "\n");
2431 Inst->moveBefore(Before);
2432 }
2433
2434 /// Move the instruction back to its original position.
2435 void undo() override {
2436 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2437 Position.insert(Inst);
2438 }
2439 };
2440
2441 /// Set the operand of an instruction with a new value.
2442 class OperandSetter : public TypePromotionAction {
2443 /// Original operand of the instruction.
2444 Value *Origin;
2445
2446 /// Index of the modified instruction.
2447 unsigned Idx;
2448
2449 public:
2450 /// Set \p Idx operand of \p Inst with \p NewVal.
2451 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2452 : TypePromotionAction(Inst), Idx(Idx) {
2453 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2454 << "for:" << *Inst << "\n"
2455 << "with:" << *NewVal << "\n");
2456 Origin = Inst->getOperand(Idx);
2457 Inst->setOperand(Idx, NewVal);
2458 }
2459
2460 /// Restore the original value of the instruction.
2461 void undo() override {
2462 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2463 << "for: " << *Inst << "\n"
2464 << "with: " << *Origin << "\n");
2465 Inst->setOperand(Idx, Origin);
2466 }
2467 };
2468
2469 /// Hide the operands of an instruction.
2470 /// Do as if this instruction was not using any of its operands.
2471 class OperandsHider : public TypePromotionAction {
2472 /// The list of original operands.
2473 SmallVector<Value *, 4> OriginalValues;
2474
2475 public:
2476 /// Remove \p Inst from the uses of the operands of \p Inst.
2477 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2478 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2479 unsigned NumOpnds = Inst->getNumOperands();
2480 OriginalValues.reserve(NumOpnds);
2481 for (unsigned It = 0; It < NumOpnds; ++It) {
2482 // Save the current operand.
2483 Value *Val = Inst->getOperand(It);
2484 OriginalValues.push_back(Val);
2485 // Set a dummy one.
2486 // We could use OperandSetter here, but that would imply an overhead
2487 // that we are not willing to pay.
2488 Inst->setOperand(It, UndefValue::get(Val->getType()));
2489 }
2490 }
2491
2492 /// Restore the original list of uses.
2493 void undo() override {
2494 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2495 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2496 Inst->setOperand(It, OriginalValues[It]);
2497 }
2498 };
2499
2500 /// Build a truncate instruction.
2501 class TruncBuilder : public TypePromotionAction {
2502 Value *Val;
2503
2504 public:
2505 /// Build a truncate instruction of \p Opnd producing a \p Ty
2506 /// result.
2507 /// trunc Opnd to Ty.
2508 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2509 IRBuilder<> Builder(Opnd);
2510 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2511 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2512 }
2513
2514 /// Get the built value.
2515 Value *getBuiltValue() { return Val; }
2516
2517 /// Remove the built instruction.
2518 void undo() override {
2519 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2520 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2521 IVal->eraseFromParent();
2522 }
2523 };
2524
2525 /// Build a sign extension instruction.
2526 class SExtBuilder : public TypePromotionAction {
2527 Value *Val;
2528
2529 public:
2530 /// Build a sign extension instruction of \p Opnd producing a \p Ty
2531 /// result.
2532 /// sext Opnd to Ty.
2533 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2534 : TypePromotionAction(InsertPt) {
2535 IRBuilder<> Builder(InsertPt);
2536 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2537 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2538 }
2539
2540 /// Get the built value.
2541 Value *getBuiltValue() { return Val; }
2542
2543 /// Remove the built instruction.
2544 void undo() override {
2545 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2546 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2547 IVal->eraseFromParent();
2548 }
2549 };
2550
2551 /// Build a zero extension instruction.
2552 class ZExtBuilder : public TypePromotionAction {
2553 Value *Val;
2554
2555 public:
2556 /// Build a zero extension instruction of \p Opnd producing a \p Ty
2557 /// result.
2558 /// zext Opnd to Ty.
2559 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2560 : TypePromotionAction(InsertPt) {
2561 IRBuilder<> Builder(InsertPt);
2562 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2563 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2564 }
2565
2566 /// Get the built value.
2567 Value *getBuiltValue() { return Val; }
2568
2569 /// Remove the built instruction.
2570 void undo() override {
2571       LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2572 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2573 IVal->eraseFromParent();
2574 }
2575 };
2576
2577 /// Mutate an instruction to another type.
2578 class TypeMutator : public TypePromotionAction {
2579 /// Record the original type.
2580 Type *OrigTy;
2581
2582 public:
2583 /// Mutate the type of \p Inst into \p NewTy.
2584 TypeMutator(Instruction *Inst, Type *NewTy)
2585 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2586       LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2587                         << "\n");
2588 Inst->mutateType(NewTy);
2589 }
2590
2591 /// Mutate the instruction back to its original type.
2592 void undo() override {
2593       LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2594                         << "\n");
2595 Inst->mutateType(OrigTy);
2596 }
2597 };
2598
2599 /// Replace the uses of an instruction by another instruction.
2600 class UsesReplacer : public TypePromotionAction {
2601 /// Helper structure to keep track of the replaced uses.
2602 struct InstructionAndIdx {
2603       /// The instruction that uses the replaced instruction.
2604 Instruction *Inst;
2605
2606       /// The operand index at which the replaced instruction is used in Inst.
2607 unsigned Idx;
2608
2609 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2610 : Inst(Inst), Idx(Idx) {}
2611 };
2612
2613 /// Keep track of the original uses (pair Instruction, Index).
2614 SmallVector<InstructionAndIdx, 4> OriginalUses;
2615 /// Keep track of the debug users.
2616 SmallVector<DbgValueInst *, 1> DbgValues;
2617
2618 using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2619
2620 public:
2621     /// Replace all the uses of \p Inst with \p New.
2622 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2623       LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2624                         << "\n");
2625 // Record the original uses.
2626 for (Use &U : Inst->uses()) {
2627 Instruction *UserI = cast<Instruction>(U.getUser());
2628 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2629 }
2630 // Record the debug uses separately. They are not in the instruction's
2631 // use list, but they are replaced by RAUW.
2632 findDbgValues(DbgValues, Inst);
2633
2634 // Now, we can replace the uses.
2635 Inst->replaceAllUsesWith(New);
2636 }
2637
2638 /// Reassign the original uses of Inst to Inst.
2639 void undo() override {
2640       LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2641 for (use_iterator UseIt = OriginalUses.begin(),
2642 EndIt = OriginalUses.end();
2643 UseIt != EndIt; ++UseIt) {
2644 UseIt->Inst->setOperand(UseIt->Idx, Inst);
2645 }
2646 // RAUW has replaced all original uses with references to the new value,
2647 // including the debug uses. Since we are undoing the replacements,
2648 // the original debug uses must also be reinstated to maintain the
2649 // correctness and utility of debug value instructions.
2650 for (auto *DVI: DbgValues) {
2651 LLVMContext &Ctx = Inst->getType()->getContext();
2652 auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2653 DVI->setOperand(0, MV);
2654 }
2655 }
2656 };
2657
2658 /// Remove an instruction from the IR.
2659 class InstructionRemover : public TypePromotionAction {
2660 /// Original position of the instruction.
2661 InsertionHandler Inserter;
2662
2663     /// Helper structure to hide all the links to the instruction. In other
2664     /// words, this helps to act as if the instruction were removed.
2665 OperandsHider Hider;
2666
2667 /// Keep track of the uses replaced, if any.
2668 UsesReplacer *Replacer = nullptr;
2669
2670 /// Keep track of instructions removed.
2671 SetOfInstrs &RemovedInsts;
2672
2673 public:
2674     /// Remove all references to \p Inst and optionally replace all its
2675 /// uses with New.
2676 /// \p RemovedInsts Keep track of the instructions removed by this Action.
2677 /// \pre If !Inst->use_empty(), then New != nullptr
2678 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2679 Value *New = nullptr)
2680 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2681 RemovedInsts(RemovedInsts) {
2682 if (New)
2683 Replacer = new UsesReplacer(Inst, New);
2684       LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2685 RemovedInsts.insert(Inst);
2686 /// The instructions removed here will be freed after completing
2687 /// optimizeBlock() for all blocks as we need to keep track of the
2688 /// removed instructions during promotion.
2689 Inst->removeFromParent();
2690 }
2691
2692 ~InstructionRemover() override { delete Replacer; }
2693
2694     /// Resurrect the instruction and reassign it to the proper uses if a
2695     /// new value was provided when building this action.
2696 void undo() override {
2697       LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2698 Inserter.insert(Inst);
2699 if (Replacer)
2700 Replacer->undo();
2701 Hider.undo();
2702 RemovedInsts.erase(Inst);
2703 }
2704 };
2705
2706public:
2707 /// Restoration point.
2708 /// The restoration point is a pointer to an action instead of an iterator
2709 /// because the iterator may be invalidated but not the pointer.
2710 using ConstRestorationPt = const TypePromotionAction *;
2711
2712 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2713 : RemovedInsts(RemovedInsts) {}
2714
2715   /// Commit all the changes made in this transaction.
2716 void commit();
2717
2718 /// Undo all the changes made after the given point.
2719 void rollback(ConstRestorationPt Point);
2720
2721 /// Get the current restoration point.
2722 ConstRestorationPt getRestorationPoint() const;
2723
2724 /// \name API for IR modification with state keeping to support rollback.
2725 /// @{
2726 /// Same as Instruction::setOperand.
2727 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2728
2729 /// Same as Instruction::eraseFromParent.
2730 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2731
2732 /// Same as Value::replaceAllUsesWith.
2733 void replaceAllUsesWith(Instruction *Inst, Value *New);
2734
2735 /// Same as Value::mutateType.
2736 void mutateType(Instruction *Inst, Type *NewTy);
2737
2738 /// Same as IRBuilder::createTrunc.
2739 Value *createTrunc(Instruction *Opnd, Type *Ty);
2740
2741 /// Same as IRBuilder::createSExt.
2742 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2743
2744 /// Same as IRBuilder::createZExt.
2745 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2746
2747 /// Same as Instruction::moveBefore.
2748 void moveBefore(Instruction *Inst, Instruction *Before);
2749 /// @}
2750
2751private:
2752 /// The ordered list of actions made so far.
2753 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2754
2755 using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2756
2757 SetOfInstrs &RemovedInsts;
2758};
2759
2760} // end anonymous namespace
2761
2762void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2763 Value *NewVal) {
2764 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
2765 Inst, Idx, NewVal));
2766}
2767
2768void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2769 Value *NewVal) {
2770 Actions.push_back(
2771 std::make_unique<TypePromotionTransaction::InstructionRemover>(
2772 Inst, RemovedInsts, NewVal));
2773}
2774
2775void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2776 Value *New) {
2777 Actions.push_back(
2778 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2779}
2780
2781void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2782 Actions.push_back(
2783 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2784}
2785
2786Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2787 Type *Ty) {
2788 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2789 Value *Val = Ptr->getBuiltValue();
2790 Actions.push_back(std::move(Ptr));
2791 return Val;
2792}
2793
2794Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2795 Value *Opnd, Type *Ty) {
2796 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2797 Value *Val = Ptr->getBuiltValue();
2798 Actions.push_back(std::move(Ptr));
2799 return Val;
2800}
2801
2802Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2803 Value *Opnd, Type *Ty) {
2804 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2805 Value *Val = Ptr->getBuiltValue();
2806 Actions.push_back(std::move(Ptr));
2807 return Val;
2808}
2809
2810void TypePromotionTransaction::moveBefore(Instruction *Inst,
2811 Instruction *Before) {
2812 Actions.push_back(
2813 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2814 Inst, Before));
2815}
2816
2817TypePromotionTransaction::ConstRestorationPt
2818TypePromotionTransaction::getRestorationPoint() const {
2819 return !Actions.empty() ? Actions.back().get() : nullptr;
2820}
2821
2822void TypePromotionTransaction::commit() {
2823 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2824 ++It)
2825 (*It)->commit();
2826 Actions.clear();
2827}
2828
2829void TypePromotionTransaction::rollback(
2830 TypePromotionTransaction::ConstRestorationPt Point) {
2831 while (!Actions.empty() && Point != Actions.back().get()) {
2832 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2833 Curr->undo();
2834 }
2835}
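// A minimal usage sketch of the transaction API defined above: take a
// restoration point, apply a speculative rewrite, and either roll it back or
// commit it. The pass uses this pattern when a rewrite may turn out to be
// unprofitable; the helper function and the `KeepChange` flag here are
// illustrative assumptions, not code taken from CodeGenPrepare itself.
static void exampleTransactionUse(Instruction *Inst, Value *NewOperand,
                                  SetOfInstrs &RemovedInsts, bool KeepChange) {
  TypePromotionTransaction TPT(RemovedInsts);
  // Remember where we are so everything recorded after this can be undone.
  TypePromotionTransaction::ConstRestorationPt Checkpoint =
      TPT.getRestorationPoint();
  // Speculatively rewrite operand 0; the change is recorded as an action.
  TPT.setOperand(Inst, 0, NewOperand);
  if (KeepChange)
    TPT.commit();             // Make all recorded actions permanent.
  else
    TPT.rollback(Checkpoint); // Undo the recorded actions in reverse order.
}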
2836
2837namespace {
2838
2839/// A helper class for matching addressing modes.
2840///
2841/// This encapsulates the logic for matching the target-legal addressing modes.
2842class AddressingModeMatcher {
2843 SmallVectorImpl<Instruction*> &AddrModeInsts;
2844 const TargetLowering &TLI;
2845 const TargetRegisterInfo &TRI;
2846 const DataLayout &DL;
2847
2848 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2849 /// the memory instruction that we're computing this address for.
2850 Type *AccessTy;
2851 unsigned AddrSpace;
2852 Instruction *MemoryInst;
2853
2854 /// This is the addressing mode that we're building up. This is
2855 /// part of the return value of this addressing mode matching stuff.
2856 ExtAddrMode &AddrMode;
2857
2858 /// The instructions inserted by other CodeGenPrepare optimizations.
2859 const SetOfInstrs &InsertedInsts;
2860
2861 /// A map from the instructions to their type before promotion.
2862 InstrToOrigTy &PromotedInsts;
2863
2864 /// The ongoing transaction where every action should be registered.
2865 TypePromotionTransaction &TPT;
2866
2867 // A GEP which has too large offset to be folded into the addressing mode.
2868 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2869
2870 /// This is set to true when we should not do profitability checks.
2871 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2872 bool IgnoreProfitability;
2873
2874 AddressingModeMatcher(
2875 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2876 const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2877 ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2878 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2879 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
2880 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2881 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2882 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2883 PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
2884 IgnoreProfitability = false;
2885 }
2886
2887public:
2888 /// Find the maximal addressing mode that a load/store of V can fold,
2889   /// given an access type of AccessTy. This returns a list of involved
2890 /// instructions in AddrModeInsts.
2891 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2892 /// optimizations.
2893 /// \p PromotedInsts maps the instructions to their type before promotion.
2894   /// \p TPT The ongoing transaction where every action should be registered.
2895 static ExtAddrMode
2896 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2897 SmallVectorImpl<Instruction *> &AddrModeInsts,
2898 const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2899 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2900 TypePromotionTransaction &TPT,
2901 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
2902 ExtAddrMode Result;
2903
2904 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2905 MemoryInst, Result, InsertedInsts,
2906 PromotedInsts, TPT, LargeOffsetGEP)
2907 .matchAddr(V, 0);
2908     (void)Success; assert(Success && "Couldn't select *anything*?");
2909 return Result;
2910 }
2911
2912private:
2913 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2914 bool matchAddr(Value *Addr, unsigned Depth);
2915 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2916 bool *MovedAway = nullptr);
2917 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2918 ExtAddrMode &AMBefore,
2919 ExtAddrMode &AMAfter);
2920 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2921 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2922 Value *PromotedOperand) const;
2923};
2924
2925class PhiNodeSet;
2926
2927/// An iterator for PhiNodeSet.
2928class PhiNodeSetIterator {
2929 PhiNodeSet * const Set;
2930 size_t CurrentIndex = 0;
2931
2932public:
2933 /// The constructor. Start should point to either a valid element, or be equal
2934 /// to the size of the underlying SmallVector of the PhiNodeSet.
2935 PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2936 PHINode * operator*() const;
2937 PhiNodeSetIterator& operator++();
2938 bool operator==(const PhiNodeSetIterator &RHS) const;
2939 bool operator!=(const PhiNodeSetIterator &RHS) const;
2940};
2941
2942/// Keeps a set of PHINodes.
2943///
2944/// This is a minimal set implementation for a specific use case:
2945/// It is very fast when there are very few elements, but also provides good
2946/// performance when there are many. It is similar to SmallPtrSet, but also
2947/// provides iteration by insertion order, which is deterministic and stable
2948/// across runs. It is also similar to SmallSetVector, but provides removing
2949/// elements in O(1) time. This is achieved by never actually removing the
2950/// element from the underlying vector, which comes at the cost of using more
2951/// memory, but that is fine since PhiNodeSets are used as short-lived objects.
2952class PhiNodeSet {
2953 friend class PhiNodeSetIterator;
2954
2955 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
2956 using iterator = PhiNodeSetIterator;
2957
2958 /// Keeps the elements in the order of their insertion in the underlying
2959 /// vector. To achieve constant time removal, it never deletes any element.
2960 SmallVector<PHINode *, 32> NodeList;
2961
2962 /// Keeps the elements in the underlying set implementation. This (and not the
2963 /// NodeList defined above) is the source of truth on whether an element
2964 /// is actually in the collection.
2965 MapType NodeMap;
2966
2967 /// Points to the first valid (not deleted) element when the set is not empty
2968   /// and the value is not zero. Equals the size of the underlying vector
2969 /// when the set is empty. When the value is 0, as in the beginning, the
2970 /// first element may or may not be valid.
2971 size_t FirstValidElement = 0;
2972
2973public:
2974 /// Inserts a new element to the collection.
2975 /// \returns true if the element is actually added, i.e. was not in the
2976 /// collection before the operation.
2977 bool insert(PHINode *Ptr) {
2978 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
2979 NodeList.push_back(Ptr);
2980 return true;
2981 }
2982 return false;
2983 }
2984
2985 /// Removes the element from the collection.
2986 /// \returns whether the element is actually removed, i.e. was in the
2987 /// collection before the operation.
2988 bool erase(PHINode *Ptr) {
2989 auto it = NodeMap.find(Ptr);
2990 if (it != NodeMap.end()) {
2991 NodeMap.erase(Ptr);
2992 SkipRemovedElements(FirstValidElement);
2993 return true;
2994 }
2995 return false;
2996 }
2997
2998 /// Removes all elements and clears the collection.
2999 void clear() {
3000 NodeMap.clear();
3001 NodeList.clear();
3002 FirstValidElement = 0;
3003 }
3004
3005 /// \returns an iterator that will iterate the elements in the order of
3006 /// insertion.
3007 iterator begin() {
3008 if (FirstValidElement == 0)
3009 SkipRemovedElements(FirstValidElement);
3010 return PhiNodeSetIterator(this, FirstValidElement);
3011 }
3012
3013 /// \returns an iterator that points to the end of the collection.
3014 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3015
3016 /// Returns the number of elements in the collection.
3017 size_t size() const {
3018 return NodeMap.size();
3019 }
3020
3021 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3022 size_t count(PHINode *Ptr) const {
3023 return NodeMap.count(Ptr);
3024 }
3025
3026private:
3027 /// Updates the CurrentIndex so that it will point to a valid element.
3028 ///
3029 /// If the element of NodeList at CurrentIndex is valid, it does not
3030 /// change it. If there are no more valid elements, it updates CurrentIndex
3031 /// to point to the end of the NodeList.
3032 void SkipRemovedElements(size_t &CurrentIndex) {
3033 while (CurrentIndex < NodeList.size()) {
3034 auto it = NodeMap.find(NodeList[CurrentIndex]);
3035 // If the element has been deleted and added again later, NodeMap will
3036 // point to a different index, so CurrentIndex will still be invalid.
3037 if (it != NodeMap.end() && it->second == CurrentIndex)
3038 break;
3039 ++CurrentIndex;
3040 }
3041 }
3042};
3043
3044PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3045 : Set(Set), CurrentIndex(Start) {}
3046
3047PHINode * PhiNodeSetIterator::operator*() const {
3048   assert(CurrentIndex < Set->NodeList.size() &&
3049          "PhiNodeSet access out of range");
3050 return Set->NodeList[CurrentIndex];
3051}
3052
3053PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
3054   assert(CurrentIndex < Set->NodeList.size() &&
3055          "PhiNodeSet access out of range");
3056 ++CurrentIndex;
3057 Set->SkipRemovedElements(CurrentIndex);
3058 return *this;
3059}
3060
3061bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3062 return CurrentIndex == RHS.CurrentIndex;
3063}
3064
3065bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3066 return !((*this) == RHS);
3067}
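// A small sketch of the PhiNodeSet semantics described above: iteration
// follows insertion order, erase() is O(1) because the vector slot is only
// tombstoned in NodeMap, and the iterator transparently skips tombstones.
// The helper below is illustrative only and is not called anywhere in this
// file.
static unsigned examplePhiNodeSetUse(PHINode *A, PHINode *B, PHINode *C) {
  PhiNodeSet Set;
  Set.insert(A);
  Set.insert(B);
  Set.insert(C);
  Set.erase(B);             // B stays in NodeList but leaves NodeMap.
  unsigned Visited = 0;
  for (PHINode *PN : Set) { // Visits A then C; the erased B is skipped.
    (void)PN;
    ++Visited;
  }
  return Visited;           // 2, assuming A, B and C are distinct nodes.
}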
3068
3069/// Keep track of simplification of Phi nodes.
3070/// Accepts the set of all phi nodes and erases a phi node from this set
3071/// if it is simplified.
3072class SimplificationTracker {
3073 DenseMap<Value *, Value *> Storage;
3074 const SimplifyQuery &SQ;
3075 // Tracks newly created Phi nodes. The elements are iterated by insertion
3076 // order.
3077 PhiNodeSet AllPhiNodes;
3078 // Tracks newly created Select nodes.
3079 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3080
3081public:
3082 SimplificationTracker(const SimplifyQuery &sq)
3083 : SQ(sq) {}
3084
3085 Value *Get(Value *V) {
3086 do {
3087 auto SV = Storage.find(V);
3088 if (SV == Storage.end())
3089 return V;
3090 V = SV->second;
3091 } while (true);
3092 }
3093
3094 Value *Simplify(Value *Val) {
3095 SmallVector<Value *, 32> WorkList;
3096 SmallPtrSet<Value *, 32> Visited;
3097 WorkList.push_back(Val);
3098 while (!WorkList.empty()) {
3099 auto P = WorkList.pop_back_val();
3100 if (!Visited.insert(P).second)
3101 continue;
3102 if (auto *PI = dyn_cast<Instruction>(P))
3103 if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
3104 for (auto *U : PI->users())
3105 WorkList.push_back(cast<Value>(U));
3106 Put(PI, V);
3107 PI->replaceAllUsesWith(V);
3108 if (auto *PHI = dyn_cast<PHINode>(PI))
3109 AllPhiNodes.erase(PHI);
3110 if (auto *Select = dyn_cast<SelectInst>(PI))
3111 AllSelectNodes.erase(Select);
3112 PI->eraseFromParent();
3113 }
3114 }
3115 return Get(Val);
3116 }
3117
3118 void Put(Value *From, Value *To) {
3119 Storage.insert({ From, To });
3120 }
3121
3122 void ReplacePhi(PHINode *From, PHINode *To) {
3123 Value* OldReplacement = Get(From);
3124 while (OldReplacement != From) {
34
Assuming 'OldReplacement' is not equal to 'From'
35
Loop condition is true. Entering loop body
38
Assuming 'OldReplacement' is not equal to 'From'
39
Loop condition is true. Entering loop body
42
Assuming 'OldReplacement' is equal to 'From'
43
Loop condition is false. Execution continues on line 3129
3125 From = To;
40
Null pointer value stored to 'From'
3126 To = dyn_cast<PHINode>(OldReplacement);
36
Assuming 'OldReplacement' is not a 'PHINode'
37
Null pointer value stored to 'To'
41
Assuming 'OldReplacement' is a 'PHINode'
3127 OldReplacement = Get(From);
3128 }
3129    assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
43.1
'To' is non-null
44
Assuming the condition is true
45
'?' condition is true
3130 Put(From, To);
3131 From->replaceAllUsesWith(To);
46
Called C++ object pointer is null
3132 AllPhiNodes.erase(From);
3133 From->eraseFromParent();
3134 }
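  // The path reported above (ending in the line 3131 warning) can be read as
  // follows; the Storage contents sketched here are an assumption used for
  // illustration, not a state the analyzer proved reachable from callers:
  //
  //   call:        ReplacePhi(P0, P1), with Get(P0) != P0
  //   iteration 1: From = P1; To = dyn_cast<PHINode>(Get(P0)) == nullptr,
  //                because the recorded replacement is not a PHINode
  //   iteration 2: Get(P1) != P1, so From = To = nullptr and
  //                To = dyn_cast<PHINode>(Get(P1)) == some PHINode
  //   exit:        Get(nullptr) == nullptr == From, so the loop stops
  //   lines 3130-3131: Put(From, To); From->replaceAllUsesWith(To)
  //                dereferences the null 'From'.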
3135
3136 PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
3137
3138 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3139
3140 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3141
3142 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3143
3144 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3145
3146 void destroyNewNodes(Type *CommonType) {
3147 // For safe erasing, replace the uses with dummy value first.
3148 auto Dummy = UndefValue::get(CommonType);
3149 for (auto I : AllPhiNodes) {
3150 I->replaceAllUsesWith(Dummy);
3151 I->eraseFromParent();
3152 }
3153 AllPhiNodes.clear();
3154 for (auto I : AllSelectNodes) {
3155 I->replaceAllUsesWith(Dummy);
3156 I->eraseFromParent();
3157 }
3158 AllSelectNodes.clear();
3159 }
3160};
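// A small sketch of the transitive Get/Put lookup implemented by
// SimplificationTracker above (value names are illustrative): after
// Put(A, B) and Put(B, C), Get(A) follows the Storage chain A -> B -> C and
// returns C, while Get(C) returns C itself because no replacement for C has
// been recorded.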
3161
3162/// A helper class for combining addressing modes.
3163class AddressingModeCombiner {
3164 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3165 typedef std::pair<PHINode *, PHINode *> PHIPair;
3166
3167private:
3168 /// The addressing modes we've collected.
3169 SmallVector<ExtAddrMode, 16> AddrModes;
3170
3171 /// The field in which the AddrModes differ, when we have more than one.
3172 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3173
3174 /// Are the AddrModes that we have all just equal to their original values?
3175 bool AllAddrModesTrivial = true;
3176
3177 /// Common Type for all different fields in addressing modes.
3178 Type *CommonType;
3179
3180 /// SimplifyQuery for simplifyInstruction utility.
3181 const SimplifyQuery &SQ;
3182
3183 /// Original Address.
3184 Value *Original;
3185
3186public:
3187 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3188 : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
3189
3190 /// Get the combined AddrMode
3191 const ExtAddrMode &getAddrMode() const {
3192 return AddrModes[0];
3193 }
3194
3195 /// Add a new AddrMode if it's compatible with the AddrModes we already
3196 /// have.
3197 /// \return True iff we succeeded in doing so.
3198 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3199     // Take note of whether we have any non-trivial AddrModes, as we need to detect
3200 // when all AddrModes are trivial as then we would introduce a phi or select
3201 // which just duplicates what's already there.
3202 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3203
3204 // If this is the first addrmode then everything is fine.
3205 if (AddrModes.empty()) {
3206 AddrModes.emplace_back(NewAddrMode);
3207 return true;
3208 }
3209
3210 // Figure out how different this is from the other address modes, which we
3211 // can do just by comparing against the first one given that we only care
3212 // about the cumulative difference.
3213 ExtAddrMode::FieldName ThisDifferentField =
3214 AddrModes[0].compare(NewAddrMode);
3215 if (DifferentField == ExtAddrMode::NoField)
3216 DifferentField = ThisDifferentField;
3217 else if (DifferentField != ThisDifferentField)
3218 DifferentField = ExtAddrMode::MultipleFields;
3219
3220 // If NewAddrMode differs in more than one dimension we cannot handle it.
3221 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3222
3223 // If Scale Field is different then we reject.
3224 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3225
3226     // We must also reject the case when the base offset is different and the
3227     // scale reg is not null: we cannot handle this case, because the merge of
3228     // the different offsets would have to be used as the ScaleReg.
3229 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3230 !NewAddrMode.ScaledReg);
3231
3232     // We must also reject the case when the GV is different and a BaseReg is
3233     // installed, because we want to use the base reg as the merge of the GV values.
3234 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3235 !NewAddrMode.HasBaseReg);
3236
3237     // Even if NewAddrMode is the same, we still need to collect it because the
3238     // original value is different; later we will need all the original values
3239     // as anchors when finding the common Phi node.
3240 if (CanHandle)
3241 AddrModes.emplace_back(NewAddrMode);
3242 else
3243 AddrModes.clear();
3244
3245 return CanHandle;
3246 }
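  // A worked example of the rules above (field values are illustrative):
  // starting from AddrModes[0] = {BaseReg = %p1, BaseOffs = 40}, adding
  // {BaseReg = %p2, BaseOffs = 40} differs only in BaseRegField, so it is
  // collected and DifferentField becomes BaseRegField. A later mode that
  // additionally changes BaseOffs turns DifferentField into MultipleFields,
  // and a mode whose Scale differs is rejected outright; in both cases
  // AddrModes is cleared and addNewAddrMode returns false.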
3247
3248 /// Combine the addressing modes we've collected into a single
3249 /// addressing mode.
3250 /// \return True iff we successfully combined them or we only had one so
3251 /// didn't need to combine them anyway.
3252 bool combineAddrModes() {
3253 // If we have no AddrModes then they can't be combined.
3254 if (AddrModes.size() == 0)
12
Assuming the condition is false
13
Taking false branch
3255 return false;
3256
3257 // A single AddrMode can trivially be combined.
3258 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
14
Assuming the condition is false
15
Assuming field 'DifferentField' is not equal to NoField
16
Taking false branch
3259 return true;
3260
3261 // If the AddrModes we collected are all just equal to the value they are
3262 // derived from then combining them wouldn't do anything useful.
3263 if (AllAddrModesTrivial)
17
Assuming field 'AllAddrModesTrivial' is false
18
Taking false branch
3264 return false;
3265
3266 if (!addrModeCombiningAllowed())
19
Assuming the condition is false
20
Taking false branch
3267 return false;
3268
3269     // Build a map from <original value, basic block where we saw it> to the
3270     // value of the base register.
3271 // Bail out if there is no common type.
3272 FoldAddrToValueMapping Map;
3273 if (!initializeMap(Map))
21
Taking false branch
3274 return false;
3275
3276 Value *CommonValue = findCommon(Map);
22
Calling 'AddressingModeCombiner::findCommon'
3277 if (CommonValue)
3278 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3279 return CommonValue != nullptr;
3280 }
3281
3282private:
3283   /// Initialize Map with anchor values. For each address seen,
3284   /// we record the value of the differing field in that address.
3285   /// At the same time we find a common type for the differing fields, which we
3286   /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3287 /// Return false if there is no common type found.
3288 bool initializeMap(FoldAddrToValueMapping &Map) {
3289 // Keep track of keys where the value is null. We will need to replace it
3290 // with constant null when we know the common type.
3291 SmallVector<Value *, 2> NullValue;
3292 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3293 for (auto &AM : AddrModes) {
3294 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3295 if (DV) {
3296 auto *Type = DV->getType();
3297 if (CommonType && CommonType != Type)
3298 return false;
3299 CommonType = Type;
3300 Map[AM.OriginalValue] = DV;
3301 } else {
3302 NullValue.push_back(AM.OriginalValue);
3303 }
3304 }
3305     assert(CommonType && "At least one non-null value must be!");
3306 for (auto *V : NullValue)
3307 Map[V] = Constant::getNullValue(CommonType);
3308 return true;
3309 }
3310
3311   /// We have a mapping between a value A and another value B, where B was a
3312   /// field in the addressing mode represented by A. We also have an original
3313   /// value C representing the address we start with. Traversing from C through
3314   /// phis and selects, we ended up with the A's in the map. This utility
3315   /// function tries to find a value V which is a field in addressing mode C such
3316   /// that, traversing through phi nodes and selects, we end up at the
3317   /// corresponding B values in the map. It creates new Phis/Selects if needed.
3318 // The simple example looks as follows:
3319 // BB1:
3320 // p1 = b1 + 40
3321 // br cond BB2, BB3
3322 // BB2:
3323 // p2 = b2 + 40
3324 // br BB3
3325 // BB3:
3326 // p = phi [p1, BB1], [p2, BB2]
3327 // v = load p
3328 // Map is
3329 // p1 -> b1
3330 // p2 -> b2
3331 // Request is
3332 // p -> ?
3333 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3334 Value *findCommon(FoldAddrToValueMapping &Map) {
3335 // Tracks the simplification of newly created phi nodes. The reason we use
3336     // this mapping is that we will add newly created Phi nodes to AddrToBase.
3337 // Simplification of Phi nodes is recursive, so some Phi node may
3338 // be simplified after we added it to AddrToBase. In reality this
3339 // simplification is possible only if original phi/selects were not
3340 // simplified yet.
3341 // Using this mapping we can find the current value in AddrToBase.
3342 SimplificationTracker ST(SQ);
3343
3344 // First step, DFS to create PHI nodes for all intermediate blocks.
3345 // Also fill traverse order for the second step.
3346 SmallVector<Value *, 32> TraverseOrder;
3347 InsertPlaceholders(Map, TraverseOrder, ST);
3348
3349 // Second Step, fill new nodes by merged values and simplify if possible.
3350 FillPlaceholders(Map, TraverseOrder, ST);
3351
3352 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
23
Assuming the condition is false
3353 ST.destroyNewNodes(CommonType);
3354 return nullptr;
3355 }
3356
3357 // Now we'd like to match New Phi nodes to existed ones.
3358 unsigned PhiNotMatchedCount = 0;
3359 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
24
Calling 'AddressingModeCombiner::MatchPhiSet'
3360 ST.destroyNewNodes(CommonType);
3361 return nullptr;
3362 }
3363
3364 auto *Result = ST.Get(Map.find(Original)->second);
3365 if (Result) {
3366 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3367 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3368 }
3369 return Result;
3370 }
3371
3372 /// Try to match PHI node to Candidate.
3373 /// Matcher tracks the matched Phi nodes.
3374 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3375 SmallSetVector<PHIPair, 8> &Matcher,
3376 PhiNodeSet &PhiNodesToMatch) {
3377 SmallVector<PHIPair, 8> WorkList;
3378 Matcher.insert({ PHI, Candidate });
3379 SmallSet<PHINode *, 8> MatchedPHIs;
3380 MatchedPHIs.insert(PHI);
3381 WorkList.push_back({ PHI, Candidate });
3382 SmallSet<PHIPair, 8> Visited;
3383 while (!WorkList.empty()) {
3384 auto Item = WorkList.pop_back_val();
3385 if (!Visited.insert(Item).second)
3386 continue;
3387 // We iterate over all incoming values to Phi to compare them.
3388       // If the values are different, both of them are Phis, the first one is a
3389       // Phi we added (subject to match), and both are in the same basic block,
3390       // then we can match our pair if the values match. So we state that these
3391       // values match and add them to the work list to verify that.
3392 for (auto B : Item.first->blocks()) {
3393 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3394 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3395 if (FirstValue == SecondValue)
3396 continue;
3397
3398 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3399 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3400
3401         // If one of them is not a Phi, or
3402         // the first one is not a Phi node from the set we'd like to match, or
3403         // the Phi nodes are from different basic blocks, then
3404         // we will not be able to match.
3405 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3406 FirstPhi->getParent() != SecondPhi->getParent())
3407 return false;
3408
3409 // If we already matched them then continue.
3410 if (Matcher.count({ FirstPhi, SecondPhi }))
3411 continue;
3412         // So the values are different and do not match. So we need them to
3413 // match. (But we register no more than one match per PHI node, so that
3414 // we won't later try to replace them twice.)
3415 if (MatchedPHIs.insert(FirstPhi).second)
3416 Matcher.insert({ FirstPhi, SecondPhi });
3417         // But we must check it.
3418 WorkList.push_back({ FirstPhi, SecondPhi });
3419 }
3420 }
3421 return true;
3422 }
3423
3424 /// For the given set of PHI nodes (in the SimplificationTracker) try
3425 /// to find their equivalents.
3426 /// Returns false if this matching fails and creation of new Phi is disabled.
3427 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3428 unsigned &PhiNotMatchedCount) {
3429 // Matched and PhiNodesToMatch iterate their elements in a deterministic
3430 // order, so the replacements (ReplacePhi) are also done in a deterministic
3431 // order.
3432 SmallSetVector<PHIPair, 8> Matched;
3433 SmallPtrSet<PHINode *, 8> WillNotMatch;
3434 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3435 while (PhiNodesToMatch.size()) {
25
Loop condition is true. Entering loop body
3436 PHINode *PHI = *PhiNodesToMatch.begin();
3437
3438       // Add ourselves: if no Phi node in the basic block matches, we do not match.
3439 WillNotMatch.clear();
3440 WillNotMatch.insert(PHI);
3441
3442       // Traverse all Phis until we find an equivalent one or fail to do so.
3443 bool IsMatched = false;
3444 for (auto &P : PHI->getParent()->phis()) {
3445 if (&P == PHI)
26
Assuming the condition is false
27
Taking false branch
3446 continue;
3447 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
28
Assuming 'IsMatched' is true
29
Taking true branch
3448 break;
30
Execution continues on line 3456
3449 // If it does not match, collect all Phi nodes from matcher.
3450         // if we end up with no match, then all these Phi nodes will not match
3451 // later.
3452 for (auto M : Matched)
3453 WillNotMatch.insert(M.first);
3454 Matched.clear();
3455 }
3456 if (IsMatched
30.1
'IsMatched' is true
) {
31
Taking true branch
3457 // Replace all matched values and erase them.
3458 for (auto MV : Matched)
32
Assuming '__begin4' is not equal to '__end4'
3459 ST.ReplacePhi(MV.first, MV.second);
33
Calling 'SimplificationTracker::ReplacePhi'
3460 Matched.clear();
3461 continue;
3462 }
3463 // If we are not allowed to create new nodes then bail out.
3464 if (!AllowNewPhiNodes)
3465 return false;
3466 // Just remove all seen values in matcher. They will not match anything.
3467 PhiNotMatchedCount += WillNotMatch.size();
3468 for (auto *P : WillNotMatch)
3469 PhiNodesToMatch.erase(P);
3470 }
3471 return true;
3472 }
3473 /// Fill the placeholders with values from predecessors and simplify them.
3474 void FillPlaceholders(FoldAddrToValueMapping &Map,
3475 SmallVectorImpl<Value *> &TraverseOrder,
3476 SimplificationTracker &ST) {
3477 while (!TraverseOrder.empty()) {
3478 Value *Current = TraverseOrder.pop_back_val();
3479       assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3480 Value *V = Map[Current];
3481
3482 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3483 // CurrentValue also must be Select.
3484 auto *CurrentSelect = cast<SelectInst>(Current);
3485 auto *TrueValue = CurrentSelect->getTrueValue();
3486         assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3487 Select->setTrueValue(ST.Get(Map[TrueValue]));
3488 auto *FalseValue = CurrentSelect->getFalseValue();
3489         assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3490 Select->setFalseValue(ST.Get(Map[FalseValue]));
3491 } else {
3492 // Must be a Phi node then.
3493 auto *PHI = cast<PHINode>(V);
3494 // Fill the Phi node with values from predecessors.
3495 for (auto B : predecessors(PHI->getParent())) {
3496 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
3497           assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3498 PHI->addIncoming(ST.Get(Map[PV]), B);
3499 }
3500 }
3501 Map[Current] = ST.Simplify(V);
3502 }
3503 }
3504
3505   /// Starting from the original value, recursively iterates over the def-use
3506   /// chain up to known ending values represented in a map. For each traversed
3507   /// phi/select, inserts a placeholder Phi or Select.
3508   /// Reports all newly created Phi/Select nodes by adding them to the set.
3509   /// Also reports the order in which the values have been traversed.
3510 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3511 SmallVectorImpl<Value *> &TraverseOrder,
3512 SimplificationTracker &ST) {
3513 SmallVector<Value *, 32> Worklist;
3514     assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3515            "Address must be a Phi or Select node");
3516 auto *Dummy = UndefValue::get(CommonType);
3517 Worklist.push_back(Original);
3518 while (!Worklist.empty()) {
3519 Value *Current = Worklist.pop_back_val();
3520 // if it is already visited or it is an ending value then skip it.
3521 if (Map.find(Current) != Map.end())
3522 continue;
3523 TraverseOrder.push_back(Current);
3524
3525 // CurrentValue must be a Phi node or select. All others must be covered
3526 // by anchors.
3527 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3528 // Is it OK to get metadata from OrigSelect?!
3529 // Create a Select placeholder with dummy value.
3530 SelectInst *Select = SelectInst::Create(
3531 CurrentSelect->getCondition(), Dummy, Dummy,
3532 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3533 Map[Current] = Select;
3534 ST.insertNewSelect(Select);
3535 // We are interested in True and False values.
3536 Worklist.push_back(CurrentSelect->getTrueValue());
3537 Worklist.push_back(CurrentSelect->getFalseValue());
3538 } else {
3539 // It must be a Phi node then.
3540 PHINode *CurrentPhi = cast<PHINode>(Current);
3541 unsigned PredCount = CurrentPhi->getNumIncomingValues();
3542 PHINode *PHI =
3543 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3544 Map[Current] = PHI;
3545 ST.insertNewPhi(PHI);
3546 for (Value *P : CurrentPhi->incoming_values())
3547 Worklist.push_back(P);
3548 }
3549 }
3550 }
3551
3552 bool addrModeCombiningAllowed() {
3553 if (DisableComplexAddrModes)
3554 return false;
3555 switch (DifferentField) {
3556 default:
3557 return false;
3558 case ExtAddrMode::BaseRegField:
3559 return AddrSinkCombineBaseReg;
3560 case ExtAddrMode::BaseGVField:
3561 return AddrSinkCombineBaseGV;
3562 case ExtAddrMode::BaseOffsField:
3563 return AddrSinkCombineBaseOffs;
3564 case ExtAddrMode::ScaledRegField:
3565 return AddrSinkCombineScaledReg;
3566 }
3567 }
3568};
3569} // end anonymous namespace
3570
3571/// Try adding ScaleReg*Scale to the current addressing mode.
3572/// Return true and update AddrMode if this addr mode is legal for the target,
3573/// false if not.
3574bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3575 unsigned Depth) {
3576 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3577 // mode. Just process that directly.
3578 if (Scale == 1)
3579 return matchAddr(ScaleReg, Depth);
3580
3581 // If the scale is 0, it takes nothing to add this.
3582 if (Scale == 0)
3583 return true;
3584
3585 // If we already have a scale of this value, we can add to it, otherwise, we
3586 // need an available scale field.
3587 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3588 return false;
3589
3590 ExtAddrMode TestAddrMode = AddrMode;
3591
3592 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3593 // [A+B + A*7] -> [B+A*8].
3594 TestAddrMode.Scale += Scale;
3595 TestAddrMode.ScaledReg = ScaleReg;
3596
3597 // If the new address isn't legal, bail out.
3598 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3599 return false;
3600
3601 // It was legal, so commit it.
3602 AddrMode = TestAddrMode;
3603
3604 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3605 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3606 // X*Scale + C*Scale to addr mode.
3607 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3608 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3609 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3610 TestAddrMode.InBounds = false;
3611 TestAddrMode.ScaledReg = AddLHS;
3612 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3613
3614 // If this addressing mode is legal, commit it and remember that we folded
3615 // this instruction.
3616 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3617 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3618 AddrMode = TestAddrMode;
3619 return true;
3620 }
3621 }
3622
3623 // Otherwise, not (x+c)*scale, just return what we have.
3624 return true;
3625}
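// A worked example of the folding above (register names are illustrative):
// with AddrMode = {ScaledReg = %x, Scale = 4}, matching %x again with
// Scale = 3 merges the scales into Scale = 7. If the newly scaled register is
// itself %s = add %y, 5 matched with Scale = 4, the matcher also tries
// {ScaledReg = %y, Scale = 4, BaseOffs += 20}, since (%y + 5) * 4 equals
// %y * 4 + 20, and keeps that form only if the target reports it as a legal
// addressing mode.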
3626
3627/// This is a little filter, which returns true if an addressing computation
3628/// involving I might be folded into a load/store accessing it.
3629/// This doesn't need to be perfect, but needs to accept at least
3630/// the set of instructions that MatchOperationAddr can.
3631static bool MightBeFoldableInst(Instruction *I) {
3632 switch (I->getOpcode()) {
3633 case Instruction::BitCast:
3634 case Instruction::AddrSpaceCast:
3635 // Don't touch identity bitcasts.
3636 if (I->getType() == I->getOperand(0)->getType())
3637 return false;
3638 return I->getType()->isIntOrPtrTy();
3639 case Instruction::PtrToInt:
3640 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3641 return true;
3642 case Instruction::IntToPtr:
3643 // We know the input is intptr_t, so this is foldable.
3644 return true;
3645 case Instruction::Add:
3646 return true;
3647 case Instruction::Mul:
3648 case Instruction::Shl:
3649 // Can only handle X*C and X << C.
3650 return isa<ConstantInt>(I->getOperand(1));
3651 case Instruction::GetElementPtr:
3652 return true;
3653 default:
3654 return false;
3655 }
3656}
3657
3658/// Check whether or not \p Val is a legal instruction for \p TLI.
3659/// \note \p Val is assumed to be the product of some type promotion.
3660/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3661/// to be legal, as the non-promoted value would have had the same state.
3662static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3663 const DataLayout &DL, Value *Val) {
3664 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3665 if (!PromotedInst)
3666 return false;
3667 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3668 // If the ISDOpcode is undefined, it was undefined before the promotion.
3669 if (!ISDOpcode)
3670 return true;
3671 // Otherwise, check if the promoted instruction is legal or not.
3672 return TLI.isOperationLegalOrCustom(
3673 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3674}
3675
3676namespace {
3677
3678/// Helper class to perform type promotion.
3679class TypePromotionHelper {
3680 /// Utility function to add a promoted instruction \p ExtOpnd to
3681 /// \p PromotedInsts and record the type of extension we have seen.
3682 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3683 Instruction *ExtOpnd,
3684 bool IsSExt) {
3685 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3686 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3687 if (It != PromotedInsts.end()) {
3688 // If the new extension is same as original, the information in
3689 // PromotedInsts[ExtOpnd] is still correct.
3690 if (It->second.getInt() == ExtTy)
3691 return;
3692
3693 // Now the new extension is different from old extension, we make
3694 // the type information invalid by setting extension type to
3695 // BothExtension.
3696 ExtTy = BothExtension;
3697 }
3698 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3699 }
3700
3701 /// Utility function to query the original type of instruction \p Opnd
3702 /// with a matched extension type. If the extension doesn't match, we
3703 /// cannot use the information we had on the original type.
3704 /// BothExtension doesn't match any extension type.
3705 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3706 Instruction *Opnd,
3707 bool IsSExt) {
3708 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3709 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3710 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3711 return It->second.getPointer();
3712 return nullptr;
3713 }
3714
3715 /// Utility function to check whether or not a sign or zero extension
3716 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3717 /// either using the operands of \p Inst or promoting \p Inst.
3718 /// The type of the extension is defined by \p IsSExt.
3719 /// In other words, check if:
3720 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3721 /// #1 Promotion applies:
3722 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3723 /// #2 Operand reuses:
3724 /// ext opnd1 to ConsideredExtType.
3725 /// \p PromotedInsts maps the instructions to their type before promotion.
3726 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3727 const InstrToOrigTy &PromotedInsts, bool IsSExt);
3728
3729 /// Utility function to determine if \p OpIdx should be promoted when
3730 /// promoting \p Inst.
3731 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3732 return !(isa<SelectInst>(Inst) && OpIdx == 0);
3733 }
3734
3735 /// Utility function to promote the operand of \p Ext when this
3736 /// operand is a promotable trunc or sext or zext.
3737 /// \p PromotedInsts maps the instructions to their type before promotion.
3738 /// \p CreatedInstsCost[out] contains the cost of all instructions
3739 /// created to promote the operand of Ext.
3740 /// Newly added extensions are inserted in \p Exts.
3741 /// Newly added truncates are inserted in \p Truncs.
3742 /// Should never be called directly.
3743 /// \return The promoted value which is used instead of Ext.
3744 static Value *promoteOperandForTruncAndAnyExt(
3745 Instruction *Ext, TypePromotionTransaction &TPT,
3746 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3747 SmallVectorImpl<Instruction *> *Exts,
3748 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3749
3750 /// Utility function to promote the operand of \p Ext when this
3751 /// operand is promotable and is not a supported trunc or sext.
3752 /// \p PromotedInsts maps the instructions to their type before promotion.
3753 /// \p CreatedInstsCost[out] contains the cost of all the instructions
3754 /// created to promote the operand of Ext.
3755 /// Newly added extensions are inserted in \p Exts.
3756 /// Newly added truncates are inserted in \p Truncs.
3757 /// Should never be called directly.
3758 /// \return The promoted value which is used instead of Ext.
3759 static Value *promoteOperandForOther(Instruction *Ext,
3760 TypePromotionTransaction &TPT,
3761 InstrToOrigTy &PromotedInsts,
3762 unsigned &CreatedInstsCost,
3763 SmallVectorImpl<Instruction *> *Exts,
3764 SmallVectorImpl<Instruction *> *Truncs,
3765 const TargetLowering &TLI, bool IsSExt);
3766
3767 /// \see promoteOperandForOther.
3768 static Value *signExtendOperandForOther(
3769 Instruction *Ext, TypePromotionTransaction &TPT,
3770 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3771 SmallVectorImpl<Instruction *> *Exts,
3772 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3773 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3774 Exts, Truncs, TLI, true);
3775 }
3776
3777 /// \see promoteOperandForOther.
3778 static Value *zeroExtendOperandForOther(
3779 Instruction *Ext, TypePromotionTransaction &TPT,
3780 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3781 SmallVectorImpl<Instruction *> *Exts,
3782 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3783 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3784 Exts, Truncs, TLI, false);
3785 }
3786
3787public:
3788 /// Type for the utility function that promotes the operand of Ext.
3789 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3790 InstrToOrigTy &PromotedInsts,
3791 unsigned &CreatedInstsCost,
3792 SmallVectorImpl<Instruction *> *Exts,
3793 SmallVectorImpl<Instruction *> *Truncs,
3794 const TargetLowering &TLI);
3795
3796 /// Given a sign/zero extend instruction \p Ext, return the appropriate
3797 /// action to promote the operand of \p Ext instead of using Ext.
3798 /// \return NULL if no promotable action is possible with the current
3799 /// sign extension.
3800 /// \p InsertedInsts keeps track of all the instructions inserted by the
3801 /// other CodeGenPrepare optimizations. This information is important
3802 /// because we do not want to promote these instructions as CodeGenPrepare
3803 /// will reinsert them later. Thus creating an infinite loop: create/remove.
3804 /// \p PromotedInsts maps the instructions to their type before promotion.
3805 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3806 const TargetLowering &TLI,
3807 const InstrToOrigTy &PromotedInsts);
3808};
3809
3810} // end anonymous namespace
3811
3812bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3813 Type *ConsideredExtType,
3814 const InstrToOrigTy &PromotedInsts,
3815 bool IsSExt) {
3816 // The promotion helper does not know how to deal with vector types yet.
3817 // To be able to fix that, we would need to fix the places where we
3818 // statically extend, e.g., constants and such.
3819 if (Inst->getType()->isVectorTy())
3820 return false;
3821
3822 // We can always get through zext.
3823 if (isa<ZExtInst>(Inst))
3824 return true;
3825
3826 // sext(sext) is ok too.
3827 if (IsSExt && isa<SExtInst>(Inst))
3828 return true;
3829
3830 // We can get through binary operator, if it is legal. In other words, the
3831 // binary operator must have a nuw or nsw flag.
3832 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3833 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3834 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3835 (IsSExt && BinOp->hasNoSignedWrap())))
3836 return true;
3837
3838 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3839 if ((Inst->getOpcode() == Instruction::And ||
3840 Inst->getOpcode() == Instruction::Or))
3841 return true;
3842
3843 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3844 if (Inst->getOpcode() == Instruction::Xor) {
3845 const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3846 // Make sure it is not a NOT.
3847 if (Cst && !Cst->getValue().isAllOnesValue())
3848 return true;
3849 }
3850
3851 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3852 // It may change a poisoned value into a regular value, like
3853 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3854 // poisoned value regular value
3855 // It should be OK since undef covers valid value.
3856 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3857 return true;
3858
3859 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3860 // It may change a poisoned value into a regular value, like
3861 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3862 // poisoned value regular value
3863 // It should be OK since undef covers valid value.
3864 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3865 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
3866 if (ExtInst->hasOneUse()) {
3867 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
3868 if (AndInst && AndInst->getOpcode() == Instruction::And) {
3869 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3870 if (Cst &&
3871 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3872 return true;
3873 }
3874 }
3875 }
3876
3877 // Check if we can do the following simplification.
3878 // ext(trunc(opnd)) --> ext(opnd)
3879 if (!isa<TruncInst>(Inst))
3880 return false;
3881
3882 Value *OpndVal = Inst->getOperand(0);
3883 // Check if we can use this operand in the extension.
3884 // If the type is larger than the result type of the extension, we cannot.
3885 if (!OpndVal->getType()->isIntegerTy() ||
3886 OpndVal->getType()->getIntegerBitWidth() >
3887 ConsideredExtType->getIntegerBitWidth())
3888 return false;
3889
3890 // If the operand of the truncate is not an instruction, we will not have
3891 // any information on the dropped bits.
3892 // (Actually we could for constant but it is not worth the extra logic).
3893 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3894 if (!Opnd)
3895 return false;
3896
3897 // Check if the source of the type is narrow enough.
3898 // I.e., check that trunc just drops extended bits of the same kind of
3899 // the extension.
3900 // #1 get the type of the operand and check the kind of the extended bits.
3901 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3902 if (OpndType)
3903 ;
3904 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3905 OpndType = Opnd->getOperand(0)->getType();
3906 else
3907 return false;
3908
3909 // #2 check that the truncate just drops extended bits.
3910 return Inst->getType()->getIntegerBitWidth() >=
3911 OpndType->getIntegerBitWidth();
3912}
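
To make the ext(trunc(opnd)) case concrete, here is a small, purely illustrative pattern (the values %x, %s, %t and %e are invented for the example) that canGetThrough accepts, shown as a comment sketch:

    // Illustrative sketch only -- not taken from the analyzed code.
    //   %s = sext i8 %x to i32     ; inner extension: OpndType is i8
    //   %t = trunc i32 %s to i16   ; Inst: i32 fits into the considered i64
    //   %e = sext i16 %t to i64    ; ConsideredExtType is i64
    // The final check (i16 >= i8) holds, so the truncate only drops bits that
    // were produced by a sign extension of the same kind; canGetThrough
    // therefore returns true and the promotion may look through the trunc.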
3913
3914TypePromotionHelper::Action TypePromotionHelper::getAction(
3915 Instruction *Ext, const SetOfInstrs &InsertedInsts,
3916 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3917 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3918 "Unexpected instruction type");
3919 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3920 Type *ExtTy = Ext->getType();
3921 bool IsSExt = isa<SExtInst>(Ext);
3922 // If the operand of the extension is not an instruction, we cannot
3923 // get through.
3924 // If it is, check whether we can get through.
3925 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3926 return nullptr;
3927
3928 // Do not promote if the operand has been added by codegenprepare.
3929 // Otherwise, it means we are undoing an optimization that is likely to be
3930 // redone, thus causing potential infinite loop.
3931 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3932 return nullptr;
3933
3934 // SExt, ZExt or Trunc instructions.
3935 // Return the related handler.
3936 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3937 isa<ZExtInst>(ExtOpnd))
3938 return promoteOperandForTruncAndAnyExt;
3939
3940 // Regular instruction.
3941 // Abort early if we will have to insert non-free instructions.
3942 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3943 return nullptr;
3944 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3945}
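
For reference, a minimal sketch of how the returned Action is consumed; it assumes the matcher's context (AddrInst, TPT, InsertedInsts, PromotedInsts, TLI) is in scope, mirroring the SExt/ZExt case of matchOperationAddr further down:

    // Sketch only; names come from the surrounding matcher code.
    if (auto *Ext = dyn_cast<Instruction>(AddrInst)) {
      TypePromotionHelper::Action TPH =
          TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
      if (TPH) {
        unsigned CreatedInstsCost = 0;
        // Exts/Truncs are optional out-lists; pass nullptr when not needed.
        Value *PromotedOperand =
            TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
        // The matcher asserts PromotedOperand is non-null once getAction
        // returned a handler (see matchOperationAddr below).
        (void)PromotedOperand;
      }
    }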
3946
3947Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3948 Instruction *SExt, TypePromotionTransaction &TPT,
3949 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3950 SmallVectorImpl<Instruction *> *Exts,
3951 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3952 // By construction, the operand of SExt is an instruction. Otherwise we cannot
3953 // get through it and this method should not be called.
3954 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3955 Value *ExtVal = SExt;
3956 bool HasMergedNonFreeExt = false;
3957 if (isa<ZExtInst>(SExtOpnd)) {
3958 // Replace s|zext(zext(opnd))
3959 // => zext(opnd).
3960 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3961 Value *ZExt =
3962 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3963 TPT.replaceAllUsesWith(SExt, ZExt);
3964 TPT.eraseInstruction(SExt);
3965 ExtVal = ZExt;
3966 } else {
3967 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3968 // => z|sext(opnd).
3969 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3970 }
3971 CreatedInstsCost = 0;
3972
3973 // Remove dead code.
3974 if (SExtOpnd->use_empty())
3975 TPT.eraseInstruction(SExtOpnd);
3976
3977 // Check if the extension is still needed.
3978 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3979 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3980 if (ExtInst) {
3981 if (Exts)
3982 Exts->push_back(ExtInst);
3983 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3984 }
3985 return ExtVal;
3986 }
3987
3988 // At this point we have: ext ty opnd to ty.
3989 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3990 Value *NextVal = ExtInst->getOperand(0);
3991 TPT.eraseInstruction(ExtInst, NextVal);
3992 return NextVal;
3993}
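
As a purely illustrative before/after (values invented for the example) of the zext branch above:

    // Illustrative sketch only.
    // Before:                         After promoteOperandForTruncAndAnyExt:
    //   %z = zext i8 %x to i16          %z2 = zext i8 %x to i32
    //   %s = sext i16 %z to i32         (former uses of %s now use %z2;
    //                                    %z is erased if it became dead)
    // A sext of a zext collapses into a single zext of the original value.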
3994
3995Value *TypePromotionHelper::promoteOperandForOther(
3996 Instruction *Ext, TypePromotionTransaction &TPT,
3997 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3998 SmallVectorImpl<Instruction *> *Exts,
3999 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4000 bool IsSExt) {
4001 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4002 // get through it and this method should not be called.
4003 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4004 CreatedInstsCost = 0;
4005 if (!ExtOpnd->hasOneUse()) {
4006 // ExtOpnd will be promoted.
4007 // All its uses, but Ext, will need to use a truncated value of the
4008 // promoted version.
4009 // Create the truncate now.
4010 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4011 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4012 // Insert it just after the definition.
4013 ITrunc->moveAfter(ExtOpnd);
4014 if (Truncs)
4015 Truncs->push_back(ITrunc);
4016 }
4017
4018 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4019 // Restore the operand of Ext (which has been replaced by the previous call
4020 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4021 TPT.setOperand(Ext, 0, ExtOpnd);
4022 }
4023
4024 // Get through the Instruction:
4025 // 1. Update its type.
4026 // 2. Replace the uses of Ext by Inst.
4027 // 3. Extend each operand that needs to be extended.
4028
4029 // Remember the original type of the instruction before promotion.
4030 // This is useful to know that the high bits are sign extended bits.
4031 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4032 // Step #1.
4033 TPT.mutateType(ExtOpnd, Ext->getType());
4034 // Step #2.
4035 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4036 // Step #3.
4037 Instruction *ExtForOpnd = Ext;
4038
4039 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4040 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4041 ++OpIdx) {
4042 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4043 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4044 !shouldExtOperand(ExtOpnd, OpIdx)) {
4045 LLVM_DEBUG(dbgs() << "No need to propagate\n");
4046 continue;
4047 }
4048 // Check if we can statically extend the operand.
4049 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4050 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4051 LLVM_DEBUG(dbgs() << "Statically extend\n");
4052 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4053 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4054 : Cst->getValue().zext(BitWidth);
4055 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4056 continue;
4057 }
4058 // UndefValue are typed, so we have to statically sign extend them.
4059 if (isa<UndefValue>(Opnd)) {
4060 LLVM_DEBUG(dbgs() << "Statically extend\n");
4061 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4062 continue;
4063 }
4064
4065 // Otherwise we have to explicitly sign extend the operand.
4066 // Check if Ext was reused to extend an operand.
4067 if (!ExtForOpnd) {
4068 // If yes, create a new one.
4069 LLVM_DEBUG(dbgs() << "More operands to ext\n");
4070 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
4071 : TPT.createZExt(Ext, Opnd, Ext->getType());
4072 if (!isa<Instruction>(ValForExtOpnd)) {
4073 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4074 continue;
4075 }
4076 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
4077 }
4078 if (Exts)
4079 Exts->push_back(ExtForOpnd);
4080 TPT.setOperand(ExtForOpnd, 0, Opnd);
4081
4082 // Move the sign extension before the insertion point.
4083 TPT.moveBefore(ExtForOpnd, ExtOpnd);
4084 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
4085 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
4086 // If more sext are required, new instructions will have to be created.
4087 ExtForOpnd = nullptr;
4088 }
4089 if (ExtForOpnd == Ext) {
4090 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4091 TPT.eraseInstruction(Ext);
4092 }
4093 return ExtOpnd;
4094}
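
To make steps #1-#3 above concrete, here is a purely illustrative before/after (values invented; the operand has a single use, so no truncate is needed):

    // Illustrative sketch only.
    // Before:                        After promoteOperandForOther (IsSExt):
    //   %a = add nsw i8 %x, 1          %e = sext i8 %x to i32   ; Ext reused
    //   %e = sext i8 %a to i32         %a = add i32 %e, 1       ; type mutated
    //   use(%e)                        use(%a)                  ; uses rewired
    // The constant operand is extended statically; the original ext is reused
    // to extend %x, so CreatedInstsCost only grows if that ext is not free.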
4095
4096/// Check whether or not promoting an instruction to a wider type is profitable.
4097/// \p NewCost gives the cost of extension instructions created by the
4098/// promotion.
4099/// \p OldCost gives the cost of extension instructions before the promotion
4100/// plus the number of instructions that have been
4101/// matched in the addressing mode the promotion.
4102/// \p PromotedOperand is the value that has been promoted.
4103/// \return True if the promotion is profitable, false otherwise.
4104bool AddressingModeMatcher::isPromotionProfitable(
4105 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4106 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4107 << '\n');
4108 // The cost of the new extensions is greater than the cost of the
4109 // old extension plus what we folded.
4110 // This is not profitable.
4111 if (NewCost > OldCost)
4112 return false;
4113 if (NewCost < OldCost)
4114 return true;
4115 // The promotion is neutral but it may help folding the sign extension in
4116 // loads for instance.
4117 // Check that we did not create an illegal instruction.
4118 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4119}
4120
4121/// Given an instruction or constant expr, see if we can fold the operation
4122/// into the addressing mode. If so, update the addressing mode and return
4123/// true, otherwise return false without modifying AddrMode.
4124/// If \p MovedAway is not NULL, it contains the information of whether or
4125/// not AddrInst has to be folded into the addressing mode on success.
4126 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
4127/// because it has been moved away.
4128/// Thus AddrInst must not be added in the matched instructions.
4129/// This state can happen when AddrInst is a sext, since it may be moved away.
4130/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4131/// not be referenced anymore.
4132bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4133 unsigned Depth,
4134 bool *MovedAway) {
4135 // Avoid exponential behavior on extremely deep expression trees.
4136 if (Depth >= 5) return false;
4137
4138 // By default, all matched instructions stay in place.
4139 if (MovedAway)
4140 *MovedAway = false;
4141
4142 switch (Opcode) {
4143 case Instruction::PtrToInt:
4144 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4145 return matchAddr(AddrInst->getOperand(0), Depth);
4146 case Instruction::IntToPtr: {
4147 auto AS = AddrInst->getType()->getPointerAddressSpace();
4148 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4149 // This inttoptr is a no-op if the integer type is pointer sized.
4150 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4151 return matchAddr(AddrInst->getOperand(0), Depth);
4152 return false;
4153 }
4154 case Instruction::BitCast:
4155 // BitCast is always a noop, and we can handle it as long as it is
4156 // int->int or pointer->pointer (we don't want int<->fp or something).
4157 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4158 // Don't touch identity bitcasts. These were probably put here by LSR,
4159 // and we don't want to mess around with them. Assume it knows what it
4160 // is doing.
4161 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4162 return matchAddr(AddrInst->getOperand(0), Depth);
4163 return false;
4164 case Instruction::AddrSpaceCast: {
4165 unsigned SrcAS
4166 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4167 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4168 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
4169 return matchAddr(AddrInst->getOperand(0), Depth);
4170 return false;
4171 }
4172 case Instruction::Add: {
4173 // Check to see if we can merge in the RHS then the LHS. If so, we win.
4174 ExtAddrMode BackupAddrMode = AddrMode;
4175 unsigned OldSize = AddrModeInsts.size();
4176 // Start a transaction at this point.
4177 // The LHS may match but not the RHS.
4178 // Therefore, we need a higher level restoration point to undo partially
4179 // matched operation.
4180 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4181 TPT.getRestorationPoint();
4182
4183 AddrMode.InBounds = false;
4184 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
4185 matchAddr(AddrInst->getOperand(0), Depth+1))
4186 return true;
4187
4188 // Restore the old addr mode info.
4189 AddrMode = BackupAddrMode;
4190 AddrModeInsts.resize(OldSize);
4191 TPT.rollback(LastKnownGood);
4192
4193 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
4194 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
4195 matchAddr(AddrInst->getOperand(1), Depth+1))
4196 return true;
4197
4198 // Otherwise we definitely can't merge the ADD in.
4199 AddrMode = BackupAddrMode;
4200 AddrModeInsts.resize(OldSize);
4201 TPT.rollback(LastKnownGood);
4202 break;
4203 }
4204 //case Instruction::Or:
4205 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4206 //break;
4207 case Instruction::Mul:
4208 case Instruction::Shl: {
4209 // Can only handle X*C and X << C.
4210 AddrMode.InBounds = false;
4211 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4212 if (!RHS || RHS->getBitWidth() > 64)
4213 return false;
4214 int64_t Scale = RHS->getSExtValue();
4215 if (Opcode == Instruction::Shl)
4216 Scale = 1LL << Scale;
4217
4218 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4219 }
4220 case Instruction::GetElementPtr: {
4221 // Scan the GEP. We check it if it contains constant offsets and at most
4222 // one variable offset.
4223 int VariableOperand = -1;
4224 unsigned VariableScale = 0;
4225
4226 int64_t ConstantOffset = 0;
4227 gep_type_iterator GTI = gep_type_begin(AddrInst);
4228 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4229 if (StructType *STy = GTI.getStructTypeOrNull()) {
4230 const StructLayout *SL = DL.getStructLayout(STy);
4231 unsigned Idx =
4232 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4233 ConstantOffset += SL->getElementOffset(Idx);
4234 } else {
4235 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
4236 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4237 const APInt &CVal = CI->getValue();
4238 if (CVal.getMinSignedBits() <= 64) {
4239 ConstantOffset += CVal.getSExtValue() * TypeSize;
4240 continue;
4241 }
4242 }
4243 if (TypeSize) { // Scales of zero don't do anything.
4244 // We only allow one variable index at the moment.
4245 if (VariableOperand != -1)
4246 return false;
4247
4248 // Remember the variable index.
4249 VariableOperand = i;
4250 VariableScale = TypeSize;
4251 }
4252 }
4253 }
4254
4255 // A common case is for the GEP to only do a constant offset. In this case,
4256 // just add it to the disp field and check validity.
4257 if (VariableOperand == -1) {
4258 AddrMode.BaseOffs += ConstantOffset;
4259 if (ConstantOffset == 0 ||
4260 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4261 // Check to see if we can fold the base pointer in too.
4262 if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4263 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4264 AddrMode.InBounds = false;
4265 return true;
4266 }
4267 } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4268 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4269 ConstantOffset > 0) {
4270 // Record GEPs with non-zero offsets as candidates for splitting in the
4271 // event that the offset cannot fit into the r+i addressing mode.
4272 // Simple and common case that only one GEP is used in calculating the
4273 // address for the memory access.
4274 Value *Base = AddrInst->getOperand(0);
4275 auto *BaseI = dyn_cast<Instruction>(Base);
4276 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4277 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4278 (BaseI && !isa<CastInst>(BaseI) &&
4279 !isa<GetElementPtrInst>(BaseI))) {
4280 // Make sure the parent block allows inserting non-PHI instructions
4281 // before the terminator.
4282 BasicBlock *Parent =
4283 BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4284 if (!Parent->getTerminator()->isEHPad())
4285 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4286 }
4287 }
4288 AddrMode.BaseOffs -= ConstantOffset;
4289 return false;
4290 }
4291
4292 // Save the valid addressing mode in case we can't match.
4293 ExtAddrMode BackupAddrMode = AddrMode;
4294 unsigned OldSize = AddrModeInsts.size();
4295
4296 // See if the scale and offset amount is valid for this target.
4297 AddrMode.BaseOffs += ConstantOffset;
4298 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4299 AddrMode.InBounds = false;
4300
4301 // Match the base operand of the GEP.
4302 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4303 // If it couldn't be matched, just stuff the value in a register.
4304 if (AddrMode.HasBaseReg) {
4305 AddrMode = BackupAddrMode;
4306 AddrModeInsts.resize(OldSize);
4307 return false;
4308 }
4309 AddrMode.HasBaseReg = true;
4310 AddrMode.BaseReg = AddrInst->getOperand(0);
4311 }
4312
4313 // Match the remaining variable portion of the GEP.
4314 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4315 Depth)) {
4316 // If it couldn't be matched, try stuffing the base into a register
4317 // instead of matching it, and retrying the match of the scale.
4318 AddrMode = BackupAddrMode;
4319 AddrModeInsts.resize(OldSize);
4320 if (AddrMode.HasBaseReg)
4321 return false;
4322 AddrMode.HasBaseReg = true;
4323 AddrMode.BaseReg = AddrInst->getOperand(0);
4324 AddrMode.BaseOffs += ConstantOffset;
4325 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4326 VariableScale, Depth)) {
4327 // If even that didn't work, bail.
4328 AddrMode = BackupAddrMode;
4329 AddrModeInsts.resize(OldSize);
4330 return false;
4331 }
4332 }
4333
4334 return true;
4335 }
4336 case Instruction::SExt:
4337 case Instruction::ZExt: {
4338 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4339 if (!Ext)
4340 return false;
4341
4342 // Try to move this ext out of the way of the addressing mode.
4343 // Ask for a method for doing so.
4344 TypePromotionHelper::Action TPH =
4345 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4346 if (!TPH)
4347 return false;
4348
4349 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4350 TPT.getRestorationPoint();
4351 unsigned CreatedInstsCost = 0;
4352 unsigned ExtCost = !TLI.isExtFree(Ext);
4353 Value *PromotedOperand =
4354 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4355 // SExt has been moved away.
4356 // Thus either it will be rematched later in the recursive calls or it is
4357 // gone. Anyway, we must not fold it into the addressing mode at this point.
4358 // E.g.,
4359 // op = add opnd, 1
4360 // idx = ext op
4361 // addr = gep base, idx
4362 // is now:
4363 // promotedOpnd = ext opnd <- no match here
4364 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4365 // addr = gep base, op <- match
4366 if (MovedAway)
4367 *MovedAway = true;
4368
4369 assert(PromotedOperand &&
4370 "TypePromotionHelper should have filtered out those cases");
4371
4372 ExtAddrMode BackupAddrMode = AddrMode;
4373 unsigned OldSize = AddrModeInsts.size();
4374
4375 if (!matchAddr(PromotedOperand, Depth) ||
4376 // The total of the new cost is equal to the cost of the created
4377 // instructions.
4378 // The total of the old cost is equal to the cost of the extension plus
4379 // what we have saved in the addressing mode.
4380 !isPromotionProfitable(CreatedInstsCost,
4381 ExtCost + (AddrModeInsts.size() - OldSize),
4382 PromotedOperand)) {
4383 AddrMode = BackupAddrMode;
4384 AddrModeInsts.resize(OldSize);
4385 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4386 TPT.rollback(LastKnownGood);
4387 return false;
4388 }
4389 return true;
4390 }
4391 }
4392 return false;
4393}
4394
4395/// If we can, try to add the value of 'Addr' into the current addressing mode.
4396/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4397/// unmodified. This assumes that Addr is either a pointer type or intptr_t
4398/// for the target.
4399///
4400bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4401 // Start a transaction at this point that we will rollback if the matching
4402 // fails.
4403 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4404 TPT.getRestorationPoint();
4405 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4406 // Fold in immediates if legal for the target.
4407 AddrMode.BaseOffs += CI->getSExtValue();
4408 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4409 return true;
4410 AddrMode.BaseOffs -= CI->getSExtValue();
4411 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4412 // If this is a global variable, try to fold it into the addressing mode.
4413 if (!AddrMode.BaseGV) {
4414 AddrMode.BaseGV = GV;
4415 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4416 return true;
4417 AddrMode.BaseGV = nullptr;
4418 }
4419 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4420 ExtAddrMode BackupAddrMode = AddrMode;
4421 unsigned OldSize = AddrModeInsts.size();
4422
4423 // Check to see if it is possible to fold this operation.
4424 bool MovedAway = false;
4425 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4426 // This instruction may have been moved away. If so, there is nothing
4427 // to check here.
4428 if (MovedAway)
4429 return true;
4430 // Okay, it's possible to fold this. Check to see if it is actually
4431 // *profitable* to do so. We use a simple cost model to avoid increasing
4432 // register pressure too much.
4433 if (I->hasOneUse() ||
4434 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4435 AddrModeInsts.push_back(I);
4436 return true;
4437 }
4438
4439 // It isn't profitable to do this, roll back.
4440 //cerr << "NOT FOLDING: " << *I;
4441 AddrMode = BackupAddrMode;
4442 AddrModeInsts.resize(OldSize);
4443 TPT.rollback(LastKnownGood);
4444 }
4445 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4446 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4447 return true;
4448 TPT.rollback(LastKnownGood);
4449 } else if (isa<ConstantPointerNull>(Addr)) {
4450 // Null pointer gets folded without affecting the addressing mode.
4451 return true;
4452 }
4453
4454 // Worst case, the target should support [reg] addressing modes. :)
4455 if (!AddrMode.HasBaseReg) {
4456 AddrMode.HasBaseReg = true;
4457 AddrMode.BaseReg = Addr;
4458 // Still check for legality in case the target supports [imm] but not [i+r].
4459 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4460 return true;
4461 AddrMode.HasBaseReg = false;
4462 AddrMode.BaseReg = nullptr;
4463 }
4464
4465 // If the base register is already taken, see if we can do [r+r].
4466 if (AddrMode.Scale == 0) {
4467 AddrMode.Scale = 1;
4468 AddrMode.ScaledReg = Addr;
4469 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4470 return true;
4471 AddrMode.Scale = 0;
4472 AddrMode.ScaledReg = nullptr;
4473 }
4474 // Couldn't match.
4475 TPT.rollback(LastKnownGood);
4476 return false;
4477}
4478
4479/// Check to see if all uses of OpVal by the specified inline asm call are due
4480/// to memory operands. If so, return true, otherwise return false.
4481static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4482 const TargetLowering &TLI,
4483 const TargetRegisterInfo &TRI) {
4484 const Function *F = CI->getFunction();
4485 TargetLowering::AsmOperandInfoVector TargetConstraints =
4486 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4487 ImmutableCallSite(CI));
4488
4489 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4490 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4491
4492 // Compute the constraint code and ConstraintType to use.
4493 TLI.ComputeConstraintToUse(OpInfo, SDValue());
4494
4495 // If this asm operand is our Value*, and if it isn't an indirect memory
4496 // operand, we can't fold it!
4497 if (OpInfo.CallOperandVal == OpVal &&
4498 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4499 !OpInfo.isIndirect))
4500 return false;
4501 }
4502
4503 return true;
4504}
4505
4506// Max number of memory uses to look at before aborting the search to conserve
4507// compile time.
4508static constexpr int MaxMemoryUsesToScan = 20;
4509
4510/// Recursively walk all the uses of I until we find a memory use.
4511/// If we find an obviously non-foldable instruction, return true.
4512/// Add the ultimately found memory instructions to MemoryUses.
4513static bool FindAllMemoryUses(
4514 Instruction *I,
4515 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4516 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4517 const TargetRegisterInfo &TRI, int SeenInsts = 0) {
4518 // If we already considered this instruction, we're done.
4519 if (!ConsideredInsts.insert(I).second)
4520 return false;
4521
4522 // If this is an obviously unfoldable instruction, bail out.
4523 if (!MightBeFoldableInst(I))
4524 return true;
4525
4526 const bool OptSize = I->getFunction()->hasOptSize();
4527
4528 // Loop over all the uses, recursively processing them.
4529 for (Use &U : I->uses()) {
4530 // Conservatively return true if we're seeing a large number or a deep chain
4531 // of users. This avoids excessive compilation times in pathological cases.
4532 if (SeenInsts++ >= MaxMemoryUsesToScan)
4533 return true;
4534
4535 Instruction *UserI = cast<Instruction>(U.getUser());
4536 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4537 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4538 continue;
4539 }
4540
4541 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4542 unsigned opNo = U.getOperandNo();
4543 if (opNo != StoreInst::getPointerOperandIndex())
4544 return true; // Storing addr, not into addr.
4545 MemoryUses.push_back(std::make_pair(SI, opNo));
4546 continue;
4547 }
4548
4549 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4550 unsigned opNo = U.getOperandNo();
4551 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4552 return true; // Storing addr, not into addr.
4553 MemoryUses.push_back(std::make_pair(RMW, opNo));
4554 continue;
4555 }
4556
4557 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4558 unsigned opNo = U.getOperandNo();
4559 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4560 return true; // Storing addr, not into addr.
4561 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4562 continue;
4563 }
4564
4565 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4566 // If this is a cold call, we can sink the addressing calculation into
4567 // the cold path. See optimizeCallInst
4568 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
4569 continue;
4570
4571 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4572 if (!IA) return true;
4573
4574 // If this is a memory operand, we're cool, otherwise bail out.
4575 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4576 return true;
4577 continue;
4578 }
4579
4580 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
4581 SeenInsts))
4582 return true;
4583 }
4584
4585 return false;
4586}
4587
4588/// Return true if Val is already known to be live at the use site that we're
4589/// folding it into. If so, there is no cost to include it in the addressing
4590/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4591/// instruction already.
4592bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4593 Value *KnownLive2) {
4594 // If Val is either of the known-live values, we know it is live!
4595 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4596 return true;
4597
4598 // All values other than instructions and arguments (e.g. constants) are live.
4599 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4600
4601 // If Val is a constant sized alloca in the entry block, it is live, this is
4602 // true because it is just a reference to the stack/frame pointer, which is
4603 // live for the whole function.
4604 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4605 if (AI->isStaticAlloca())
4606 return true;
4607
4608 // Check to see if this value is already used in the memory instruction's
4609 // block. If so, it's already live into the block at the very least, so we
4610 // can reasonably fold it.
4611 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4612}
4613
4614/// It is possible for the addressing mode of the machine to fold the specified
4615/// instruction into a load or store that ultimately uses it.
4616/// However, the specified instruction has multiple uses.
4617/// Given this, it may actually increase register pressure to fold it
4618/// into the load. For example, consider this code:
4619///
4620/// X = ...
4621/// Y = X+1
4622/// use(Y) -> nonload/store
4623/// Z = Y+1
4624/// load Z
4625///
4626/// In this case, Y has multiple uses, and can be folded into the load of Z
4627/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4628/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4629/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4630/// number of computations either.
4631///
4632/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4633/// X was live across 'load Z' for other reasons, we actually *would* want to
4634/// fold the addressing mode in the Z case. This would make Y die earlier.
4635bool AddressingModeMatcher::
4636isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4637 ExtAddrMode &AMAfter) {
4638 if (IgnoreProfitability) return true;
4639
4640 // AMBefore is the addressing mode before this instruction was folded into it,
4641 // and AMAfter is the addressing mode after the instruction was folded. Get
4642 // the set of registers referenced by AMAfter and subtract out those
4643 // referenced by AMBefore: this is the set of values which folding in this
4644 // address extends the lifetime of.
4645 //
4646 // Note that there are only two potential values being referenced here,
4647 // BaseReg and ScaleReg (global addresses are always available, as are any
4648 // folded immediates).
4649 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4650
4651 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4652 // lifetime wasn't extended by adding this instruction.
4653 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4654 BaseReg = nullptr;
4655 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4656 ScaledReg = nullptr;
4657
4658 // If folding this instruction (and it's subexprs) didn't extend any live
4659 // ranges, we're ok with it.
4660 if (!BaseReg && !ScaledReg)
4661 return true;
4662
4663 // If all uses of this instruction can have the address mode sunk into them,
4664 // we can remove the addressing mode and effectively trade one live register
4665 // for another (at worst.) In this context, folding an addressing mode into
4666 // the use is just a particularly nice way of sinking it.
4667 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4668 SmallPtrSet<Instruction*, 16> ConsideredInsts;
4669 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4670 return false; // Has a non-memory, non-foldable use!
4671
4672 // Now that we know that all uses of this instruction are part of a chain of
4673 // computation involving only operations that could theoretically be folded
4674 // into a memory use, loop over each of these memory operation uses and see
4675 // if they could *actually* fold the instruction. The assumption is that
4676 // addressing modes are cheap and that duplicating the computation involved
4677 // many times is worthwhile, even on a fastpath. For sinking candidates
4678 // (i.e. cold call sites), this serves as a way to prevent excessive code
4679 // growth since most architectures have some reasonable small and fast way to
4680 // compute an effective address. (i.e LEA on x86)
4681 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4682 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4683 Instruction *User = MemoryUses[i].first;
4684 unsigned OpNo = MemoryUses[i].second;
4685
4686 // Get the access type of this use. If the use isn't a pointer, we don't
4687 // know what it accesses.
4688 Value *Address = User->getOperand(OpNo);
4689 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4690 if (!AddrTy)
4691 return false;
4692 Type *AddressAccessTy = AddrTy->getElementType();
4693 unsigned AS = AddrTy->getAddressSpace();
4694
4695 // Do a match against the root of this address, ignoring profitability. This
4696 // will tell us if the addressing mode for the memory operation will
4697 // *actually* cover the shared instruction.
4698 ExtAddrMode Result;
4699 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4700 0);
4701 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4702 TPT.getRestorationPoint();
4703 AddressingModeMatcher Matcher(
4704 MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4705 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4706 Matcher.IgnoreProfitability = true;
4707 bool Success = Matcher.matchAddr(Address, 0);
4708 (void)Success; assert(Success && "Couldn't select *anything*?");
4709
4710 // The match was to check the profitability, the changes made are not
4711 // part of the original matcher. Therefore, they should be dropped
4712 // otherwise the original matcher will not present the right state.
4713 TPT.rollback(LastKnownGood);
4714
4715 // If the match didn't cover I, then it won't be shared by it.
4716 if (!is_contained(MatchedAddrModeInsts, I))
4717 return false;
4718
4719 MatchedAddrModeInsts.clear();
4720 }
4721
4722 return true;
4723}
4724
4725/// Return true if the specified values are defined in a
4726/// different basic block than BB.
4727static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4728 if (Instruction *I = dyn_cast<Instruction>(V))
4729 return I->getParent() != BB;
4730 return false;
4731}
4732
4733/// Sink addressing mode computation immediately before MemoryInst if doing so
4734/// can be done without increasing register pressure. The need for the
4735/// register pressure constraint means this can end up being an all or nothing
4736/// decision for all uses of the same addressing computation.
4737///
4738/// Load and Store Instructions often have addressing modes that can do
4739/// significant amounts of computation. As such, instruction selection will try
4740/// to get the load or store to do as much computation as possible for the
4741/// program. The problem is that isel can only see within a single block. As
4742/// such, we sink as much legal addressing mode work into the block as possible.
4743///
4744/// This method is used to optimize both load/store and inline asms with memory
4745/// operands. It's also used to sink addressing computations feeding into cold
4746/// call sites into their (cold) basic block.
4747///
4748/// The motivation for handling sinking into cold blocks is that doing so can
4749/// both enable other address mode sinking (by satisfying the register pressure
4750/// constraint above), and reduce register pressure globally (by removing the
4751/// addressing mode computation from the fast path entirely.).
4752bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4753 Type *AccessTy, unsigned AddrSpace) {
4754 Value *Repl = Addr;
4755
4756 // Try to collapse single-value PHI nodes. This is necessary to undo
4757 // unprofitable PRE transformations.
4758 SmallVector<Value*, 8> worklist;
4759 SmallPtrSet<Value*, 16> Visited;
4760 worklist.push_back(Addr);
4761
4762 // Use a worklist to iteratively look through PHI and select nodes, and
4763 // ensure that the addressing mode obtained from the non-PHI/select roots of
4764 // the graph are compatible.
4765 bool PhiOrSelectSeen = false;
4766 SmallVector<Instruction*, 16> AddrModeInsts;
4767 const SimplifyQuery SQ(*DL, TLInfo);
4768 AddressingModeCombiner AddrModes(SQ, Addr);
4769 TypePromotionTransaction TPT(RemovedInsts);
4770 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4771 TPT.getRestorationPoint();
4772 while (!worklist.empty()) {
  [1] Loop condition is true. Entering loop body
4773 Value *V = worklist.back();
4774 worklist.pop_back();
4775
4776 // We allow traversing cyclic Phi nodes.
4777 // In case of success after this loop we ensure that traversing through
4778 // Phi nodes ends up with all cases to compute address of the form
4779 // BaseGV + Base + Scale * Index + Offset
4780 // where Scale and Offset are constants and BaseGV, Base and Index
4781 // are exactly the same Values in all cases.
4782 // It means that BaseGV, Scale and Offset dominate our memory instruction
4783 // and have the same value as they had in address computation represented
4784 // as Phi. So we can safely sink address computation to memory instruction.
4785 if (!Visited.insert(V).second)
  [2] Assuming field 'second' is true
  [3] Taking false branch
4786 continue;
4787
4788 // For a PHI node, push all of its incoming values.
4789 if (PHINode *P = dyn_cast<PHINode>(V)) {
  [4] Assuming 'V' is not a 'PHINode'
  [4.1] 'P' is null
  [5] Taking false branch
4790 for (Value *IncValue : P->incoming_values())
4791 worklist.push_back(IncValue);
4792 PhiOrSelectSeen = true;
4793 continue;
4794 }
4795 // Similar for select.
4796 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
  [6] Assuming 'V' is not a 'SelectInst'
  [6.1] 'SI' is null
  [7] Taking false branch
4797 worklist.push_back(SI->getFalseValue());
4798 worklist.push_back(SI->getTrueValue());
4799 PhiOrSelectSeen = true;
4800 continue;
4801 }
4802
4803 // For non-PHIs, determine the addressing mode being computed. Note that
4804 // the result may differ depending on what other uses our candidate
4805 // addressing instructions might have.
4806 AddrModeInsts.clear();
4807 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4808 0);
4809 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4810 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4811 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4812
4813 GetElementPtrInst *GEP = LargeOffsetGEP.first;
4814 if (GEP && !NewGEPBases.count(GEP)) {
  [7.1] 'GEP' is null
  [8] Taking false branch
4815 // If splitting the underlying data structure can reduce the offset of a
4816 // GEP, collect the GEP. Skip the GEPs that are the new bases of
4817 // previously split data structures.
4818 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4819 if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4820 LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4821 }
4822
4823 NewAddrMode.OriginalValue = V;
4824 if (!AddrModes.addNewAddrMode(NewAddrMode))
  [9] Taking true branch
4825 break;
  [10] Execution continues on line 4831
4826 }
4827
4828 // Try to combine the AddrModes we've collected. If we couldn't collect any,
4829 // or we have multiple but either couldn't combine them or combining them
4830 // wouldn't do anything useful, bail out now.
4831 if (!AddrModes.combineAddrModes()) {
  [11] Calling 'AddressingModeCombiner::combineAddrModes'
4832 TPT.rollback(LastKnownGood);
4833 return false;
4834 }
4835 TPT.commit();
4836
4837 // Get the combined AddrMode (or the only AddrMode, if we only had one).
4838 ExtAddrMode AddrMode = AddrModes.getAddrMode();
4839
4840 // If all the instructions matched are already in this BB, don't do anything.
4841 // If we saw a Phi node then it is definitely not local, and if we saw a select
4842 // then we want to push the address calculation past it even if it's already
4843 // in this BB.
4844 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4845 return IsNonLocalValue(V, MemoryInst->getParent());
4846 })) {
4847 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4848 << "\n");
4849 return false;
4850 }
4851
4852 // Insert this computation right after this user. Since our caller is
4853 // scanning from the top of the BB to the bottom, reuse of the expr are
4854 // guaranteed to happen later.
4855 IRBuilder<> Builder(MemoryInst);
4856
4857 // Now that we determined the addressing expression we want to use and know
4858 // that we have to sink it into this block. Check to see if we have already
4859 // done this for some other load/store instr in this block. If so, reuse
4860 // the computation. Before attempting reuse, check if the address is valid
4861 // as it may have been erased.
4862
4863 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4864
4865 Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4866 if (SunkAddr) {
4867 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4868 << " for " << *MemoryInst << "\n");
4869 if (SunkAddr->getType() != Addr->getType())
4870 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4871 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
4872 TM && SubtargetInfo->addrSinkUsingGEPs())) {
4873 // By default, we use the GEP-based method when AA is used later. This
4874 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4875 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4876 << " for " << *MemoryInst << "\n");
4877 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4878 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4879
4880 // First, find the pointer.
4881 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4882 ResultPtr = AddrMode.BaseReg;
4883 AddrMode.BaseReg = nullptr;
4884 }
4885
4886 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4887 // We can't add more than one pointer together, nor can we scale a
4888 // pointer (both of which seem meaningless).
4889 if (ResultPtr || AddrMode.Scale != 1)
4890 return false;
4891
4892 ResultPtr = AddrMode.ScaledReg;
4893 AddrMode.Scale = 0;
4894 }
4895
4896 // It is only safe to sign extend the BaseReg if we know that the math
4897 // required to create it did not overflow before we extend it. Since
4898 // the original IR value was tossed in favor of a constant back when
4899 // the AddrMode was created we need to bail out gracefully if widths
4900 // do not match instead of extending it.
4901 //
4902 // (See below for code to add the scale.)
4903 if (AddrMode.Scale) {
4904 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4905 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4906 cast<IntegerType>(ScaledRegTy)->getBitWidth())
4907 return false;
4908 }
4909
4910 if (AddrMode.BaseGV) {
4911 if (ResultPtr)
4912 return false;
4913
4914 ResultPtr = AddrMode.BaseGV;
4915 }
4916
4917 // If the real base value actually came from an inttoptr, then the matcher
4918 // will look through it and provide only the integer value. In that case,
4919 // use it here.
4920 if (!DL->isNonIntegralPointerType(Addr->getType())) {
4921 if (!ResultPtr && AddrMode.BaseReg) {
4922 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4923 "sunkaddr");
4924 AddrMode.BaseReg = nullptr;
4925 } else if (!ResultPtr && AddrMode.Scale == 1) {
4926 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4927 "sunkaddr");
4928 AddrMode.Scale = 0;
4929 }
4930 }
4931
4932 if (!ResultPtr &&
4933 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4934 SunkAddr = Constant::getNullValue(Addr->getType());
4935 } else if (!ResultPtr) {
4936 return false;
4937 } else {
4938 Type *I8PtrTy =
4939 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4940 Type *I8Ty = Builder.getInt8Ty();
4941
4942 // Start with the base register. Do this first so that subsequent address
4943 // matching finds it last, which will prevent it from trying to match it
4944 // as the scaled value in case it happens to be a mul. That would be
4945 // problematic if we've sunk a different mul for the scale, because then
4946 // we'd end up sinking both muls.
4947 if (AddrMode.BaseReg) {
4948 Value *V = AddrMode.BaseReg;
4949 if (V->getType() != IntPtrTy)
4950 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4951
4952 ResultIndex = V;
4953 }
4954
4955 // Add the scale value.
4956 if (AddrMode.Scale) {
4957 Value *V = AddrMode.ScaledReg;
4958 if (V->getType() == IntPtrTy) {
4959 // done.
4960 } else {
4961 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4962 cast<IntegerType>(V->getType())->getBitWidth() &&
4963 "We can't transform if ScaledReg is too narrow");
4964 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4965 }
4966
4967 if (AddrMode.Scale != 1)
4968 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4969 "sunkaddr");
4970 if (ResultIndex)
4971 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4972 else
4973 ResultIndex = V;
4974 }
4975
4976 // Add in the Base Offset if present.
4977 if (AddrMode.BaseOffs) {
4978 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4979 if (ResultIndex) {
4980 // We need to add this separately from the scale above to help with
4981 // SDAG consecutive load/store merging.
4982 if (ResultPtr->getType() != I8PtrTy)
4983 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4984 ResultPtr =
4985 AddrMode.InBounds
4986 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4987 "sunkaddr")
4988 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4989 }
4990
4991 ResultIndex = V;
4992 }
4993
4994 if (!ResultIndex) {
4995 SunkAddr = ResultPtr;
4996 } else {
4997 if (ResultPtr->getType() != I8PtrTy)
4998 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4999 SunkAddr =
5000 AddrMode.InBounds
5001 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5002 "sunkaddr")
5003 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5004 }
5005
5006 if (SunkAddr->getType() != Addr->getType())
5007 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5008 }
5009 } else {
5010 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5011 // non-integral pointers, so in that case bail out now.
5012 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5013 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5014 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5015 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5016 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5017 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5018 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5019 (AddrMode.BaseGV &&
5020 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5021 return false;
5022
5023 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5024 << " for " << *MemoryInst << "\n");
5025 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5026 Value *Result = nullptr;
5027
5028 // Start with the base register. Do this first so that subsequent address
5029 // matching finds it last, which will prevent it from trying to match it
5030 // as the scaled value in case it happens to be a mul. That would be
5031 // problematic if we've sunk a different mul for the scale, because then
5032 // we'd end up sinking both muls.
5033 if (AddrMode.BaseReg) {
5034 Value *V = AddrMode.BaseReg;
5035 if (V->getType()->isPointerTy())
5036 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5037 if (V->getType() != IntPtrTy)
5038 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5039 Result = V;
5040 }
5041
5042 // Add the scale value.
5043 if (AddrMode.Scale) {
5044 Value *V = AddrMode.ScaledReg;
5045 if (V->getType() == IntPtrTy) {
5046 // done.
5047 } else if (V->getType()->isPointerTy()) {
5048 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5049 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5050 cast<IntegerType>(V->getType())->getBitWidth()) {
5051 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5052 } else {
5053 // It is only safe to sign extend the BaseReg if we know that the math
5054 // required to create it did not overflow before we extend it. Since
5055 // the original IR value was tossed in favor of a constant back when
5056 // the AddrMode was created we need to bail out gracefully if widths
5057 // do not match instead of extending it.
5058 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5059 if (I && (Result != AddrMode.BaseReg))
5060 I->eraseFromParent();
5061 return false;
5062 }
5063 if (AddrMode.Scale != 1)
5064 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5065 "sunkaddr");
5066 if (Result)
5067 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5068 else
5069 Result = V;
5070 }
5071
5072 // Add in the BaseGV if present.
5073 if (AddrMode.BaseGV) {
5074 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
5075 if (Result)
5076 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5077 else
5078 Result = V;
5079 }
5080
5081 // Add in the Base Offset if present.
5082 if (AddrMode.BaseOffs) {
5083 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5084 if (Result)
5085 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5086 else
5087 Result = V;
5088 }
5089
5090 if (!Result)
5091 SunkAddr = Constant::getNullValue(Addr->getType());
5092 else
5093 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5094 }
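// Illustrative sketch of the integer-based address built by the branch above
// (placeholder values; assumes IntPtrTy is i64 and Addr has type i32*), for
// AddrMode = {BaseReg = %p, ScaledReg = %i, Scale = 4, BaseOffs = 8}:
//   %sunkaddr  = ptrtoint i32* %p to i64
//   %sunkaddr1 = mul i64 %i, 4
//   %sunkaddr2 = add i64 %sunkaddr, %sunkaddr1
//   %sunkaddr3 = add i64 %sunkaddr2, 8
//   %sunkaddr4 = inttoptr i64 %sunkaddr3 to i32*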
5095
5096 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5097 // Store the newly computed address into the cache. In the case we reused a
5098 // value, this should be idempotent.
5099 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5100
5101 // If we have no uses, recursively delete the value and all dead instructions
5102 // using it.
5103 if (Repl->use_empty()) {
5104 // This can cause recursive deletion, which can invalidate our iterator.
5105 // Use a WeakTrackingVH to hold onto it in case this happens.
5106 Value *CurValue = &*CurInstIterator;
5107 WeakTrackingVH IterHandle(CurValue);
5108 BasicBlock *BB = CurInstIterator->getParent();
5109
5110 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
5111
5112 if (IterHandle != CurValue) {
5113 // If the iterator instruction was recursively deleted, start over at the
5114 // start of the block.
5115 CurInstIterator = BB->begin();
5116 SunkAddrs.clear();
5117 }
5118 }
5119 ++NumMemoryInsts;
5120 return true;
5121}
5122
5123/// If there are any memory operands, use OptimizeMemoryInst to sink their
5124/// address computing into the block when possible / profitable.
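///
/// For instance (an illustrative sketch), given an inline asm call such as
/// \code
/// call void asm "movl $$42, $0", "=*m"(i32* %p)
/// \endcode
/// the "=*m" operand is an indirect memory constraint, so the address %p is
/// handed to optimizeMemoryInst for possible sinking.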
5125bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5126 bool MadeChange = false;
5127
5128 const TargetRegisterInfo *TRI =
5129 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5130 TargetLowering::AsmOperandInfoVector TargetConstraints =
5131 TLI->ParseConstraints(*DL, TRI, CS);
5132 unsigned ArgNo = 0;
5133 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5134 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
5135
5136 // Compute the constraint code and ConstraintType to use.
5137 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5138
5139 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5140 OpInfo.isIndirect) {
5141 Value *OpVal = CS->getArgOperand(ArgNo++);
5142 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5143 } else if (OpInfo.Type == InlineAsm::isInput)
5144 ArgNo++;
5145 }
5146
5147 return MadeChange;
5148}
5149
5150/// Check if all the uses of \p Val are equivalent (or free) zero or
5151/// sign extensions.
5152static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5153 assert(!Val->use_empty() && "Input must have at least one use");
5154 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5155 bool IsSExt = isa<SExtInst>(FirstUser);
5156 Type *ExtTy = FirstUser->getType();
5157 for (const User *U : Val->users()) {
5158 const Instruction *UI = cast<Instruction>(U);
5159 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5160 return false;
5161 Type *CurTy = UI->getType();
5162 // Same input and output types: Same instruction after CSE.
5163 if (CurTy == ExtTy)
5164 continue;
5165
5166 // If IsSExt is true, we are in this situation:
5167 // a = Val
5168 // b = sext ty1 a to ty2
5169 // c = sext ty1 a to ty3
5170 // Assuming ty2 is shorter than ty3, this could be turned into:
5171 // a = Val
5172 // b = sext ty1 a to ty2
5173 // c = sext ty2 b to ty3
5174 // However, the last sext is not free.
5175 if (IsSExt)
5176 return false;
5177
5178 // This is a ZExt, maybe this is free to extend from one type to another.
5179 // In that case, we would not account for a different use.
5180 Type *NarrowTy;
5181 Type *LargeTy;
5182 if (ExtTy->getScalarType()->getIntegerBitWidth() >
5183 CurTy->getScalarType()->getIntegerBitWidth()) {
5184 NarrowTy = CurTy;
5185 LargeTy = ExtTy;
5186 } else {
5187 NarrowTy = ExtTy;
5188 LargeTy = CurTy;
5189 }
5190
5191 if (!TLI.isZExtFree(NarrowTy, LargeTy))
5192 return false;
5193 }
5194 // All uses are the same or can be derived from one another for free.
5195 return true;
5196}
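// For example (illustrative, with placeholder values): given
//   %a = zext i8 %v to i16
//   %b = zext i8 %v to i32
// hasSameExtUse treats the two uses of %v as equivalent when the target
// reports zext from i16 to i32 as free, since %b could then be derived from
// %a at no extra cost.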
5197
5198/// Try to speculatively promote extensions in \p Exts and continue
5199/// promoting through newly promoted operands recursively as far as doing so is
5200/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
5201/// When some promotion happened, \p TPT contains the proper state to revert
5202/// them.
5203///
5204/// \return true if some promotion happened, false otherwise.
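///
/// For instance (an illustrative sketch with placeholder values), a chain like
/// \code
/// %ld = load i16, i16* %p
/// %a = add nuw i16 %ld, 3
/// %z = zext i16 %a to i32
/// \endcode
/// may be speculatively rewritten so the extension sits directly on the load:
/// \code
/// %ld = load i16, i16* %p
/// %z = zext i16 %ld to i32
/// %a = add nuw i32 %z, 3
/// \endcode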
5205bool CodeGenPrepare::tryToPromoteExts(
5206 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
5207 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
5208 unsigned CreatedInstsCost) {
5209 bool Promoted = false;
5210
5211 // Iterate over all the extensions to try to promote them.
5212 for (auto I : Exts) {
5213 // Early check if we directly have ext(load).
5214 if (isa<LoadInst>(I->getOperand(0))) {
5215 ProfitablyMovedExts.push_back(I);
5216 continue;
5217 }
5218
5219 // Check whether or not we want to do any promotion. The reason we have
5220 // this check inside the for loop is to catch the case where an extension
5221 // is directly fed by a load, because in such a case the extension can be moved
5222 // up without any promotion on its operands.
5223 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
5224 return false;
5225
5226 // Get the action to perform the promotion.
5227 TypePromotionHelper::Action TPH =
5228 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
5229 // Check if we can promote.
5230 if (!TPH) {
5231 // Save the current extension as we cannot move up through its operand.
5232 ProfitablyMovedExts.push_back(I);
5233 continue;
5234 }
5235
5236 // Save the current state.
5237 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5238 TPT.getRestorationPoint();
5239 SmallVector<Instruction *, 4> NewExts;
5240 unsigned NewCreatedInstsCost = 0;
5241 unsigned ExtCost = !TLI->isExtFree(I);
5242 // Promote.
5243 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
5244 &NewExts, nullptr, *TLI);
5245 assert(PromotedVal &&
5246 "TypePromotionHelper should have filtered out those cases");
5247
5248 // We would be able to merge only one extension in a load.
5249 // Therefore, if we have more than 1 new extension we heuristically
5250 // cut this search path, because it means we degrade the code quality.
5251 // With exactly 2, the transformation is neutral, because we will merge
5252 // one extension but leave one. However, we optimistically keep going,
5253 // because the new extension may be removed too.
5254 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5255 // FIXME: It would be possible to propagate a negative value instead of
5256 // conservatively ceiling it to 0.
5257 TotalCreatedInstsCost =
5258 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5259 if (!StressExtLdPromotion &&
5260 (TotalCreatedInstsCost > 1 ||
5261 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5262 // This promotion is not profitable, rollback to the previous state, and
5263 // save the current extension in ProfitablyMovedExts as the latest
5264 // speculative promotion turned out to be unprofitable.
5265 TPT.rollback(LastKnownGood);
5266 ProfitablyMovedExts.push_back(I);
5267 continue;
5268 }
5269 // Continue promoting NewExts as far as doing so is profitable.
5270 SmallVector<Instruction *, 2> NewlyMovedExts;
5271 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5272 bool NewPromoted = false;
5273 for (auto ExtInst : NewlyMovedExts) {
5274 Instruction *MovedExt = cast<Instruction>(ExtInst);
5275 Value *ExtOperand = MovedExt->getOperand(0);
5276 // If we have reached a load, we need this extra profitability check
5277 // as it could potentially be merged into an ext(load).
5278 if (isa<LoadInst>(ExtOperand) &&
5279 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5280 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5281 continue;
5282
5283 ProfitablyMovedExts.push_back(MovedExt);
5284 NewPromoted = true;
5285 }
5286
5287 // If none of speculative promotions for NewExts is profitable, rollback
5288 // and save the current extension (I) as the last profitable extension.
5289 if (!NewPromoted) {
5290 TPT.rollback(LastKnownGood);
5291 ProfitablyMovedExts.push_back(I);
5292 continue;
5293 }
5294 // The promotion is profitable.
5295 Promoted = true;
5296 }
5297 return Promoted;
5298}
5299
5300/// Merge redundant sexts when one dominates the other.
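///
/// For example (illustrative, with placeholder values):
/// \code
/// %s1 = sext i32 %x to i64   ; %s1 dominates %s2
/// ...
/// %s2 = sext i32 %x to i64
/// \endcode
/// All uses of %s2 are rewritten to use %s1 and %s2 is removed.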
5301bool CodeGenPrepare::mergeSExts(Function &F) {
5302 bool Changed = false;
5303 for (auto &Entry : ValToSExtendedUses) {
5304 SExts &Insts = Entry.second;
5305 SExts CurPts;
5306 for (Instruction *Inst : Insts) {
5307 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5308 Inst->getOperand(0) != Entry.first)
5309 continue;
5310 bool inserted = false;
5311 for (auto &Pt : CurPts) {
5312 if (getDT(F).dominates(Inst, Pt)) {
5313 Pt->replaceAllUsesWith(Inst);
5314 RemovedInsts.insert(Pt);
5315 Pt->removeFromParent();
5316 Pt = Inst;
5317 inserted = true;
5318 Changed = true;
5319 break;
5320 }
5321 if (!getDT(F).dominates(Pt, Inst))
5322 // Give up if we need to merge in a common dominator as the
5323 // experiments show it is not profitable.
5324 continue;
5325 Inst->replaceAllUsesWith(Pt);
5326 RemovedInsts.insert(Inst);
5327 Inst->removeFromParent();
5328 inserted = true;
5329 Changed = true;
5330 break;
5331 }
5332 if (!inserted)
5333 CurPts.push_back(Inst);
5334 }
5335 }
5336 return Changed;
5337}
5338
5339 // Splitting large data structures so that the GEPs accessing them can have
5340 // smaller offsets, allowing them to be sunk to the same blocks as their users.
5341 // For example, a large struct starting from %base is split into two parts
5342// where the second part starts from %new_base.
5343//
5344// Before:
5345// BB0:
5346// %base =
5347//
5348// BB1:
5349// %gep0 = gep %base, off0
5350// %gep1 = gep %base, off1
5351// %gep2 = gep %base, off2
5352//
5353// BB2:
5354// %load1 = load %gep0
5355// %load2 = load %gep1
5356// %load3 = load %gep2
5357//
5358// After:
5359// BB0:
5360// %base =
5361// %new_base = gep %base, off0
5362//
5363// BB1:
5364// %new_gep0 = %new_base
5365// %new_gep1 = gep %new_base, off1 - off0
5366// %new_gep2 = gep %new_base, off2 - off0
5367//
5368// BB2:
5369// %load1 = load i32, i32* %new_gep0
5370// %load2 = load i32, i32* %new_gep1
5371// %load3 = load i32, i32* %new_gep2
5372//
5373// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
5374 // their offsets are small enough to fit into the addressing mode.
5375bool CodeGenPrepare::splitLargeGEPOffsets() {
5376 bool Changed = false;
5377 for (auto &Entry : LargeOffsetGEPMap) {
5378 Value *OldBase = Entry.first;
5379 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5380 &LargeOffsetGEPs = Entry.second;
5381 auto compareGEPOffset =
5382 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5383 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5384 if (LHS.first == RHS.first)
5385 return false;
5386 if (LHS.second != RHS.second)
5387 return LHS.second < RHS.second;
5388 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5389 };
5390 // Sort all the GEPs of the same data structure based on their offsets.
5391 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5392 LargeOffsetGEPs.erase(
5393 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5394 LargeOffsetGEPs.end());
5395 // Skip if all the GEPs have the same offsets.
5396 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5397 continue;
5398 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5399 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5400 Value *NewBaseGEP = nullptr;
5401
5402 auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5403 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5404 GetElementPtrInst *GEP = LargeOffsetGEP->first;
5405 int64_t Offset = LargeOffsetGEP->second;
5406 if (Offset != BaseOffset) {
5407 TargetLowering::AddrMode AddrMode;
5408 AddrMode.BaseOffs = Offset - BaseOffset;
5409 // The result type of the GEP might not be the type of the memory
5410 // access.
5411 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5412 GEP->getResultElementType(),
5413 GEP->getAddressSpace())) {
5414 // We need to create a new base if the offset to the current base is
5415 // too large to fit into the addressing mode. So, a very large struct
5416 // may be split into several parts.
5417 BaseGEP = GEP;
5418 BaseOffset = Offset;
5419 NewBaseGEP = nullptr;
5420 }
5421 }
5422
5423 // Generate a new GEP to replace the current one.
5424 LLVMContext &Ctx = GEP->getContext();
5425 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5426 Type *I8PtrTy =
5427 Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5428 Type *I8Ty = Type::getInt8Ty(Ctx);
5429
5430 if (!NewBaseGEP) {
5431 // Create a new base if we don't have one yet. Find the insertion
5432 // pointer for the new base first.
5433 BasicBlock::iterator NewBaseInsertPt;
5434 BasicBlock *NewBaseInsertBB;
5435 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5436 // If the base of the struct is an instruction, the new base will be
5437 // inserted close to it.
5438 NewBaseInsertBB = BaseI->getParent();
5439 if (isa<PHINode>(BaseI))
5440 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5441 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5442 NewBaseInsertBB =
5443 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5444 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5445 } else
5446 NewBaseInsertPt = std::next(BaseI->getIterator());
5447 } else {
5448 // If the current base is an argument or global value, the new base
5449 // will be inserted to the entry block.
5450 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5451 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5452 }
5453 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5454 // Create a new base.
5455 Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5456 NewBaseGEP = OldBase;
5457 if (NewBaseGEP->getType() != I8PtrTy)
5458 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5459 NewBaseGEP =
5460 NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5461 NewGEPBases.insert(NewBaseGEP);
5462 }
5463
5464 IRBuilder<> Builder(GEP);
5465 Value *NewGEP = NewBaseGEP;
5466 if (Offset == BaseOffset) {
5467 if (GEP->getType() != I8PtrTy)
5468 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5469 } else {
5470 // Calculate the new offset for the new GEP.
5471 Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5472 NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5473
5474 if (GEP->getType() != I8PtrTy)
5475 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5476 }
5477 GEP->replaceAllUsesWith(NewGEP);
5478 LargeOffsetGEPID.erase(GEP);
5479 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5480 GEP->eraseFromParent();
5481 Changed = true;
5482 }
5483 }
5484 return Changed;
5485}
5486
5487/// Return true if an ext(load) can be formed from an extension in
5488/// \p MovedExts.
5489bool CodeGenPrepare::canFormExtLd(
5490 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5491 Instruction *&Inst, bool HasPromoted) {
5492 for (auto *MovedExtInst : MovedExts) {
5493 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5494 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5495 Inst = MovedExtInst;
5496 break;
5497 }
5498 }
5499 if (!LI)
5500 return false;
5501
5502 // If they're already in the same block, there's nothing to do.
5503 // Make the cheap checks first if we did not promote.
5504 // If we promoted, we need to check if it is indeed profitable.
5505 if (!HasPromoted && LI->getParent() == Inst->getParent())
5506 return false;
5507
5508 return TLI->isExtLoad(LI, Inst, *DL);
5509}
5510
5511/// Move a zext or sext fed by a load into the same basic block as the load,
5512/// unless conditions are unfavorable. This allows SelectionDAG to fold the
5513/// extend into the load.
5514///
5515/// E.g.,
5516/// \code
5517/// %ld = load i32* %addr
5518/// %add = add nuw i32 %ld, 4
5519/// %zext = zext i32 %add to i64
5520/// \endcode
5521/// =>
5522/// \code
5523/// %ld = load i32* %addr
5524/// %zext = zext i32 %ld to i64
5525/// %add = add nuw i64 %zext, 4
5526/// \endcode
5527/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5528/// allows us to match zext(load i32*) to i64.
5529///
5530/// Also, try to promote the computations used to obtain a sign extended
5531/// value used into memory accesses.
5532/// E.g.,
5533/// \code
5534/// a = add nsw i32 b, 3
5535/// d = sext i32 a to i64
5536/// e = getelementptr ..., i64 d
5537/// \endcode
5538/// =>
5539/// \code
5540/// f = sext i32 b to i64
5541/// a = add nsw i64 f, 3
5542/// e = getelementptr ..., i64 a
5543/// \endcode
5544///
5545/// \p Inst[in/out] the extension may be modified during the process if some
5546/// promotions apply.
5547bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5548 // ExtLoad formation and address type promotion infrastructure requires TLI to
5549 // be effective.
5550 if (!TLI)
5551 return false;
5552
5553 bool AllowPromotionWithoutCommonHeader = false;
5554 /// See if it is an interesting sext operation for the address type
5555 /// promotion before trying to promote it, e.g., the ones with the right
5556 /// type and used in memory accesses.
5557 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5558 *Inst, AllowPromotionWithoutCommonHeader);
5559 TypePromotionTransaction TPT(RemovedInsts);
5560 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5561 TPT.getRestorationPoint();
5562 SmallVector<Instruction *, 1> Exts;
5563 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5564 Exts.push_back(Inst);
5565
5566 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5567
5568 // Look for a load being extended.
5569 LoadInst *LI = nullptr;
5570 Instruction *ExtFedByLoad;
5571
5572 // Try to promote a chain of computation if it allows to form an extended
5573 // load.
5574 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5575 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5576 TPT.commit();
5577 // Move the extend into the same block as the load
5578 ExtFedByLoad->moveAfter(LI);
5579 // CGP does not check if the zext would be speculatively executed when moved
5580 // to the same basic block as the load. Preserving its original location
5581 // would pessimize the debugging experience, as well as negatively impact
5582 // the quality of sample pgo. We don't want to use "line 0" as that has a
5583 // size cost in the line-table section and logically the zext can be seen as
5584 // part of the load. Therefore we conservatively reuse the same debug
5585 // location for the load and the zext.
5586 ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5587 ++NumExtsMoved;
5588 Inst = ExtFedByLoad;
5589 return true;
5590 }
5591
5592 // Continue promoting SExts if known as considerable depending on targets.
5593 if (ATPConsiderable &&
5594 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5595 HasPromoted, TPT, SpeculativelyMovedExts))
5596 return true;
5597
5598 TPT.rollback(LastKnownGood);
5599 return false;
5600}
5601
5602// Perform address type promotion if doing so is profitable.
5603// If AllowPromotionWithoutCommonHeader == false, we should find other sext
5604// instructions that sign extended the same initial value. However, if
5605// AllowPromotionWithoutCommonHeader == true, we expect promoting the
5606// extension to be profitable on its own.
5607bool CodeGenPrepare::performAddressTypePromotion(
5608 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5609 bool HasPromoted, TypePromotionTransaction &TPT,
5610 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5611 bool Promoted = false;
5612 SmallPtrSet<Instruction *, 1> UnhandledExts;
5613 bool AllSeenFirst = true;
5614 for (auto I : SpeculativelyMovedExts) {
5615 Value *HeadOfChain = I->getOperand(0);
5616 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5617 SeenChainsForSExt.find(HeadOfChain);
5618 // If there is an unhandled SExt which has the same header, try to promote
5619 // it as well.
5620 if (AlreadySeen != SeenChainsForSExt.end()) {
5621 if (AlreadySeen->second != nullptr)
5622 UnhandledExts.insert(AlreadySeen->second);
5623 AllSeenFirst = false;
5624 }
5625 }
5626
5627 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5628 SpeculativelyMovedExts.size() == 1)) {
5629 TPT.commit();
5630 if (HasPromoted)
5631 Promoted = true;
5632 for (auto I : SpeculativelyMovedExts) {
5633 Value *HeadOfChain = I->getOperand(0);
5634 SeenChainsForSExt[HeadOfChain] = nullptr;
5635 ValToSExtendedUses[HeadOfChain].push_back(I);
5636 }
5637 // Update Inst as promotion happened.
5638 Inst = SpeculativelyMovedExts.pop_back_val();
5639 } else {
5640 // This is the first chain visited from the header, keep the current chain
5641 // as unhandled. Defer promoting it until we encounter another SExt
5642 // chain derived from the same header.
5643 for (auto I : SpeculativelyMovedExts) {
5644 Value *HeadOfChain = I->getOperand(0);
5645 SeenChainsForSExt[HeadOfChain] = Inst;
5646 }
5647 return false;
5648 }
5649
5650 if (!AllSeenFirst && !UnhandledExts.empty())
5651 for (auto VisitedSExt : UnhandledExts) {
5652 if (RemovedInsts.count(VisitedSExt))
5653 continue;
5654 TypePromotionTransaction TPT(RemovedInsts);
5655 SmallVector<Instruction *, 1> Exts;
5656 SmallVector<Instruction *, 2> Chains;
5657 Exts.push_back(VisitedSExt);
5658 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5659 TPT.commit();
5660 if (HasPromoted)
5661 Promoted = true;
5662 for (auto I : Chains) {
5663 Value *HeadOfChain = I->getOperand(0);
5664 // Mark this as handled.
5665 SeenChainsForSExt[HeadOfChain] = nullptr;
5666 ValToSExtendedUses[HeadOfChain].push_back(I);
5667 }
5668 }
5669 return Promoted;
5670}
5671
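// Illustrative sketch of the rewrite performed below (placeholder values;
// assumes the target reports the truncate as free):
//
// DefBB:
//   %src = ...
//   %ext = zext i32 %src to i64   ; both %src and %ext are used outside DefBB
//
// UseBB:
//   use(i32 %src)
//
// becomes:
//
// DefBB:
//   %src = ...
//   %ext = zext i32 %src to i64
//
// UseBB:
//   %t = trunc i64 %ext to i32
//   use(i32 %t)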
5672bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5673 BasicBlock *DefBB = I->getParent();
5674
5675 // If the result of a {s|z}ext and its source are both live out, rewrite all
5676 // other uses of the source with result of extension.
5677 Value *Src = I->getOperand(0);
5678 if (Src->hasOneUse())
5679 return false;
5680
5681 // Only do this xform if truncating is free.
5682 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5683 return false;
5684
5685 // Only safe to perform the optimization if the source is also defined in
5686 // this block.
5687 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5688 return false;
5689
5690 bool DefIsLiveOut = false;
5691 for (User *U : I->users()) {
5692 Instruction *UI = cast<Instruction>(U);
5693
5694 // Figure out which BB this ext is used in.
5695 BasicBlock *UserBB = UI->getParent();
5696 if (UserBB == DefBB) continue;
5697 DefIsLiveOut = true;
5698 break;
5699 }
5700 if (!DefIsLiveOut)
5701 return false;
5702
5703 // Make sure none of the uses are PHI nodes.
5704 for (User *U : Src->users()) {
5705 Instruction *UI = cast<Instruction>(U);
5706 BasicBlock *UserBB = UI->getParent();
5707 if (UserBB == DefBB) continue;
5708 // Be conservative. We don't want this xform to end up introducing
5709 // reloads just before load / store instructions.
5710 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5711 return false;
5712 }
5713
5714 // InsertedTruncs - Insert at most one trunc in each block.
5715 DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5716
5717 bool MadeChange = false;
5718 for (Use &U : Src->uses()) {
5719 Instruction *User = cast<Instruction>(U.getUser());
5720
5721 // Figure out which BB this ext is used in.
5722 BasicBlock *UserBB = User->getParent();
5723 if (UserBB == DefBB) continue;
5724
5725 // Both src and def are live in this block. Rewrite the use.
5726 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5727
5728 if (!InsertedTrunc) {
5729 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5730 assert(InsertPt != UserBB->end());
5731 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5732 InsertedInsts.insert(InsertedTrunc);
5733 }
5734
5735 // Replace a use of the {s|z}ext source with a use of the result.
5736 U = InsertedTrunc;
5737 ++NumExtUses;
5738 MadeChange = true;
5739 }
5740
5741 return MadeChange;
5742}
5743
5744// Find loads whose uses only use some of the loaded value's bits. Add an "and"
5745// just after the load if the target can fold this into one extload instruction,
5746// with the hope of eliminating some of the other later "and" instructions using
5747// the loaded value. "and"s that are made trivially redundant by the insertion
5748// of the new "and" are removed by this function, while others (e.g. those whose
5749// path from the load goes through a phi) are left for isel to potentially
5750// remove.
5751//
5752// For example:
5753//
5754// b0:
5755// x = load i32
5756// ...
5757// b1:
5758// y = and x, 0xff
5759// z = use y
5760//
5761// becomes:
5762//
5763// b0:
5764// x = load i32
5765// x' = and x, 0xff
5766// ...
5767// b1:
5768// z = use x'
5769//
5770// whereas:
5771//
5772// b0: