Bug Summary

File: llvm/lib/CodeGen/CodeGenPrepare.cpp
Warning: line 3402, column 5
Called C++ object pointer is null
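
The checker fires when a member function is invoked through a pointer that is null on
at least one feasible path. The flagged call at line 3402 falls outside the excerpt
reproduced below, so the following self-contained sketch (using a made-up
TargetLoweringLike type) only illustrates the general shape of a "Called C++ object
pointer is null" report and the usual guard; it is not the code the analyzer flagged.

  struct TargetLoweringLike {
    bool isSlowDivBypassed() const { return false; }
  };

  // On the path where UseTLI is false, Chosen remains null and the call below
  // dereferences a null pointer: the pattern this diagnostic describes.
  bool buggy(const TargetLoweringLike *TLI, bool UseTLI) {
    const TargetLoweringLike *Chosen = UseTLI ? TLI : nullptr;
    return Chosen->isSlowDivBypassed(); // warning: Called C++ object pointer is null
  }

  // A typical fix: test the pointer (or assert the invariant that rules out null)
  // before making the call.
  bool fixed(const TargetLoweringLike *TLI, bool UseTLI) {
    const TargetLoweringLike *Chosen = UseTLI ? TLI : nullptr;
    return Chosen && Chosen->isSlowDivBypassed();
  }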

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CodeGenPrepare.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/CodeGen -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryBuiltins.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/CodeGen/Analysis.h"
36#include "llvm/CodeGen/ISDOpcodes.h"
37#include "llvm/CodeGen/SelectionDAGNodes.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetPassConfig.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/CodeGen/ValueTypes.h"
42#include "llvm/Config/llvm-config.h"
43#include "llvm/IR/Argument.h"
44#include "llvm/IR/Attributes.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/DerivedTypes.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/GetElementPtrTypeIterator.h"
53#include "llvm/IR/GlobalValue.h"
54#include "llvm/IR/GlobalVariable.h"
55#include "llvm/IR/IRBuilder.h"
56#include "llvm/IR/InlineAsm.h"
57#include "llvm/IR/InstrTypes.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/IR/MDBuilder.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/PatternMatch.h"
68#include "llvm/IR/Statepoint.h"
69#include "llvm/IR/Type.h"
70#include "llvm/IR/Use.h"
71#include "llvm/IR/User.h"
72#include "llvm/IR/Value.h"
73#include "llvm/IR/ValueHandle.h"
74#include "llvm/IR/ValueMap.h"
75#include "llvm/InitializePasses.h"
76#include "llvm/Pass.h"
77#include "llvm/Support/BlockFrequency.h"
78#include "llvm/Support/BranchProbability.h"
79#include "llvm/Support/Casting.h"
80#include "llvm/Support/CommandLine.h"
81#include "llvm/Support/Compiler.h"
82#include "llvm/Support/Debug.h"
83#include "llvm/Support/ErrorHandling.h"
84#include "llvm/Support/MachineValueType.h"
85#include "llvm/Support/MathExtras.h"
86#include "llvm/Support/raw_ostream.h"
87#include "llvm/Target/TargetMachine.h"
88#include "llvm/Target/TargetOptions.h"
89#include "llvm/Transforms/Utils/BasicBlockUtils.h"
90#include "llvm/Transforms/Utils/BypassSlowDivision.h"
91#include "llvm/Transforms/Utils/Local.h"
92#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
93#include "llvm/Transforms/Utils/SizeOpts.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <limits>
99#include <memory>
100#include <utility>
101#include <vector>
102
103using namespace llvm;
104using namespace llvm::PatternMatch;
105
106#define DEBUG_TYPE "codegenprepare"
107
108STATISTIC(NumBlocksElim, "Number of blocks eliminated");
109STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
110STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
111STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
112                      "sunken Cmps");
113STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
114                       "of sunken Casts");
115STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
116                          "computations were sunk");
117STATISTIC(NumMemoryInstsPhiCreated,
118          "Number of phis created when address "
119          "computations were sunk to memory instructions");
120STATISTIC(NumMemoryInstsSelectCreated,
121          "Number of select created when address "
122          "computations were sunk to memory instructions");
123STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
124STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
125STATISTIC(NumAndsAdded,
126          "Number of and mask instructions added to form ext loads");
127STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
128STATISTIC(NumRetsDup, "Number of return instructions duplicated");
129STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
130STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
131STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
132
133static cl::opt<bool> DisableBranchOpts(
134 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
135 cl::desc("Disable branch optimizations in CodeGenPrepare"));
136
137static cl::opt<bool>
138 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable GC optimizations in CodeGenPrepare"));
140
141static cl::opt<bool> DisableSelectToBranch(
142 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
143 cl::desc("Disable select to branch conversion."));
144
145static cl::opt<bool> AddrSinkUsingGEPs(
146 "addr-sink-using-gep", cl::Hidden, cl::init(true),
147 cl::desc("Address sinking in CGP using GEPs."));
148
149static cl::opt<bool> EnableAndCmpSinking(
150 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
151 cl::desc("Enable sinking and/cmp into branches."));
152
153static cl::opt<bool> DisableStoreExtract(
154 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
155 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
156
157static cl::opt<bool> StressStoreExtract(
158 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
159 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
160
161static cl::opt<bool> DisableExtLdPromotion(
162 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
163 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
164 "CodeGenPrepare"));
165
166static cl::opt<bool> StressExtLdPromotion(
167 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
169 "optimization in CodeGenPrepare"));
170
171static cl::opt<bool> DisablePreheaderProtect(
172 "disable-preheader-prot", cl::Hidden, cl::init(false),
173 cl::desc("Disable protection against removing loop preheaders"));
174
175static cl::opt<bool> ProfileGuidedSectionPrefix(
176 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
177 cl::desc("Use profile info to add section prefix for hot/cold functions"));
178
179static cl::opt<bool> ProfileUnknownInSpecialSection(
180 "profile-unknown-in-special-section", cl::Hidden, cl::init(false),
181 cl::ZeroOrMore,
182 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
183 "profile, we cannot tell the function is cold for sure because "
184 "it may be a function newly added without ever being sampled. "
185 "With the flag enabled, compiler can put such profile unknown "
186 "functions into a special section, so runtime system can choose "
187 "to handle it in a different way than .text section, to save "
188 "RAM for example. "));
189
190static cl::opt<unsigned> FreqRatioToSkipMerge(
191 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
192 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
193 "(frequency of destination block) is greater than this ratio"));
194
195static cl::opt<bool> ForceSplitStore(
196 "force-split-store", cl::Hidden, cl::init(false),
197 cl::desc("Force store splitting no matter what the target query says."));
198
199static cl::opt<bool>
200EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
201 cl::desc("Enable merging of redundant sexts when one is dominating"
202 " the other."), cl::init(true));
203
204static cl::opt<bool> DisableComplexAddrModes(
205 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
206 cl::desc("Disables combining addressing modes with different parts "
207 "in optimizeMemoryInst."));
208
209static cl::opt<bool>
210AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
211 cl::desc("Allow creation of Phis in Address sinking."));
212
213static cl::opt<bool>
214AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
215 cl::desc("Allow creation of selects in Address sinking."));
216
217static cl::opt<bool> AddrSinkCombineBaseReg(
218 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
219 cl::desc("Allow combining of BaseReg field in Address sinking."));
220
221static cl::opt<bool> AddrSinkCombineBaseGV(
222 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
223 cl::desc("Allow combining of BaseGV field in Address sinking."));
224
225static cl::opt<bool> AddrSinkCombineBaseOffs(
226 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
227 cl::desc("Allow combining of BaseOffs field in Address sinking."));
228
229static cl::opt<bool> AddrSinkCombineScaledReg(
230 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
231 cl::desc("Allow combining of ScaledReg field in Address sinking."));
232
233static cl::opt<bool>
234 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
235 cl::init(true),
236 cl::desc("Enable splitting large offset of GEP."));
237
238static cl::opt<bool> EnableICMP_EQToICMP_ST(
239 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
240 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
241
242static cl::opt<bool>
243 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
244 cl::desc("Enable BFI update verification for "
245 "CodeGenPrepare."));
246
247static cl::opt<bool> OptimizePhiTypes(
248 "cgp-optimize-phi-types", cl::Hidden, cl::init(false),
249 cl::desc("Enable converting phi types in CodeGenPrepare"));
250
251namespace {
252
253enum ExtType {
254 ZeroExtension, // Zero extension has been seen.
255 SignExtension, // Sign extension has been seen.
256 BothExtension // This extension type is used if we saw sext after
257 // ZeroExtension had been set, or if we saw zext after
258 // SignExtension had been set. It makes the type
259 // information of a promoted instruction invalid.
260};
261
262using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
263using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
264using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
265using SExts = SmallVector<Instruction *, 16>;
266using ValueToSExts = DenseMap<Value *, SExts>;
267
268class TypePromotionTransaction;
269
270 class CodeGenPrepare : public FunctionPass {
271 const TargetMachine *TM = nullptr;
272 const TargetSubtargetInfo *SubtargetInfo;
273 const TargetLowering *TLI = nullptr;
274 const TargetRegisterInfo *TRI;
275 const TargetTransformInfo *TTI = nullptr;
276 const TargetLibraryInfo *TLInfo;
277 const LoopInfo *LI;
278 std::unique_ptr<BlockFrequencyInfo> BFI;
279 std::unique_ptr<BranchProbabilityInfo> BPI;
280 ProfileSummaryInfo *PSI;
281
282 /// As we scan instructions optimizing them, this is the next instruction
283 /// to optimize. Transforms that can invalidate this should update it.
284 BasicBlock::iterator CurInstIterator;
285
286 /// Keeps track of non-local addresses that have been sunk into a block.
287 /// This allows us to avoid inserting duplicate code for blocks with
288 /// multiple load/stores of the same address. The usage of WeakTrackingVH
289 /// enables SunkAddrs to be treated as a cache whose entries can be
290 /// invalidated if a sunken address computation has been erased.
291 ValueMap<Value*, WeakTrackingVH> SunkAddrs;
292
293 /// Keeps track of all instructions inserted for the current function.
294 SetOfInstrs InsertedInsts;
295
296 /// Keeps track of the types of the related instructions before their
297 /// promotion for the current function.
298 InstrToOrigTy PromotedInsts;
299
300 /// Keep track of instructions removed during promotion.
301 SetOfInstrs RemovedInsts;
302
303 /// Keep track of sext chains based on their initial value.
304 DenseMap<Value *, Instruction *> SeenChainsForSExt;
305
306 /// Keep track of GEPs accessing the same data structures such as structs or
307 /// arrays that are candidates to be split later because of their large
308 /// size.
309 MapVector<
310 AssertingVH<Value>,
311 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
312 LargeOffsetGEPMap;
313
314 /// Keep track of new GEP base after splitting the GEPs having large offset.
315 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
316
317 /// Map serial numbers to Large offset GEPs.
318 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
319
320 /// Keep track of SExt promoted.
321 ValueToSExts ValToSExtendedUses;
322
323 /// True if the function has the OptSize attribute.
324 bool OptSize;
325
326 /// DataLayout for the Function being processed.
327 const DataLayout *DL = nullptr;
328
329 /// Building the dominator tree can be expensive, so we only build it
330 /// lazily and update it when required.
331 std::unique_ptr<DominatorTree> DT;
332
333 public:
334 static char ID; // Pass identification, replacement for typeid
335
336 CodeGenPrepare() : FunctionPass(ID) {
337 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
338 }
339
340 bool runOnFunction(Function &F) override;
341
342 StringRef getPassName() const override { return "CodeGen Prepare"; }
343
344 void getAnalysisUsage(AnalysisUsage &AU) const override {
345 // FIXME: When we can selectively preserve passes, preserve the domtree.
346 AU.addRequired<ProfileSummaryInfoWrapperPass>();
347 AU.addRequired<TargetLibraryInfoWrapperPass>();
348 AU.addRequired<TargetPassConfig>();
349 AU.addRequired<TargetTransformInfoWrapperPass>();
350 AU.addRequired<LoopInfoWrapperPass>();
351 }
352
353 private:
354 template <typename F>
355 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
356 // Substituting can cause recursive simplifications, which can invalidate
357 // our iterator. Use a WeakTrackingVH to hold onto it in case this
358 // happens.
359 Value *CurValue = &*CurInstIterator;
360 WeakTrackingVH IterHandle(CurValue);
361
362 f();
363
364 // If the iterator instruction was recursively deleted, start over at the
365 // start of the block.
366 if (IterHandle != CurValue) {
367 CurInstIterator = BB->begin();
368 SunkAddrs.clear();
369 }
370 }
371
372 // Get the DominatorTree, building if necessary.
373 DominatorTree &getDT(Function &F) {
374 if (!DT)
375 DT = std::make_unique<DominatorTree>(F);
376 return *DT;
377 }
378
379 void removeAllAssertingVHReferences(Value *V);
380 bool eliminateAssumptions(Function &F);
381 bool eliminateFallThrough(Function &F);
382 bool eliminateMostlyEmptyBlocks(Function &F);
383 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
384 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
385 void eliminateMostlyEmptyBlock(BasicBlock *BB);
386 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
387 bool isPreheader);
388 bool makeBitReverse(Instruction &I);
389 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
390 bool optimizeInst(Instruction *I, bool &ModifiedDT);
391 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
392 Type *AccessTy, unsigned AddrSpace);
393 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
394 bool optimizeInlineAsmInst(CallInst *CS);
395 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
396 bool optimizeExt(Instruction *&I);
397 bool optimizeExtUses(Instruction *I);
398 bool optimizeLoadExt(LoadInst *Load);
399 bool optimizeShiftInst(BinaryOperator *BO);
400 bool optimizeFunnelShift(IntrinsicInst *Fsh);
401 bool optimizeSelectInst(SelectInst *SI);
402 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
403 bool optimizeSwitchInst(SwitchInst *SI);
404 bool optimizeExtractElementInst(Instruction *Inst);
405 bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
406 bool fixupDbgValue(Instruction *I);
407 bool placeDbgValues(Function &F);
408 bool placePseudoProbes(Function &F);
409 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
410 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
411 bool tryToPromoteExts(TypePromotionTransaction &TPT,
412 const SmallVectorImpl<Instruction *> &Exts,
413 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
414 unsigned CreatedInstsCost = 0);
415 bool mergeSExts(Function &F);
416 bool splitLargeGEPOffsets();
417 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
418 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
419 bool optimizePhiTypes(Function &F);
420 bool performAddressTypePromotion(
421 Instruction *&Inst,
422 bool AllowPromotionWithoutCommonHeader,
423 bool HasPromoted, TypePromotionTransaction &TPT,
424 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
425 bool splitBranchCondition(Function &F, bool &ModifiedDT);
426 bool simplifyOffsetableRelocate(GCStatepointInst &I);
427
428 bool tryToSinkFreeOperands(Instruction *I);
429 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
430 Value *Arg1, CmpInst *Cmp,
431 Intrinsic::ID IID);
432 bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
433 bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
434 bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
435 void verifyBFIUpdates(Function &F);
436 };
437
438} // end anonymous namespace
439
440char CodeGenPrepare::ID = 0;
441
442INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
443                      "Optimize for code generation", false, false)
444INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
445INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
446INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
447INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
448INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
449INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
450                    "Optimize for code generation", false, false)
451
452FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
453
454bool CodeGenPrepare::runOnFunction(Function &F) {
455 if (skipFunction(F))
456 return false;
457
458 DL = &F.getParent()->getDataLayout();
459
460 bool EverMadeChange = false;
461 // Clear per function information.
462 InsertedInsts.clear();
463 PromotedInsts.clear();
464
465 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
466 SubtargetInfo = TM->getSubtargetImpl(F);
467 TLI = SubtargetInfo->getTargetLowering();
468 TRI = SubtargetInfo->getRegisterInfo();
469 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
470 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
471 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
472 BPI.reset(new BranchProbabilityInfo(F, *LI));
473 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
474 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
475 OptSize = F.hasOptSize();
476 if (ProfileGuidedSectionPrefix) {
477 // The hot attribute overrides profile-count-based hotness, while
478 // profile-count-based hotness overrides the cold attribute.
479 // This is conservative behavior.
480 if (F.hasFnAttribute(Attribute::Hot) ||
481 PSI->isFunctionHotInCallGraph(&F, *BFI))
482 F.setSectionPrefix("hot");
483 // If PSI shows this function is not hot, we place the function into the
484 // unlikely section if (1) PSI shows this is a cold function, or
485 // (2) the function has the cold attribute.
486 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
487 F.hasFnAttribute(Attribute::Cold))
488 F.setSectionPrefix("unlikely");
489 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
490 PSI->isFunctionHotnessUnknown(F))
491 F.setSectionPrefix("unknown");
492 }
493
494 /// This optimization identifies DIV instructions that can be
495 /// profitably bypassed and carried out with a shorter, faster divide.
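  // For example, on subtargets where TLI->getBypassSlowDivWidths() maps 64-bit
  // division to 32 bits, bypassSlowDivision() guards a 64-bit udiv/urem with a
  // runtime check that both operands fit in 32 bits and, when they do, issues the
  // narrower, faster divide instead (see llvm/Transforms/Utils/BypassSlowDivision.h).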
496 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
497 const DenseMap<unsigned int, unsigned int> &BypassWidths =
498 TLI->getBypassSlowDivWidths();
499 BasicBlock* BB = &*F.begin();
500 while (BB != nullptr) {
501 // bypassSlowDivision may create new BBs, but we don't want to reapply the
502 // optimization to those blocks.
503 BasicBlock* Next = BB->getNextNode();
504 // F.hasOptSize is already checked in the outer if statement.
505 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
506 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
507 BB = Next;
508 }
509 }
510
511 // Get rid of @llvm.assume builtins before attempting to eliminate empty
512 // blocks, since there might be blocks that only contain @llvm.assume calls
513 // (plus arguments that we can get rid of).
514 EverMadeChange |= eliminateAssumptions(F);
515
516 // Eliminate blocks that contain only PHI nodes and an
517 // unconditional branch.
518 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
519
520 bool ModifiedDT = false;
521 if (!DisableBranchOpts)
522 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
523
524 // Split some critical edges where one of the sources is an indirect branch,
525 // to help generate sane code for PHIs involving such edges.
526 EverMadeChange |= SplitIndirectBrCriticalEdges(F);
527
528 bool MadeChange = true;
529 while (MadeChange) {
530 MadeChange = false;
531 DT.reset();
532 for (Function::iterator I = F.begin(); I != F.end(); ) {
533 BasicBlock *BB = &*I++;
534 bool ModifiedDTOnIteration = false;
535 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
536
537 // Restart BB iteration if the dominator tree of the Function was changed
538 if (ModifiedDTOnIteration)
539 break;
540 }
541 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
542 MadeChange |= mergeSExts(F);
543 if (!LargeOffsetGEPMap.empty())
544 MadeChange |= splitLargeGEPOffsets();
545 MadeChange |= optimizePhiTypes(F);
546
547 if (MadeChange)
548 eliminateFallThrough(F);
549
550 // Really free removed instructions during promotion.
551 for (Instruction *I : RemovedInsts)
552 I->deleteValue();
553
554 EverMadeChange |= MadeChange;
555 SeenChainsForSExt.clear();
556 ValToSExtendedUses.clear();
557 RemovedInsts.clear();
558 LargeOffsetGEPMap.clear();
559 LargeOffsetGEPID.clear();
560 }
561
562 NewGEPBases.clear();
563 SunkAddrs.clear();
564
565 if (!DisableBranchOpts) {
566 MadeChange = false;
567 // Use a set vector to get deterministic iteration order. The order the
568 // blocks are removed may affect whether or not PHI nodes in successors
569 // are removed.
570 SmallSetVector<BasicBlock*, 8> WorkList;
571 for (BasicBlock &BB : F) {
572 SmallVector<BasicBlock *, 2> Successors(successors(&BB));
573 MadeChange |= ConstantFoldTerminator(&BB, true);
574 if (!MadeChange) continue;
575
576 for (BasicBlock *Succ : Successors)
577 if (pred_empty(Succ))
578 WorkList.insert(Succ);
579 }
580
581 // Delete the dead blocks and any of their dead successors.
582 MadeChange |= !WorkList.empty();
583 while (!WorkList.empty()) {
584 BasicBlock *BB = WorkList.pop_back_val();
585 SmallVector<BasicBlock*, 2> Successors(successors(BB));
586
587 DeleteDeadBlock(BB);
588
589 for (BasicBlock *Succ : Successors)
590 if (pred_empty(Succ))
591 WorkList.insert(Succ);
592 }
593
594 // Merge pairs of basic blocks with unconditional branches, connected by
595 // a single edge.
596 if (EverMadeChange || MadeChange)
597 MadeChange |= eliminateFallThrough(F);
598
599 EverMadeChange |= MadeChange;
600 }
601
602 if (!DisableGCOpts) {
603 SmallVector<GCStatepointInst *, 2> Statepoints;
604 for (BasicBlock &BB : F)
605 for (Instruction &I : BB)
606 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
607 Statepoints.push_back(SP);
608 for (auto &I : Statepoints)
609 EverMadeChange |= simplifyOffsetableRelocate(*I);
610 }
611
612 // Do this last to clean up use-before-def scenarios introduced by other
613 // preparatory transforms.
614 EverMadeChange |= placeDbgValues(F);
615 EverMadeChange |= placePseudoProbes(F);
616
617#ifndef NDEBUG
618 if (VerifyBFIUpdates)
619 verifyBFIUpdates(F);
620#endif
621
622 return EverMadeChange;
623}
624
625bool CodeGenPrepare::eliminateAssumptions(Function &F) {
626 bool MadeChange = false;
627 for (BasicBlock &BB : F) {
628 CurInstIterator = BB.begin();
629 while (CurInstIterator != BB.end()) {
630 Instruction *I = &*(CurInstIterator++);
631 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
632 MadeChange = true;
633 Value *Operand = Assume->getOperand(0);
634 Assume->eraseFromParent();
635
636 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
637 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
638 });
639 }
640 }
641 }
642 return MadeChange;
643}
644
645/// An instruction is about to be deleted, so remove all references to it in our
646/// GEP-tracking data structures.
647void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
648 LargeOffsetGEPMap.erase(V);
649 NewGEPBases.erase(V);
650
651 auto GEP = dyn_cast<GetElementPtrInst>(V);
652 if (!GEP)
653 return;
654
655 LargeOffsetGEPID.erase(GEP);
656
657 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
658 if (VecI == LargeOffsetGEPMap.end())
659 return;
660
661 auto &GEPVector = VecI->second;
662 const auto &I =
663 llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
664 if (I == GEPVector.end())
665 return;
666
667 GEPVector.erase(I);
668 if (GEPVector.empty())
669 LargeOffsetGEPMap.erase(VecI);
670}
671
672// Verify BFI has been updated correctly by recomputing BFI and comparing them.
673void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
674 DominatorTree NewDT(F);
675 LoopInfo NewLI(NewDT);
676 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
677 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
678 NewBFI.verifyMatch(*BFI);
679}
680
681/// Merge basic blocks which are connected by a single edge, where one of the
682/// basic blocks has a single successor pointing to the other basic block,
683/// which has a single predecessor.
684bool CodeGenPrepare::eliminateFallThrough(Function &F) {
685 bool Changed = false;
686 // Scan all of the blocks in the function, except for the entry block.
687 // Use a temporary array to avoid iterator being invalidated when
688 // deleting blocks.
689 SmallVector<WeakTrackingVH, 16> Blocks;
690 for (auto &Block : llvm::drop_begin(F))
691 Blocks.push_back(&Block);
692
693 SmallSet<WeakTrackingVH, 16> Preds;
694 for (auto &Block : Blocks) {
695 auto *BB = cast_or_null<BasicBlock>(Block);
696 if (!BB)
697 continue;
698 // If the destination block has a single pred, then this is a trivial
699 // edge, just collapse it.
700 BasicBlock *SinglePred = BB->getSinglePredecessor();
701
702 // Don't merge if BB's address is taken.
703 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
704
705 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
706 if (Term && !Term->isConditional()) {
707 Changed = true;
708 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
709
710 // Merge BB into SinglePred and delete it.
711 MergeBlockIntoPredecessor(BB);
712 Preds.insert(SinglePred);
713 }
714 }
715
716 // (Repeatedly) merging blocks into their predecessors can create redundant
717 // debug intrinsics.
718 for (auto &Pred : Preds)
719 if (auto *BB = cast_or_null<BasicBlock>(Pred))
720 RemoveRedundantDbgInstrs(BB);
721
722 return Changed;
723}
724
725/// Find a destination block from BB if BB is a mergeable empty block.
726BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
727 // If this block doesn't end with an uncond branch, ignore it.
728 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
729 if (!BI || !BI->isUnconditional())
730 return nullptr;
731
732 // If the instruction before the branch (skipping debug info) isn't a phi
733 // node, then other stuff is happening here.
734 BasicBlock::iterator BBI = BI->getIterator();
735 if (BBI != BB->begin()) {
736 --BBI;
737 while (isa<DbgInfoIntrinsic>(BBI)) {
738 if (BBI == BB->begin())
739 break;
740 --BBI;
741 }
742 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
743 return nullptr;
744 }
745
746 // Do not break infinite loops.
747 BasicBlock *DestBB = BI->getSuccessor(0);
748 if (DestBB == BB)
749 return nullptr;
750
751 if (!canMergeBlocks(BB, DestBB))
752 DestBB = nullptr;
753
754 return DestBB;
755}
756
757/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
758/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
759/// edges in ways that are non-optimal for isel. Start by eliminating these
760/// blocks so we can split them the way we want them.
761bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
762 SmallPtrSet<BasicBlock *, 16> Preheaders;
763 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
764 while (!LoopList.empty()) {
765 Loop *L = LoopList.pop_back_val();
766 llvm::append_range(LoopList, *L);
767 if (BasicBlock *Preheader = L->getLoopPreheader())
768 Preheaders.insert(Preheader);
769 }
770
771 bool MadeChange = false;
772 // Copy blocks into a temporary array to avoid iterator invalidation issues
773 // as we remove them.
774 // Note that this intentionally skips the entry block.
775 SmallVector<WeakTrackingVH, 16> Blocks;
776 for (auto &Block : llvm::drop_begin(F))
777 Blocks.push_back(&Block);
778
779 for (auto &Block : Blocks) {
780 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
781 if (!BB)
782 continue;
783 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
784 if (!DestBB ||
785 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
786 continue;
787
788 eliminateMostlyEmptyBlock(BB);
789 MadeChange = true;
790 }
791 return MadeChange;
792}
793
794bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
795 BasicBlock *DestBB,
796 bool isPreheader) {
797 // Do not delete loop preheaders if doing so would create a critical edge.
798 // Loop preheaders can be good locations to spill registers. If the
799 // preheader is deleted and we create a critical edge, registers may be
800 // spilled in the loop body instead.
801 if (!DisablePreheaderProtect && isPreheader &&
802 !(BB->getSinglePredecessor() &&
803 BB->getSinglePredecessor()->getSingleSuccessor()))
804 return false;
805
806 // Skip merging if the block's successor is also a successor to any callbr
807 // that leads to this block.
808 // FIXME: Is this really needed? Is this a correctness issue?
809 for (BasicBlock *Pred : predecessors(BB)) {
810 if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator()))
811 for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
812 if (DestBB == CBI->getSuccessor(i))
813 return false;
814 }
815
816 // Try to skip merging if the unique predecessor of BB is terminated by a
817 // switch or indirect branch instruction, and BB is used as an incoming block
818 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
819 // add COPY instructions in the predecessor of BB instead of BB (if it is not
820 // merged). Note that the critical edge created by merging such blocks won't be
821 // split in MachineSink because the jump table is not analyzable. By keeping
822 // such empty block (BB), ISel will place COPY instructions in BB, not in the
823 // predecessor of BB.
824 BasicBlock *Pred = BB->getUniquePredecessor();
825 if (!Pred ||
826 !(isa<SwitchInst>(Pred->getTerminator()) ||
827 isa<IndirectBrInst>(Pred->getTerminator())))
828 return true;
829
830 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
831 return true;
832
833 // We use a simple cost heuristic which determines that skipping merging is
834 // profitable if the cost of skipping merging is less than the cost of
835 // merging : Cost(skipping merging) < Cost(merging BB), where the
836 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
837 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
838 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
839 // Freq(Pred) / Freq(BB) > 2.
840 // Note that if there are multiple empty blocks sharing the same incoming
841 // value for the PHIs in the DestBB, we consider them together. In such
842 // case, Cost(merging BB) will be the sum of their frequencies.
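  // As a concrete instance with the default FreqRatioToSkipMerge of 2: if
  // Freq(Pred) is 300 and Freq(BB) is 100, then 300 > 2 * 100 and merging is
  // skipped; the comparison at the end of this function reports merging as
  // profitable only when Freq(Pred) <= Freq(BB) * FreqRatioToSkipMerge, where
  // Freq(BB) also sums the frequencies of the other empty blocks that feed
  // DestBB the same incoming values.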
843
844 if (!isa<PHINode>(DestBB->begin()))
845 return true;
846
847 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
848
849 // Find all other incoming blocks from which incoming values of all PHIs in
850 // DestBB are the same as the ones from BB.
851 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
852 if (DestBBPred == BB)
853 continue;
854
855 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
856 return DestPN.getIncomingValueForBlock(BB) ==
857 DestPN.getIncomingValueForBlock(DestBBPred);
858 }))
859 SameIncomingValueBBs.insert(DestBBPred);
860 }
861
862 // See if all BB's incoming values are the same as the value from Pred. In this
863 // case, there is no reason to skip merging because COPYs are expected to be placed in
864 // Pred already.
865 if (SameIncomingValueBBs.count(Pred))
866 return true;
867
868 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
869 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
870
871 for (auto *SameValueBB : SameIncomingValueBBs)
872 if (SameValueBB->getUniquePredecessor() == Pred &&
873 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
874 BBFreq += BFI->getBlockFreq(SameValueBB);
875
876 return PredFreq.getFrequency() <=
877 BBFreq.getFrequency() * FreqRatioToSkipMerge;
878}
879
880/// Return true if we can merge BB into DestBB if there is a single
881/// unconditional branch between them, and BB contains no other non-phi
882/// instructions.
883bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
884 const BasicBlock *DestBB) const {
885 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
886 // the successor. If there are more complex conditions (e.g. preheaders),
887 // don't mess around with them.
888 for (const PHINode &PN : BB->phis()) {
889 for (const User *U : PN.users()) {
890 const Instruction *UI = cast<Instruction>(U);
891 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
892 return false;
893 // If User is inside DestBB block and it is a PHINode then check
894 // incoming value. If incoming value is not from BB then this is
895 // a complex condition (e.g. preheaders) we want to avoid here.
896 if (UI->getParent() == DestBB) {
897 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
898 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
899 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
900 if (Insn && Insn->getParent() == BB &&
901 Insn->getParent() != UPN->getIncomingBlock(I))
902 return false;
903 }
904 }
905 }
906 }
907
908 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
909 // and DestBB may have conflicting incoming values for the block. If so, we
910 // can't merge the block.
911 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
912 if (!DestBBPN) return true; // no conflict.
913
914 // Collect the preds of BB.
915 SmallPtrSet<const BasicBlock*, 16> BBPreds;
916 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
917 // It is faster to get preds from a PHI than with pred_iterator.
918 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
919 BBPreds.insert(BBPN->getIncomingBlock(i));
920 } else {
921 BBPreds.insert(pred_begin(BB), pred_end(BB));
922 }
923
924 // Walk the preds of DestBB.
925 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
926 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
927 if (BBPreds.count(Pred)) { // Common predecessor?
928 for (const PHINode &PN : DestBB->phis()) {
929 const Value *V1 = PN.getIncomingValueForBlock(Pred);
930 const Value *V2 = PN.getIncomingValueForBlock(BB);
931
932 // If V2 is a phi node in BB, look up what the mapped value will be.
933 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
934 if (V2PN->getParent() == BB)
935 V2 = V2PN->getIncomingValueForBlock(Pred);
936
937 // If there is a conflict, bail out.
938 if (V1 != V2) return false;
939 }
940 }
941 }
942
943 return true;
944}
945
946/// Eliminate a basic block that has only phi's and an unconditional branch in
947/// it.
948void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
949 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
950 BasicBlock *DestBB = BI->getSuccessor(0);
951
952 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
953 << *BB << *DestBB);
954
955 // If the destination block has a single pred, then this is a trivial edge,
956 // just collapse it.
957 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
958 if (SinglePred != DestBB) {
959 assert(SinglePred == BB &&
960 "Single predecessor not the same as predecessor");
961 // Merge DestBB into SinglePred/BB and delete it.
962 MergeBlockIntoPredecessor(DestBB);
963 // Note: BB(=SinglePred) will not be deleted on this path.
964 // DestBB(=its single successor) is the one that was deleted.
965 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
966 return;
967 }
968 }
969
970 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
971 // to handle the new incoming edges it is about to have.
972 for (PHINode &PN : DestBB->phis()) {
973 // Remove the incoming value for BB, and remember it.
974 Value *InVal = PN.removeIncomingValue(BB, false);
975
976 // Two options: either the InVal is a phi node defined in BB or it is some
977 // value that dominates BB.
978 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
979 if (InValPhi && InValPhi->getParent() == BB) {
980 // Add all of the input values of the input PHI as inputs of this phi.
981 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
982 PN.addIncoming(InValPhi->getIncomingValue(i),
983 InValPhi->getIncomingBlock(i));
984 } else {
985 // Otherwise, add one instance of the dominating value for each edge that
986 // we will be adding.
987 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
988 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
989 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
990 } else {
991 for (BasicBlock *Pred : predecessors(BB))
992 PN.addIncoming(InVal, Pred);
993 }
994 }
995 }
996
997 // The PHIs are now updated, change everything that refers to BB to use
998 // DestBB and remove BB.
999 BB->replaceAllUsesWith(DestBB);
1000 BB->eraseFromParent();
1001 ++NumBlocksElim;
1002
1003 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1004}
1005
1006// Computes a map of base pointer relocation instructions to corresponding
1007// derived pointer relocation instructions given a vector of all relocate calls
1008static void computeBaseDerivedRelocateMap(
1009 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1010 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
1011 &RelocateInstMap) {
1012 // Collect information in two maps: one primarily for locating the base object
1013 // while filling the second map; the second map is the final structure holding
1014 // a mapping between Base and corresponding Derived relocate calls
1015 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1016 for (auto *ThisRelocate : AllRelocateCalls) {
1017 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1018 ThisRelocate->getDerivedPtrIndex());
1019 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1020 }
1021 for (auto &Item : RelocateIdxMap) {
1022 std::pair<unsigned, unsigned> Key = Item.first;
1023 if (Key.first == Key.second)
1024 // Base relocation: nothing to insert
1025 continue;
1026
1027 GCRelocateInst *I = Item.second;
1028 auto BaseKey = std::make_pair(Key.first, Key.first);
1029
1030 // We're iterating over RelocateIdxMap so we cannot modify it.
1031 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1032 if (MaybeBase == RelocateIdxMap.end())
1033 // TODO: We might want to insert a new base object relocate and gep off
1034 // that, if there are enough derived object relocates.
1035 continue;
1036
1037 RelocateInstMap[MaybeBase->second].push_back(I);
1038 }
1039}
1040
1041// Accepts a GEP and extracts the operands into a vector provided they're all
1042// small integer constants
1043static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1044 SmallVectorImpl<Value *> &OffsetV) {
1045 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1046 // Only accept small constant integer operands
1047 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1048 if (!Op || Op->getZExtValue() > 20)
1049 return false;
1050 }
1051
1052 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1053 OffsetV.push_back(GEP->getOperand(i));
1054 return true;
1055}
1056
1057// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1058// replace, computes a replacement, and applies it.
1059static bool
1060simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1061 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1062 bool MadeChange = false;
1063 // We must ensure that the relocation of a derived pointer is defined after
1064 // the relocation of its base pointer. If we find a relocation of this base
1065 // that is defined earlier than the base's own relocation, we move the base's
1066 // relocation right before the found relocation. We consider only relocations
1067 // in the same basic block as the relocation of the base; relocations in other
1068 // basic blocks are skipped by this optimization and we do not care about them.
1069 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1070 &*R != RelocatedBase; ++R)
1071 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1072 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1073 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1074 RelocatedBase->moveBefore(RI);
1075 break;
1076 }
1077
1078 for (GCRelocateInst *ToReplace : Targets) {
1079 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1080 "Not relocating a derived object of the original base object");
1081 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1082 // A duplicate relocate call. TODO: coalesce duplicates.
1083 continue;
1084 }
1085
1086 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1087 // Base and derived relocates are in different basic blocks.
1088 // In this case transform is only valid when base dominates derived
1089 // relocate. However it would be too expensive to check dominance
1090 // for each such relocate, so we skip the whole transformation.
1091 continue;
1092 }
1093
1094 Value *Base = ToReplace->getBasePtr();
1095 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1096 if (!Derived || Derived->getPointerOperand() != Base)
1097 continue;
1098
1099 SmallVector<Value *, 2> OffsetV;
1100 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1101 continue;
1102
1103 // Create a Builder and replace the target callsite with a gep
1104 assert(RelocatedBase->getNextNode() &&
1105 "Should always have one since it's not a terminator");
1106
1107 // Insert after RelocatedBase
1108 IRBuilder<> Builder(RelocatedBase->getNextNode());
1109 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1110
1111 // If gc_relocate does not match the actual type, cast it to the right type.
1112 // In theory, there must be a bitcast after gc_relocate if the type does not
1113 // match, and we should reuse it to get the derived pointer. But it could be
1114 // cases like this:
1115 // bb1:
1116 // ...
1117 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
1118 // br label %merge
1119 //
1120 // bb2:
1121 // ...
1122 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
1123 // br label %merge
1124 //
1125 // merge:
1126 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1127 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1128 //
1129 // In this case, we cannot find the bitcast anymore, so we insert a new bitcast
1130 // whether or not one is already present. In this way, we can handle all cases, and
1131 // the extra bitcast should be optimized away in later passes.
1132 Value *ActualRelocatedBase = RelocatedBase;
1133 if (RelocatedBase->getType() != Base->getType()) {
1134 ActualRelocatedBase =
1135 Builder.CreateBitCast(RelocatedBase, Base->getType());
1136 }
1137 Value *Replacement = Builder.CreateGEP(
1138 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1139 Replacement->takeName(ToReplace);
1140 // If the newly generated derived pointer's type does not match the original derived
1141 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1142 Value *ActualReplacement = Replacement;
1143 if (Replacement->getType() != ToReplace->getType()) {
1144 ActualReplacement =
1145 Builder.CreateBitCast(Replacement, ToReplace->getType());
1146 }
1147 ToReplace->replaceAllUsesWith(ActualReplacement);
1148 ToReplace->eraseFromParent();
1149
1150 MadeChange = true;
1151 }
1152 return MadeChange;
1153}
1154
1155// Turns this:
1156//
1157// %base = ...
1158// %ptr = gep %base + 15
1159// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1160// %base' = relocate(%tok, i32 4, i32 4)
1161// %ptr' = relocate(%tok, i32 4, i32 5)
1162// %val = load %ptr'
1163//
1164// into this:
1165//
1166// %base = ...
1167// %ptr = gep %base + 15
1168// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1169// %base' = gc.relocate(%tok, i32 4, i32 4)
1170// %ptr' = gep %base' + 15
1171// %val = load %ptr'
1172bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1173 bool MadeChange = false;
1174 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1175 for (auto *U : I.users())
1176 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1177 // Collect all the relocate calls associated with a statepoint
1178 AllRelocateCalls.push_back(Relocate);
1179
1180 // We need at least one base pointer relocation + one derived pointer
1181 // relocation to mangle
1182 if (AllRelocateCalls.size() < 2)
1183 return false;
1184
1185 // RelocateInstMap is a mapping from the base relocate instruction to the
1186 // corresponding derived relocate instructions
1187 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1188 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1189 if (RelocateInstMap.empty())
1190 return false;
1191
1192 for (auto &Item : RelocateInstMap)
1193 // Item.first is the RelocatedBase to offset against
1194 // Item.second is the vector of Targets to replace
1195 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1196 return MadeChange;
1197}
1198
1199/// Sink the specified cast instruction into its user blocks.
1200static bool SinkCast(CastInst *CI) {
1201 BasicBlock *DefBB = CI->getParent();
1202
1203 /// InsertedCasts - Only insert a cast in each block once.
1204 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1205
1206 bool MadeChange = false;
1207 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1208 UI != E; ) {
1209 Use &TheUse = UI.getUse();
1210 Instruction *User = cast<Instruction>(*UI);
1211
1212 // Figure out which BB this cast is used in. For PHI's this is the
1213 // appropriate predecessor block.
1214 BasicBlock *UserBB = User->getParent();
1215 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1216 UserBB = PN->getIncomingBlock(TheUse);
1217 }
1218
1219 // Preincrement use iterator so we don't invalidate it.
1220 ++UI;
1221
1222 // The first insertion point of a block containing an EH pad is after the
1223 // pad. If the pad is the user, we cannot sink the cast past the pad.
1224 if (User->isEHPad())
1225 continue;
1226
1227 // If the block selected to receive the cast is an EH pad that does not
1228 // allow non-PHI instructions before the terminator, we can't sink the
1229 // cast.
1230 if (UserBB->getTerminator()->isEHPad())
1231 continue;
1232
1233 // If this user is in the same block as the cast, don't change the cast.
1234 if (UserBB == DefBB) continue;
1235
1236 // If we have already inserted a cast into this block, use it.
1237 CastInst *&InsertedCast = InsertedCasts[UserBB];
1238
1239 if (!InsertedCast) {
1240 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1241 assert(InsertPt != UserBB->end());
1242 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1243 CI->getType(), "", &*InsertPt);
1244 InsertedCast->setDebugLoc(CI->getDebugLoc());
1245 }
1246
1247 // Replace a use of the cast with a use of the new cast.
1248 TheUse = InsertedCast;
1249 MadeChange = true;
1250 ++NumCastUses;
1251 }
1252
1253 // If we removed all uses, nuke the cast.
1254 if (CI->use_empty()) {
1255 salvageDebugInfo(*CI);
1256 CI->eraseFromParent();
1257 MadeChange = true;
1258 }
1259
1260 return MadeChange;
1261}
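// A minimal before/after sketch of what SinkCast does (illustrative only; the
// block and value names below are assumptions, not taken from any real test).
// A cast defined in one block but used only in another is re-created in the
// user block so the cast and its use can be selected together:
//
//   DefBB:                               DefBB:
//     %c = zext i16 %v to i32              br label %UseBB
//     br label %UseBB           ==>      UseBB:
//   UseBB:                                 %c.sunk = zext i16 %v to i32
//     %r = add i32 %c, 1                   %r = add i32 %c.sunk, 1
//
// The original %c is erased once every use has been rewritten to a per-block copy.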
1262
1263/// If the specified cast instruction is a noop copy (e.g. it's casting from
1264/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1265/// reduce the number of virtual registers that must be created and coalesced.
1266///
1267/// Return true if any changes are made.
1268static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1269 const DataLayout &DL) {
1270 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1271 // than sinking only nop casts, but is helpful on some platforms.
1272 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1273 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1274 ASC->getDestAddressSpace()))
1275 return false;
1276 }
1277
1278 // If this is a noop copy,
1279 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1280 EVT DstVT = TLI.getValueType(DL, CI->getType());
1281
1282 // This is an fp<->int conversion?
1283 if (SrcVT.isInteger() != DstVT.isInteger())
1284 return false;
1285
1286 // If this is an extension, it will be a zero or sign extension, which
1287 // isn't a noop.
1288 if (SrcVT.bitsLT(DstVT)) return false;
1289
1290 // If these values will be promoted, find out what they will be promoted
1291 // to. This helps us consider truncates on PPC as noop copies when they
1292 // are.
1293 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1294 TargetLowering::TypePromoteInteger)
1295 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1296 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1297 TargetLowering::TypePromoteInteger)
1298 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1299
1300 // If, after promotion, these are the same types, this is a noop copy.
1301 if (SrcVT != DstVT)
1302 return false;
1303
1304 return SinkCast(CI);
1305}
1306
1307// Match a simple increment by constant operation. Note that if a sub is
1308// matched, the step is negated (as if the step had been canonicalized to
1309// an add, even though we leave the instruction alone.)
1310bool matchIncrement(const Instruction* IVInc, Instruction *&LHS,
1311 Constant *&Step) {
1312 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1313 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1314 m_Instruction(LHS), m_Constant(Step)))))
1315 return true;
1316 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1317 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1318 m_Instruction(LHS), m_Constant(Step))))) {
1319 Step = ConstantExpr::getNeg(Step);
1320 return true;
1321 }
1322 return false;
1323}
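// Illustrative patterns accepted by matchIncrement (a sketch; the value names
// are assumptions, not from the original source):
//
//   %iv.next = add i32 %iv, 1            ; LHS = %iv, Step = 1
//   %iv.next = sub i32 %iv, 4            ; LHS = %iv, Step reported as -4
//   %wo      = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %iv, i32 1)
//   %iv.next = extractvalue { i32, i1 } %wo, 0   ; LHS = %iv, Step = 1
//
// Note the sub case: the instruction itself is left alone; only the reported
// Step is negated so callers can reason about it as a canonical add.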
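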
1324
1325/// If the given \p PN is an induction variable with value IVInc coming from the
1326/// backedge, and on each iteration it gets increased by Step, return pair
1327/// <IVInc, Step>. Otherwise, return None.
1328static Optional<std::pair<Instruction *, Constant *> >
1329getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1330 const Loop *L = LI->getLoopFor(PN->getParent());
1331 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1332 return None;
1333 auto *IVInc =
1334 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1335 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1336 return None;
1337 Instruction *LHS = nullptr;
1338 Constant *Step = nullptr;
1339 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1340 return std::make_pair(IVInc, Step);
1341 return None;
1342}
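// Illustrative loop shape recognized by getIVIncrement (a sketch; the block and
// value names are assumptions, not from the original source):
//
//   loop:                                         ; loop header, PN = %iv
//     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
//     ...
//     %iv.next = add i32 %iv, 1                   ; latch increment
//     br i1 %cond, label %loop, label %exit
//
// Here the function returns the pair <%iv.next, i32 1>.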
1343
1344static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1345 auto *I = dyn_cast<Instruction>(V);
1346 if (!I)
1347 return false;
1348 Instruction *LHS = nullptr;
1349 Constant *Step = nullptr;
1350 if (!matchIncrement(I, LHS, Step))
1351 return false;
1352 if (auto *PN = dyn_cast<PHINode>(LHS))
1353 if (auto IVInc = getIVIncrement(PN, LI))
1354 return IVInc->first == I;
1355 return false;
1356}
1357
1358bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1359 Value *Arg0, Value *Arg1,
1360 CmpInst *Cmp,
1361 Intrinsic::ID IID) {
1362 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1363 if (!isIVIncrement(BO, LI))
1364 return false;
1365 const Loop *L = LI->getLoopFor(BO->getParent());
1366 assert(L && "L should not be null after isIVIncrement()");
1367 // Do not risk moving the increment into a child loop.
1368 if (LI->getLoopFor(Cmp->getParent()) != L)
1369 return false;
1370
1371 // Finally, we need to ensure that the insert point will dominate all
1372 // existing uses of the increment.
1373
1374 auto &DT = getDT(*BO->getParent()->getParent());
1375 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1376 // If we're moving up the dom tree, all uses are trivially dominated.
1377 // (This is the common case for code produced by LSR.)
1378 return true;
1379
1380 // Otherwise, special case the single use in the phi recurrence.
1381 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1382 };
1383 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1384 // We used to use a dominator tree here to allow multi-block optimization.
1385 // But that was problematic because:
1386 // 1. It could cause a perf regression by hoisting the math op into the
1387 // critical path.
1388 // 2. It could cause a perf regression by creating a value that was live
1389 // across multiple blocks and increasing register pressure.
1390 // 3. Use of a dominator tree could cause large compile-time regression.
1391 // This is because we recompute the DT on every change in the main CGP
1392 // run-loop. The recomputing is probably unnecessary in many cases, so if
1393 // that was fixed, using a DT here would be ok.
1394 //
1395 // There is one important case we still want to handle: when BO is
1396 // the IV increment. Important properties that make it profitable:
1397 // - We can speculate the IV increment anywhere in the loop (as long as the
1398 // indvar Phi is its only user);
1399 // - Upon computing Cmp, we effectively compute something equivalent to the
1400 // IV increment (even though it looks different in the IR). So moving it up
1401 // to the cmp point does not really increase register pressure.
1402 return false;
1403 }
1404
1405 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1406 if (BO->getOpcode() == Instruction::Add &&
1407 IID == Intrinsic::usub_with_overflow) {
1408 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1409 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1410 }
1411
1412 // Insert at the first instruction of the pair.
1413 Instruction *InsertPt = nullptr;
1414 for (Instruction &Iter : *Cmp->getParent()) {
1415 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1416 // the overflow intrinsic are defined.
1417 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1418 InsertPt = &Iter;
1419 break;
1420 }
1421 }
1422 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1423
1424 IRBuilder<> Builder(InsertPt);
1425 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1426 if (BO->getOpcode() != Instruction::Xor) {
1427 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1428 BO->replaceAllUsesWith(Math);
1429 } else
1430 assert(BO->hasOneUse() &&
1431 "Patterns with XOr should use the BO only in the compare");
1432 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1433 Cmp->replaceAllUsesWith(OV);
1434 Cmp->eraseFromParent();
1435 BO->eraseFromParent();
1436 return true;
1437}
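// A hedged sketch of the rewrite performed above for IID = uadd_with_overflow
// (illustrative IR only; value names are assumptions, not from any test):
//
//   %math = add i32 %a, %b                 ; BO
//   %ov   = icmp ult i32 %math, %a         ; Cmp: unsigned-overflow check
// becomes
//   %mo   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %mo, 0
//   %ov   = extractvalue { i32, i1 } %mo, 1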
1438
1439/// Match special-case patterns that check for unsigned add overflow.
1440static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1441 BinaryOperator *&Add) {
1442 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1443 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1444 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1445
1446 // We are not expecting non-canonical/degenerate code. Just bail out.
1447 if (isa<Constant>(A))
1448 return false;
1449
1450 ICmpInst::Predicate Pred = Cmp->getPredicate();
1451 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1452 B = ConstantInt::get(B->getType(), 1);
1453 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1454 B = ConstantInt::get(B->getType(), -1);
1455 else
1456 return false;
1457
1458 // Check the users of the variable operand of the compare looking for an add
1459 // with the adjusted constant.
1460 for (User *U : A->users()) {
1461 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1462 Add = cast<BinaryOperator>(U);
1463 return true;
1464 }
1465 }
1466 return false;
1467}
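// Concrete instances of the two edge cases matched above (illustrative only):
//
//   %add = add i32 %a, 1      paired with   %cmp = icmp eq i32 %a, -1
//   %add = add i32 %a, -1     paired with   %cmp = icmp ne i32 %a, 0
//
// In both cases the compare is exactly the overflow condition of the add, so
// the pair can later be folded into @llvm.uadd.with.overflow.i32.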
1468
1469/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1470/// intrinsic. Return true if any changes were made.
1471bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1472 bool &ModifiedDT) {
1473 Value *A, *B;
1474 BinaryOperator *Add;
1475 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1476 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1477 return false;
1478 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1479 A = Add->getOperand(0);
1480 B = Add->getOperand(1);
1481 }
1482
1483 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1484 TLI->getValueType(*DL, Add->getType()),
1485 Add->hasNUsesOrMore(2)))
1486 return false;
1487
1488 // We don't want to move around uses of condition values this late, so we
1489 // check if it is legal to create the call to the intrinsic in the basic
1490 // block containing the icmp.
1491 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1492 return false;
1493
1494 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1495 Intrinsic::uadd_with_overflow))
1496 return false;
1497
1498 // Reset callers - do not crash by iterating over a dead instruction.
1499 ModifiedDT = true;
1500 return true;
1501}
1502
1503bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1504 bool &ModifiedDT) {
1505 // We are not expecting non-canonical/degenerate code. Just bail out.
1506 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1507 if (isa<Constant>(A) && isa<Constant>(B))
1508 return false;
1509
1510 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1511 ICmpInst::Predicate Pred = Cmp->getPredicate();
1512 if (Pred == ICmpInst::ICMP_UGT) {
1513 std::swap(A, B);
1514 Pred = ICmpInst::ICMP_ULT;
1515 }
1516 // Convert special-case: (A == 0) is the same as (A u< 1).
1517 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1518 B = ConstantInt::get(B->getType(), 1);
1519 Pred = ICmpInst::ICMP_ULT;
1520 }
1521 // Convert special-case: (A != 0) is the same as (0 u< A).
1522 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1523 std::swap(A, B);
1524 Pred = ICmpInst::ICMP_ULT;
1525 }
1526 if (Pred != ICmpInst::ICMP_ULT)
1527 return false;
1528
1529 // Walk the users of a variable operand of a compare looking for a subtract or
1530 // add with that same operand. Also match the 2nd operand of the compare to
1531 // the add/sub, but that may be a negated constant operand of an add.
1532 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1533 BinaryOperator *Sub = nullptr;
1534 for (User *U : CmpVariableOperand->users()) {
1535 // A - B, A u< B --> usubo(A, B)
1536 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1537 Sub = cast<BinaryOperator>(U);
1538 break;
1539 }
1540
1541 // A + (-C), A u< C (canonicalized form of (sub A, C))
1542 const APInt *CmpC, *AddC;
1543 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1544 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1545 Sub = cast<BinaryOperator>(U);
1546 break;
1547 }
1548 }
1549 if (!Sub)
1550 return false;
1551
1552 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1553 TLI->getValueType(*DL, Sub->getType()),
1554 Sub->hasNUsesOrMore(2)))
1555 return false;
1556
1557 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1558 Cmp, Intrinsic::usub_with_overflow))
1559 return false;
1560
1561 // Reset callers - do not crash by iterating over a dead instruction.
1562 ModifiedDT = true;
1563 return true;
1564}
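// Illustrative rewrite produced by combineToUSubWithOverflow (a sketch; names
// are assumptions):
//
//   %sub = sub i32 %a, %b
//   %ov  = icmp ult i32 %a, %b             ; unsigned borrow check
// becomes
//   %so  = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
//   %sub = extractvalue { i32, i1 } %so, 0
//   %ov  = extractvalue { i32, i1 } %so, 1
//
// The (A == 0) and (A != 0) special cases above are first normalized to the
// same u< form before this matching takes place.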
1565
1566/// Sink the given CmpInst into user blocks to reduce the number of virtual
1567/// registers that must be created and coalesced. This is a clear win except on
1568/// targets with multiple condition code registers (PowerPC), where it might
1569/// lose; some adjustment may be wanted there.
1570///
1571/// Return true if any changes are made.
1572static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1573 if (TLI.hasMultipleConditionRegisters())
1574 return false;
1575
1576 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1577 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1578 return false;
1579
1580 // Only insert a cmp in each block once.
1581 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1582
1583 bool MadeChange = false;
1584 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1585 UI != E; ) {
1586 Use &TheUse = UI.getUse();
1587 Instruction *User = cast<Instruction>(*UI);
1588
1589 // Preincrement use iterator so we don't invalidate it.
1590 ++UI;
1591
1592 // Don't bother for PHI nodes.
1593 if (isa<PHINode>(User))
1594 continue;
1595
1596 // Figure out which BB this cmp is used in.
1597 BasicBlock *UserBB = User->getParent();
1598 BasicBlock *DefBB = Cmp->getParent();
1599
1600 // If this user is in the same block as the cmp, don't change the cmp.
1601 if (UserBB == DefBB) continue;
1602
1603 // If we have already inserted a cmp into this block, use it.
1604 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1605
1606 if (!InsertedCmp) {
1607 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1608 assert(InsertPt != UserBB->end());
1609 InsertedCmp =
1610 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1611 Cmp->getOperand(0), Cmp->getOperand(1), "",
1612 &*InsertPt);
1613 // Propagate the debug info.
1614 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1615 }
1616
1617 // Replace a use of the cmp with a use of the new cmp.
1618 TheUse = InsertedCmp;
1619 MadeChange = true;
1620 ++NumCmpUses;
1621 }
1622
1623 // If we removed all uses, nuke the cmp.
1624 if (Cmp->use_empty()) {
1625 Cmp->eraseFromParent();
1626 MadeChange = true;
1627 }
1628
1629 return MadeChange;
1630}
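// Sketch of the compare sinking above (illustrative; names assumed): a compare
// whose only users live in other blocks is re-created next to each user, so the
// condition value does not need to stay live across block boundaries:
//
//   DefBB:                               DefBB:
//     %c = icmp eq i32 %x, 0               br label %UseBB
//     br label %UseBB          ==>       UseBB:
//   UseBB:                                 %c.sunk = icmp eq i32 %x, 0
//     br i1 %c, label %t, label %f         br i1 %c.sunk, label %t, label %f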
1631
1632/// For pattern like:
1633///
1634/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1635/// ...
1636/// DomBB:
1637/// ...
1638/// br DomCond, TrueBB, CmpBB
1639/// CmpBB: (with DomBB being the single predecessor)
1640/// ...
1641/// Cmp = icmp eq CmpOp0, CmpOp1
1642/// ...
1643///
1644/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1645/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1646/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1647/// After that, DomCond and Cmp can share the same comparison, eliminating one
1648/// comparison.
1649///
1650/// Return true if any changes are made.
1651static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1652 const TargetLowering &TLI) {
1653 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1654 return false;
1655
1656 ICmpInst::Predicate Pred = Cmp->getPredicate();
1657 if (Pred != ICmpInst::ICMP_EQ)
1658 return false;
1659
1660 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1661 // icmp slt/sgt would introduce more redundant LLVM IR.
1662 for (User *U : Cmp->users()) {
1663 if (isa<BranchInst>(U))
1664 continue;
1665 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1666 continue;
1667 return false;
1668 }
1669
1670 // This is a cheap/incomplete check for dominance - just match a single
1671 // predecessor with a conditional branch.
1672 BasicBlock *CmpBB = Cmp->getParent();
1673 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1674 if (!DomBB)
1675 return false;
1676
1677 // We want to ensure that the only way control gets to the comparison of
1678 // interest is that a less/greater than comparison on the same operands is
1679 // false.
1680 Value *DomCond;
1681 BasicBlock *TrueBB, *FalseBB;
1682 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1683 return false;
1684 if (CmpBB != FalseBB)
1685 return false;
1686
1687 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1688 ICmpInst::Predicate DomPred;
1689 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1690 return false;
1691 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1692 return false;
1693
1694 // Convert the equality comparison to the opposite of the dominating
1695 // comparison and swap the direction for all branch/select users.
1696 // We have conceptually converted:
1697 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1698 // to
1699 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1700 // And similarly for branches.
1701 for (User *U : Cmp->users()) {
1702 if (auto *BI = dyn_cast<BranchInst>(U)) {
1703 assert(BI->isConditional() && "Must be conditional");
1704 BI->swapSuccessors();
1705 continue;
1706 }
1707 if (auto *SI = dyn_cast<SelectInst>(U)) {
1708 // Swap operands
1709 SI->swapValues();
1710 SI->swapProfMetadata();
1711 continue;
1712 }
1713 llvm_unreachable("Must be a branch or a select")::llvm::llvm_unreachable_internal("Must be a branch or a select"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 1713)
;
1714 }
1715 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1716 return true;
1717}
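// A hedged sketch of the fold above (illustrative IR; block and value names are
// assumptions). Given a dominating signed compare on the same operands:
//
//   DomBB:
//     %dom = icmp sgt i32 %a, %b
//     br i1 %dom, label %TrueBB, label %CmpBB
//   CmpBB:
//     %eq = icmp eq i32 %a, %b
//     br i1 %eq, label %E, label %NE
//
// the equality compare is rewritten to the swapped predicate of %dom,
//   %eq = icmp slt i32 %a, %b
// and the successors (or select operands) of its users are swapped, so both
// blocks can be lowered with one kind of signed comparison.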
1718
1719bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
1720 if (sinkCmpExpression(Cmp, *TLI))
1721 return true;
1722
1723 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1724 return true;
1725
1726 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1727 return true;
1728
1729 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1730 return true;
1731
1732 return false;
1733}
1734
1735/// Duplicate and sink the given 'and' instruction into user blocks where it is
1736/// used in a compare to allow isel to generate better code for targets where
1737/// this operation can be combined.
1738///
1739/// Return true if any changes are made.
1740static bool sinkAndCmp0Expression(Instruction *AndI,
1741 const TargetLowering &TLI,
1742 SetOfInstrs &InsertedInsts) {
1743 // Double-check that we're not trying to optimize an instruction that was
1744 // already optimized by some other part of this pass.
1745 assert(!InsertedInsts.count(AndI) &&
1746 "Attempting to optimize already optimized and instruction");
1747 (void) InsertedInsts;
1748
1749 // Nothing to do for single use in same basic block.
1750 if (AndI->hasOneUse() &&
1751 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1752 return false;
1753
1754 // Try to avoid cases where sinking/duplicating is likely to increase register
1755 // pressure.
1756 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1757 !isa<ConstantInt>(AndI->getOperand(1)) &&
1758 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1759 return false;
1760
1761 for (auto *U : AndI->users()) {
1762 Instruction *User = cast<Instruction>(U);
1763
1764 // Only sink 'and' feeding icmp with 0.
1765 if (!isa<ICmpInst>(User))
1766 return false;
1767
1768 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1769 if (!CmpC || !CmpC->isZero())
1770 return false;
1771 }
1772
1773 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1774 return false;
1775
1776 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1777 LLVM_DEBUG(AndI->getParent()->dump());
1778
1779 // Push the 'and' into the same block as the icmp 0. There should only be
1780 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1781 // others, so we don't need to keep track of which BBs we insert into.
1782 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1783 UI != E; ) {
1784 Use &TheUse = UI.getUse();
1785 Instruction *User = cast<Instruction>(*UI);
1786
1787 // Preincrement use iterator so we don't invalidate it.
1788 ++UI;
1789
1790 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1791
1792 // Keep the 'and' in the same place if the use is already in the same block.
1793 Instruction *InsertPt =
1794 User->getParent() == AndI->getParent() ? AndI : User;
1795 Instruction *InsertedAnd =
1796 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1797 AndI->getOperand(1), "", InsertPt);
1798 // Propagate the debug info.
1799 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1800
1801 // Replace a use of the 'and' with a use of the new 'and'.
1802 TheUse = InsertedAnd;
1803 ++NumAndUses;
1804 LLVM_DEBUG(User->getParent()->dump());
1805 }
1806
1807 // We removed all uses, nuke the and.
1808 AndI->eraseFromParent();
1809 return true;
1810}
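// Illustrative duplication performed above (a sketch; names assumed): an 'and'
// that only feeds zero-compares in other blocks is re-created next to each
// compare so isel can fold the pair into a single test-under-mask style
// instruction:
//
//   DefBB:  %m = and i32 %x, 255             UseBB:  %m.dup = and i32 %x, 255
//   UseBB:  %c = icmp eq i32 %m, 0    ==>            %c = icmp eq i32 %m.dup, 0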
1811
1812/// Check if the candidates could be combined with a shift instruction, which
1813/// includes:
1814/// 1. Truncate instruction
1815/// 2. And instruction and the imm is a mask of the low bits:
1816/// imm & (imm+1) == 0
1817static bool isExtractBitsCandidateUse(Instruction *User) {
1818 if (!isa<TruncInst>(User)) {
1819 if (User->getOpcode() != Instruction::And ||
1820 !isa<ConstantInt>(User->getOperand(1)))
1821 return false;
1822
1823 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1824
1825 if ((Cimm & (Cimm + 1)).getBoolValue())
1826 return false;
1827 }
1828 return true;
1829}
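// The mask test above relies on a standard bit trick: Imm is a mask of the low
// bits exactly when Imm & (Imm + 1) == 0. A standalone illustration in plain
// C++ (the helper name is made up, independent of the LLVM types used here):
//
//   static bool isLowBitMask(uint64_t Imm) {
//     return (Imm & (Imm + 1)) == 0;  // 0x0, 0x1, 0x3, 0x7, ..., 0xFF all pass
//   }
//   // isLowBitMask(0xFF) == true, isLowBitMask(0xF0) == false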
1830
1831/// Sink both shift and truncate instruction to the use of truncate's BB.
1832static bool
1833SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1834 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1835 const TargetLowering &TLI, const DataLayout &DL) {
1836 BasicBlock *UserBB = User->getParent();
1837 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1838 auto *TruncI = cast<TruncInst>(User);
1839 bool MadeChange = false;
1840
1841 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1842 TruncE = TruncI->user_end();
1843 TruncUI != TruncE;) {
1844
1845 Use &TruncTheUse = TruncUI.getUse();
1846 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1847 // Preincrement use iterator so we don't invalidate it.
1848
1849 ++TruncUI;
1850
1851 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1852 if (!ISDOpcode)
1853 continue;
1854
1855 // If the use is actually a legal node, there will not be an
1856 // implicit truncate.
1857 // FIXME: always querying the result type is just an
1858 // approximation; some nodes' legality is determined by the
1859 // operand or other means. There's no good way to find out though.
1860 if (TLI.isOperationLegalOrCustom(
1861 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1862 continue;
1863
1864 // Don't bother for PHI nodes.
1865 if (isa<PHINode>(TruncUser))
1866 continue;
1867
1868 BasicBlock *TruncUserBB = TruncUser->getParent();
1869
1870 if (UserBB == TruncUserBB)
1871 continue;
1872
1873 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1874 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1875
1876 if (!InsertedShift && !InsertedTrunc) {
1877 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1878 assert(InsertPt != TruncUserBB->end());
1879 // Sink the shift
1880 if (ShiftI->getOpcode() == Instruction::AShr)
1881 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1882 "", &*InsertPt);
1883 else
1884 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1885 "", &*InsertPt);
1886 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1887
1888 // Sink the trunc
1889 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1890 TruncInsertPt++;
1891 assert(TruncInsertPt != TruncUserBB->end());
1892
1893 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1894 TruncI->getType(), "", &*TruncInsertPt);
1895 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1896
1897 MadeChange = true;
1898
1899 TruncTheUse = InsertedTrunc;
1900 }
1901 }
1902 return MadeChange;
1903}
1904
1905/// Sink the shift *right* instruction into user blocks if the uses could
1906/// potentially be combined with this shift instruction to generate a BitExtract
1907/// instruction. It will only be applied if the architecture supports BitExtract
1908/// instructions. Here is an example:
1909/// BB1:
1910/// %x.extract.shift = lshr i64 %arg1, 32
1911/// BB2:
1912/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1913/// ==>
1914///
1915/// BB2:
1916/// %x.extract.shift.1 = lshr i64 %arg1, 32
1917/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1918///
1919/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1920/// instruction.
1921/// Return true if any changes are made.
1922static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1923 const TargetLowering &TLI,
1924 const DataLayout &DL) {
1925 BasicBlock *DefBB = ShiftI->getParent();
1926
1927 /// Only insert instructions in each block once.
1928 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1929
1930 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1931
1932 bool MadeChange = false;
1933 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1934 UI != E;) {
1935 Use &TheUse = UI.getUse();
1936 Instruction *User = cast<Instruction>(*UI);
1937 // Preincrement use iterator so we don't invalidate it.
1938 ++UI;
1939
1940 // Don't bother for PHI nodes.
1941 if (isa<PHINode>(User))
1942 continue;
1943
1944 if (!isExtractBitsCandidateUse(User))
1945 continue;
1946
1947 BasicBlock *UserBB = User->getParent();
1948
1949 if (UserBB == DefBB) {
1950 // If the shift and truncate instructions are in the same BB, the use of
1951 // the truncate (TruncUse) may still introduce another truncate if its type
1952 // is not legal. In this case, we would like to sink both the shift and the
1953 // truncate instruction to the BB of TruncUse.
1954 // for example:
1955 // BB1:
1956 // i64 shift.result = lshr i64 opnd, imm
1957 // trunc.result = trunc shift.result to i16
1958 //
1959 // BB2:
1960 // ----> We will have an implicit truncate here if the architecture does
1961 // not have i16 compare.
1962 // cmp i16 trunc.result, opnd2
1963 //
1964 if (isa<TruncInst>(User) && shiftIsLegal
1965 // If the type of the truncate is legal, no truncate will be
1966 // introduced in other basic blocks.
1967 &&
1968 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1969 MadeChange =
1970 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1971
1972 continue;
1973 }
1974 // If we have already inserted a shift into this block, use it.
1975 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1976
1977 if (!InsertedShift) {
1978 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1979 assert(InsertPt != UserBB->end());
1980
1981 if (ShiftI->getOpcode() == Instruction::AShr)
1982 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1983 "", &*InsertPt);
1984 else
1985 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1986 "", &*InsertPt);
1987 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1988
1989 MadeChange = true;
1990 }
1991
1992 // Replace a use of the shift with a use of the new shift.
1993 TheUse = InsertedShift;
1994 }
1995
1996 // If we removed all uses, or there are none, nuke the shift.
1997 if (ShiftI->use_empty()) {
1998 salvageDebugInfo(*ShiftI);
1999 ShiftI->eraseFromParent();
2000 MadeChange = true;
2001 }
2002
2003 return MadeChange;
2004}
2005
2006/// If counting leading or trailing zeros is an expensive operation and a zero
2007/// input is defined, add a check for zero to avoid calling the intrinsic.
2008///
2009/// We want to transform:
2010/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2011///
2012/// into:
2013/// entry:
2014/// %cmpz = icmp eq i64 %A, 0
2015/// br i1 %cmpz, label %cond.end, label %cond.false
2016/// cond.false:
2017/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2018/// br label %cond.end
2019/// cond.end:
2020/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2021///
2022/// If the transform is performed, return true and set ModifiedDT to true.
2023static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2024 const TargetLowering *TLI,
2025 const DataLayout *DL,
2026 bool &ModifiedDT) {
2027 // If a zero input is undefined, it doesn't make sense to despeculate that.
2028 if (match(CountZeros->getOperand(1), m_One()))
2029 return false;
2030
2031 // If it's cheap to speculate, there's nothing to do.
2032 auto IntrinsicID = CountZeros->getIntrinsicID();
2033 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
2034 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
2035 return false;
2036
2037 // Only handle legal scalar cases. Anything else requires too much work.
2038 Type *Ty = CountZeros->getType();
2039 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
2040 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2041 return false;
2042
2043 // The intrinsic will be sunk behind a compare against zero and branch.
2044 BasicBlock *StartBlock = CountZeros->getParent();
2045 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2046
2047 // Create another block after the count zero intrinsic. A PHI will be added
2048 // in this block to select the result of the intrinsic or the bit-width
2049 // constant if the input to the intrinsic is zero.
2050 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
2051 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2052
2053 // Set up a builder to create a compare, conditional branch, and PHI.
2054 IRBuilder<> Builder(CountZeros->getContext());
2055 Builder.SetInsertPoint(StartBlock->getTerminator());
2056 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2057
2058 // Replace the unconditional branch that was created by the first split with
2059 // a compare against zero and a conditional branch.
2060 Value *Zero = Constant::getNullValue(Ty);
2061 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
2062 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2063 StartBlock->getTerminator()->eraseFromParent();
2064
2065 // Create a PHI in the end block to select either the output of the intrinsic
2066 // or the bit width of the operand.
2067 Builder.SetInsertPoint(&EndBlock->front());
2068 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2069 CountZeros->replaceAllUsesWith(PN);
2070 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2071 PN->addIncoming(BitWidth, StartBlock);
2072 PN->addIncoming(CountZeros, CallBlock);
2073
2074 // We are explicitly handling the zero case, so we can set the intrinsic's
2075 // undefined zero argument to 'true'. This will also prevent reprocessing the
2076 // intrinsic; we only despeculate when a zero input is defined.
2077 CountZeros->setArgOperand(1, Builder.getTrue());
2078 ModifiedDT = true;
2079 return true;
2080}
2081
2082bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
2083 BasicBlock *BB = CI->getParent();
2084
2085 // Lower inline assembly if we can.
2086 // If we found an inline asm expression, and if the target knows how to
2087 // lower it to normal LLVM code, do so now.
2088 if (CI->isInlineAsm()) {
2089 if (TLI->ExpandInlineAsm(CI)) {
2090 // Avoid invalidating the iterator.
2091 CurInstIterator = BB->begin();
2092 // Avoid processing instructions out of order, which could cause
2093 // reuse before a value is defined.
2094 SunkAddrs.clear();
2095 return true;
2096 }
2097 // Sink address computing for memory operands into the block.
2098 if (optimizeInlineAsmInst(CI))
2099 return true;
2100 }
2101
2102 // Align the pointer arguments to this call if the target thinks it's a good
2103 // idea
2104 unsigned MinSize, PrefAlign;
2105 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2106 for (auto &Arg : CI->arg_operands()) {
2107 // We want to align both objects whose address is used directly and
2108 // objects whose address is used in casts and GEPs, though it only makes
2109 // sense for GEPs if the offset is a multiple of the desired alignment and
2110 // if size - offset meets the size threshold.
2111 if (!Arg->getType()->isPointerTy())
2112 continue;
2113 APInt Offset(DL->getIndexSizeInBits(
2114 cast<PointerType>(Arg->getType())->getAddressSpace()),
2115 0);
2116 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2117 uint64_t Offset2 = Offset.getLimitedValue();
2118 if ((Offset2 & (PrefAlign-1)) != 0)
2119 continue;
2120 AllocaInst *AI;
2121 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
2122 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2123 AI->setAlignment(Align(PrefAlign));
2124 // Global variables can only be aligned if they are defined in this
2125 // object (i.e. they are uniquely initialized in this object), and
2126 // over-aligning global variables that have an explicit section is
2127 // forbidden.
2128 GlobalVariable *GV;
2129 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2130 GV->getPointerAlignment(*DL) < PrefAlign &&
2131 DL->getTypeAllocSize(GV->getValueType()) >=
2132 MinSize + Offset2)
2133 GV->setAlignment(MaybeAlign(PrefAlign));
2134 }
2135 // If this is a memcpy (or similar) then we may be able to improve the
2136 // alignment
2137 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2138 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2139 MaybeAlign MIDestAlign = MI->getDestAlign();
2140 if (!MIDestAlign || DestAlign > *MIDestAlign)
2141 MI->setDestAlignment(DestAlign);
2142 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2143 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2144 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2145 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2146 MTI->setSourceAlignment(SrcAlign);
2147 }
2148 }
2149 }
2150
2151 // If we have a cold call site, try to sink addressing computation into the
2152 // cold block. This interacts with our handling for loads and stores to
2153 // ensure that we can fold all uses of a potential addressing computation
2154 // into their uses. TODO: generalize this to work over profiling data
2155 if (CI->hasFnAttr(Attribute::Cold) &&
2156 !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2157 for (auto &Arg : CI->arg_operands()) {
2158 if (!Arg->getType()->isPointerTy())
2159 continue;
2160 unsigned AS = Arg->getType()->getPointerAddressSpace();
2161 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
2162 }
2163
2164 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2165 if (II) {
2166 switch (II->getIntrinsicID()) {
2167 default: break;
2168 case Intrinsic::assume:
2169 llvm_unreachable("llvm.assume should have been removed already")::llvm::llvm_unreachable_internal("llvm.assume should have been removed already"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 2169)
;
2170 case Intrinsic::experimental_widenable_condition: {
2171 // Give up on future widening opportunities so that we can fold away dead
2172 // paths and merge blocks before going into block-local instruction
2173 // selection.
2174 if (II->use_empty()) {
2175 II->eraseFromParent();
2176 return true;
2177 }
2178 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2179 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2180 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2181 });
2182 return true;
2183 }
2184 case Intrinsic::objectsize:
2185 llvm_unreachable("llvm.objectsize.* should have been lowered already")::llvm::llvm_unreachable_internal("llvm.objectsize.* should have been lowered already"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 2185)
;
2186 case Intrinsic::is_constant:
2187 llvm_unreachable("llvm.is.constant.* should have been lowered already")::llvm::llvm_unreachable_internal("llvm.is.constant.* should have been lowered already"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 2187)
;
2188 case Intrinsic::aarch64_stlxr:
2189 case Intrinsic::aarch64_stxr: {
2190 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2191 if (!ExtVal || !ExtVal->hasOneUse() ||
2192 ExtVal->getParent() == CI->getParent())
2193 return false;
2194 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2195 ExtVal->moveBefore(CI);
2196 // Mark this instruction as "inserted by CGP", so that other
2197 // optimizations don't touch it.
2198 InsertedInsts.insert(ExtVal);
2199 return true;
2200 }
2201
2202 case Intrinsic::launder_invariant_group:
2203 case Intrinsic::strip_invariant_group: {
2204 Value *ArgVal = II->getArgOperand(0);
2205 auto it = LargeOffsetGEPMap.find(II);
2206 if (it != LargeOffsetGEPMap.end()) {
2207 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2208 // Make sure not to have to deal with iterator invalidation
2209 // after possibly adding ArgVal to LargeOffsetGEPMap.
2210 auto GEPs = std::move(it->second);
2211 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2212 LargeOffsetGEPMap.erase(II);
2213 }
2214
2215 II->replaceAllUsesWith(ArgVal);
2216 II->eraseFromParent();
2217 return true;
2218 }
2219 case Intrinsic::cttz:
2220 case Intrinsic::ctlz:
2221 // If counting zeros is expensive, try to avoid it.
2222 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
2223 case Intrinsic::fshl:
2224 case Intrinsic::fshr:
2225 return optimizeFunnelShift(II);
2226 case Intrinsic::dbg_value:
2227 return fixupDbgValue(II);
2228 case Intrinsic::vscale: {
2229 // If datalayout has no special restrictions on vector data layout,
2230 // replace `llvm.vscale` by an equivalent constant expression
2231 // to benefit from cheap constant propagation.
2232 Type *ScalableVectorTy =
2233 VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
2234 if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) {
2235 auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
2236 auto *One = ConstantInt::getSigned(II->getType(), 1);
2237 auto *CGep =
2238 ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
2239 II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
2240 II->eraseFromParent();
2241 return true;
2242 }
2243 break;
2244 }
2245 case Intrinsic::masked_gather:
2246 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2247 case Intrinsic::masked_scatter:
2248 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2249 }
2250
2251 SmallVector<Value *, 2> PtrOps;
2252 Type *AccessTy;
2253 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2254 while (!PtrOps.empty()) {
2255 Value *PtrVal = PtrOps.pop_back_val();
2256 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2257 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2258 return true;
2259 }
2260 }
2261
2262 // From here on out we're working with named functions.
2263 if (!CI->getCalledFunction()) return false;
2264
2265 // Lower all default uses of _chk calls. This is very similar
2266 // to what InstCombineCalls does, but here we are only lowering calls
2267 // to fortified library functions (e.g. __memcpy_chk) that have the default
2268 // "don't know" as the objectsize. Anything else should be left alone.
2269 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2270 IRBuilder<> Builder(CI);
2271 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2272 CI->replaceAllUsesWith(V);
2273 CI->eraseFromParent();
2274 return true;
2275 }
2276
2277 return false;
2278}
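// A rough example of the fortified-libcall lowering at the end of
// optimizeCallInst (hedged; the exact output depends on
// FortifiedLibCallSimplifier, and the value names below are made up):
//
//   %r = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 %n, i64 -1)
//
// with the "don't know" object size of -1 is replaced by a plain memcpy
// intrinsic, with the destination pointer standing in for the call's result.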
2279
2280/// Look for opportunities to duplicate return instructions to the predecessor
2281/// to enable tail call optimizations. The case it is currently looking for is:
2282/// @code
2283/// bb0:
2284/// %tmp0 = tail call i32 @f0()
2285/// br label %return
2286/// bb1:
2287/// %tmp1 = tail call i32 @f1()
2288/// br label %return
2289/// bb2:
2290/// %tmp2 = tail call i32 @f2()
2291/// br label %return
2292/// return:
2293/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2294/// ret i32 %retval
2295/// @endcode
2296///
2297/// =>
2298///
2299/// @code
2300/// bb0:
2301/// %tmp0 = tail call i32 @f0()
2302/// ret i32 %tmp0
2303/// bb1:
2304/// %tmp1 = tail call i32 @f1()
2305/// ret i32 %tmp1
2306/// bb2:
2307/// %tmp2 = tail call i32 @f2()
2308/// ret i32 %tmp2
2309/// @endcode
2310bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
2311 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2312 if (!RetI)
2313 return false;
2314
2315 PHINode *PN = nullptr;
2316 ExtractValueInst *EVI = nullptr;
2317 BitCastInst *BCI = nullptr;
2318 Value *V = RetI->getReturnValue();
2319 if (V) {
2320 BCI = dyn_cast<BitCastInst>(V);
2321 if (BCI)
2322 V = BCI->getOperand(0);
2323
2324 EVI = dyn_cast<ExtractValueInst>(V);
2325 if (EVI) {
2326 V = EVI->getOperand(0);
2327 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2328 return false;
2329 }
2330
2331 PN = dyn_cast<PHINode>(V);
2332 if (!PN)
2333 return false;
2334 }
2335
2336 if (PN && PN->getParent() != BB)
2337 return false;
2338
2339 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2340 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2341 if (BC && BC->hasOneUse())
2342 Inst = BC->user_back();
2343
2344 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2345 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2346 return false;
2347 };
2348
2349 // Make sure there are no instructions between the first instruction
2350 // and return.
2351 const Instruction *BI = BB->getFirstNonPHI();
2352 // Skip over debug and the bitcast.
2353 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2354 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2355 BI = BI->getNextNode();
2356 if (BI != RetI)
2357 return false;
2358
2359 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2360 /// call.
2361 const Function *F = BB->getParent();
2362 SmallVector<BasicBlock*, 4> TailCallBBs;
2363 if (PN) {
2364 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2365 // Look through bitcasts.
2366 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2367 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2368 BasicBlock *PredBB = PN->getIncomingBlock(I);
2369 // Make sure the phi value is indeed produced by the tail call.
2370 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2371 TLI->mayBeEmittedAsTailCall(CI) &&
2372 attributesPermitTailCall(F, CI, RetI, *TLI))
2373 TailCallBBs.push_back(PredBB);
2374 }
2375 } else {
2376 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2377 for (BasicBlock *Pred : predecessors(BB)) {
2378 if (!VisitedBBs.insert(Pred).second)
2379 continue;
2380 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2381 CallInst *CI = dyn_cast<CallInst>(I);
2382 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2383 attributesPermitTailCall(F, CI, RetI, *TLI))
2384 TailCallBBs.push_back(Pred);
2385 }
2386 }
2387 }
2388
2389 bool Changed = false;
2390 for (auto const &TailCallBB : TailCallBBs) {
2391 // Make sure the call instruction is followed by an unconditional branch to
2392 // the return block.
2393 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2394 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2395 continue;
2396
2397 // Duplicate the return into TailCallBB.
2398 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2399 assert(!VerifyBFIUpdates ||
2400 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
2401 BFI->setBlockFreq(
2402 BB,
2403 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency());
2404 ModifiedDT = Changed = true;
2405 ++NumRetsDup;
2406 }
2407
2408 // If we eliminated all predecessors of the block, delete the block now.
2409 if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
2410 BB->eraseFromParent();
2411
2412 return Changed;
2413}
2414
2415//===----------------------------------------------------------------------===//
2416// Memory Optimization
2417//===----------------------------------------------------------------------===//
2418
2419namespace {
2420
2421/// This is an extended version of TargetLowering::AddrMode
2422/// which holds actual Value*'s for register values.
2423struct ExtAddrMode : public TargetLowering::AddrMode {
2424 Value *BaseReg = nullptr;
2425 Value *ScaledReg = nullptr;
2426 Value *OriginalValue = nullptr;
2427 bool InBounds = true;
2428
2429 enum FieldName {
2430 NoField = 0x00,
2431 BaseRegField = 0x01,
2432 BaseGVField = 0x02,
2433 BaseOffsField = 0x04,
2434 ScaledRegField = 0x08,
2435 ScaleField = 0x10,
2436 MultipleFields = 0xff
2437 };
2438
2439
2440 ExtAddrMode() = default;
2441
2442 void print(raw_ostream &OS) const;
2443 void dump() const;
2444
2445 FieldName compare(const ExtAddrMode &other) {
2446 // First check that the types are the same on each field, as differing types
2447 // is something we can't cope with later on.
2448 if (BaseReg && other.BaseReg &&
2449 BaseReg->getType() != other.BaseReg->getType())
2450 return MultipleFields;
2451 if (BaseGV && other.BaseGV &&
2452 BaseGV->getType() != other.BaseGV->getType())
2453 return MultipleFields;
2454 if (ScaledReg && other.ScaledReg &&
2455 ScaledReg->getType() != other.ScaledReg->getType())
2456 return MultipleFields;
2457
2458 // Conservatively reject 'inbounds' mismatches.
2459 if (InBounds != other.InBounds)
2460 return MultipleFields;
2461
2462 // Check each field to see if it differs.
2463 unsigned Result = NoField;
2464 if (BaseReg != other.BaseReg)
2465 Result |= BaseRegField;
2466 if (BaseGV != other.BaseGV)
2467 Result |= BaseGVField;
2468 if (BaseOffs != other.BaseOffs)
2469 Result |= BaseOffsField;
2470 if (ScaledReg != other.ScaledReg)
2471 Result |= ScaledRegField;
2472 // Don't count 0 as being a different scale, because that actually means
2473 // unscaled (which will already be counted by having no ScaledReg).
2474 if (Scale && other.Scale && Scale != other.Scale)
2475 Result |= ScaleField;
2476
2477 if (countPopulation(Result) > 1)
2478 return MultipleFields;
2479 else
2480 return static_cast<FieldName>(Result);
2481 }
2482
2483 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2484 // with no offset.
2485 bool isTrivial() {
2486 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2487 // trivial if at most one of these terms is nonzero, except that BaseGV and
2488 // BaseReg both being zero actually means a null pointer value, which we
2489 // consider to be 'non-zero' here.
2490 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2491 }
2492
2493 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2494 switch (Field) {
2495 default:
2496 return nullptr;
2497 case BaseRegField:
2498 return BaseReg;
2499 case BaseGVField:
2500 return BaseGV;
2501 case ScaledRegField:
2502 return ScaledReg;
2503 case BaseOffsField:
2504 return ConstantInt::get(IntPtrTy, BaseOffs);
2505 }
2506 }
2507
2508 void SetCombinedField(FieldName Field, Value *V,
2509 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2510 switch (Field) {
2511 default:
2512 llvm_unreachable("Unhandled fields are expected to be rejected earlier")::llvm::llvm_unreachable_internal("Unhandled fields are expected to be rejected earlier"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 2512)
;
2513 break;
2514 case ExtAddrMode::BaseRegField:
2515 BaseReg = V;
2516 break;
2517 case ExtAddrMode::BaseGVField:
2518 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2519 // in the BaseReg field.
2520 assert(BaseReg == nullptr);
2521 BaseReg = V;
2522 BaseGV = nullptr;
2523 break;
2524 case ExtAddrMode::ScaledRegField:
2525 ScaledReg = V;
2526 // If we have a mix of scaled and unscaled addrmodes then we want scale
2527 // to be the scale and not zero.
2528 if (!Scale)
2529 for (const ExtAddrMode &AM : AddrModes)
2530 if (AM.Scale) {
2531 Scale = AM.Scale;
2532 break;
2533 }
2534 break;
2535 case ExtAddrMode::BaseOffsField:
2536 // The offset is no longer a constant, so it goes in ScaledReg with a
2537 // scale of 1.
2538 assert(ScaledReg == nullptr);
2539 ScaledReg = V;
2540 Scale = 1;
2541 BaseOffs = 0;
2542 break;
2543 }
2544 }
2545};
2546
2547} // end anonymous namespace
2548
2549#ifndef NDEBUG
2550static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2551 AM.print(OS);
2552 return OS;
2553}
2554#endif
2555
2556#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2557void ExtAddrMode::print(raw_ostream &OS) const {
2558 bool NeedPlus = false;
2559 OS << "[";
2560 if (InBounds)
2561 OS << "inbounds ";
2562 if (BaseGV) {
2563 OS << (NeedPlus ? " + " : "")
2564 << "GV:";
2565 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2566 NeedPlus = true;
2567 }
2568
2569 if (BaseOffs) {
2570 OS << (NeedPlus ? " + " : "")
2571 << BaseOffs;
2572 NeedPlus = true;
2573 }
2574
2575 if (BaseReg) {
2576 OS << (NeedPlus ? " + " : "")
2577 << "Base:";
2578 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2579 NeedPlus = true;
2580 }
2581 if (Scale) {
2582 OS << (NeedPlus ? " + " : "")
2583 << Scale << "*";
2584 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2585 }
2586
2587 OS << ']';
2588}
2589
2590LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2591 print(dbgs());
2592 dbgs() << '\n';
2593}
2594#endif
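
The FieldName bitmask above lets compare() report exactly which part of two addressing modes disagrees, collapsing any disagreement in more than one field into MultipleFields. A minimal sketch of that contract, not part of the source, assuming B0 and B1 are two distinct Value*s of the same pointer type:

ExtAddrMode AM1, AM2;
AM1.BaseReg = B0;                        // mode [Base:B0]
AM2.BaseReg = B1;                        // mode [Base:B1]
ExtAddrMode::FieldName Diff = AM1.compare(AM2);
// Diff == ExtAddrMode::BaseRegField: exactly one field differs, so the two
// modes are still candidates for merging via SetCombinedField.
AM2.BaseOffs = 8;                        // a second field now differs as well
// AM1.compare(AM2) == ExtAddrMode::MultipleFields: merging is rejected.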
2595
2596namespace {
2597
2598/// This class provides transaction based operation on the IR.
2599/// Every change made through this class is recorded in the internal state and
2600/// can be undone (rollback) until commit is called.
2601/// CGP does not check if instructions could be speculatively executed when
2602/// moved. Preserving the original location would pessimize the debugging
2603/// experience, as well as negatively impact the quality of sample PGO.
2604class TypePromotionTransaction {
2605 /// This represents the common interface of the individual transaction.
2606 /// Each class implements the logic for doing one specific modification on
2607 /// the IR via the TypePromotionTransaction.
2608 class TypePromotionAction {
2609 protected:
2610 /// The Instruction modified.
2611 Instruction *Inst;
2612
2613 public:
2614 /// Constructor of the action.
2615 /// The constructor performs the related action on the IR.
2616 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2617
2618 virtual ~TypePromotionAction() = default;
2619
2620 /// Undo the modification done by this action.
2621 /// When this method is called, the IR must be in the same state as it was
2622 /// before this action was applied.
2623 /// \pre Undoing the action works if and only if the IR is in the exact same
2624 /// state as it was directly after this action was applied.
2625 virtual void undo() = 0;
2626
2627 /// Advocate every change made by this action.
2628 /// When the action's results on the IR are to be kept, it is important
2629 /// to call this function, otherwise hidden information may be kept forever.
2630 virtual void commit() {
2631 // Nothing to be done, this action is not doing anything.
2632 }
2633 };
2634
2635 /// Utility to remember the position of an instruction.
2636 class InsertionHandler {
2637 /// Position of an instruction.
2638 /// Either an instruction:
2639 /// - Is the first in a basic block: BB is used.
2640 /// - Has a previous instruction: PrevInst is used.
2641 union {
2642 Instruction *PrevInst;
2643 BasicBlock *BB;
2644 } Point;
2645
2646 /// Remember whether or not the instruction had a previous instruction.
2647 bool HasPrevInstruction;
2648
2649 public:
2650 /// Record the position of \p Inst.
2651 InsertionHandler(Instruction *Inst) {
2652 BasicBlock::iterator It = Inst->getIterator();
2653 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2654 if (HasPrevInstruction)
2655 Point.PrevInst = &*--It;
2656 else
2657 Point.BB = Inst->getParent();
2658 }
2659
2660 /// Insert \p Inst at the recorded position.
2661 void insert(Instruction *Inst) {
2662 if (HasPrevInstruction) {
2663 if (Inst->getParent())
2664 Inst->removeFromParent();
2665 Inst->insertAfter(Point.PrevInst);
2666 } else {
2667 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2668 if (Inst->getParent())
2669 Inst->moveBefore(Position);
2670 else
2671 Inst->insertBefore(Position);
2672 }
2673 }
2674 };
2675
2676 /// Move an instruction before another.
2677 class InstructionMoveBefore : public TypePromotionAction {
2678 /// Original position of the instruction.
2679 InsertionHandler Position;
2680
2681 public:
2682 /// Move \p Inst before \p Before.
2683 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2684 : TypePromotionAction(Inst), Position(Inst) {
2685 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2686 << "\n");
2687 Inst->moveBefore(Before);
2688 }
2689
2690 /// Move the instruction back to its original position.
2691 void undo() override {
2692 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2693 Position.insert(Inst);
2694 }
2695 };
2696
2697 /// Set the operand of an instruction with a new value.
2698 class OperandSetter : public TypePromotionAction {
2699 /// Original operand of the instruction.
2700 Value *Origin;
2701
2702 /// Index of the modified instruction.
2703 unsigned Idx;
2704
2705 public:
2706 /// Set \p Idx operand of \p Inst with \p NewVal.
2707 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2708 : TypePromotionAction(Inst), Idx(Idx) {
2709 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2710 << "for:" << *Inst << "\n"
2711 << "with:" << *NewVal << "\n");
2712 Origin = Inst->getOperand(Idx);
2713 Inst->setOperand(Idx, NewVal);
2714 }
2715
2716 /// Restore the original value of the instruction.
2717 void undo() override {
2718 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2719 << "for: " << *Inst << "\n"
2720 << "with: " << *Origin << "\n");
2721 Inst->setOperand(Idx, Origin);
2722 }
2723 };
2724
2725 /// Hide the operands of an instruction.
2726 /// Do as if this instruction was not using any of its operands.
2727 class OperandsHider : public TypePromotionAction {
2728 /// The list of original operands.
2729 SmallVector<Value *, 4> OriginalValues;
2730
2731 public:
2732 /// Remove \p Inst from the uses of the operands of \p Inst.
2733 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2734 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2735 unsigned NumOpnds = Inst->getNumOperands();
2736 OriginalValues.reserve(NumOpnds);
2737 for (unsigned It = 0; It < NumOpnds; ++It) {
2738 // Save the current operand.
2739 Value *Val = Inst->getOperand(It);
2740 OriginalValues.push_back(Val);
2741 // Set a dummy one.
2742 // We could use OperandSetter here, but that would imply an overhead
2743 // that we are not willing to pay.
2744 Inst->setOperand(It, UndefValue::get(Val->getType()));
2745 }
2746 }
2747
2748 /// Restore the original list of uses.
2749 void undo() override {
2750 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2751 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2752 Inst->setOperand(It, OriginalValues[It]);
2753 }
2754 };
2755
2756 /// Build a truncate instruction.
2757 class TruncBuilder : public TypePromotionAction {
2758 Value *Val;
2759
2760 public:
2761 /// Build a truncate instruction of \p Opnd producing a \p Ty
2762 /// result.
2763 /// trunc Opnd to Ty.
2764 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2765 IRBuilder<> Builder(Opnd);
2766 Builder.SetCurrentDebugLocation(DebugLoc());
2767 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2768 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2769 }
2770
2771 /// Get the built value.
2772 Value *getBuiltValue() { return Val; }
2773
2774 /// Remove the built instruction.
2775 void undo() override {
2776 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2777 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2778 IVal->eraseFromParent();
2779 }
2780 };
2781
2782 /// Build a sign extension instruction.
2783 class SExtBuilder : public TypePromotionAction {
2784 Value *Val;
2785
2786 public:
2787 /// Build a sign extension instruction of \p Opnd producing a \p Ty
2788 /// result.
2789 /// sext Opnd to Ty.
2790 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2791 : TypePromotionAction(InsertPt) {
2792 IRBuilder<> Builder(InsertPt);
2793 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2794 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2795 }
2796
2797 /// Get the built value.
2798 Value *getBuiltValue() { return Val; }
2799
2800 /// Remove the built instruction.
2801 void undo() override {
2802 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2803 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2804 IVal->eraseFromParent();
2805 }
2806 };
2807
2808 /// Build a zero extension instruction.
2809 class ZExtBuilder : public TypePromotionAction {
2810 Value *Val;
2811
2812 public:
2813 /// Build a zero extension instruction of \p Opnd producing a \p Ty
2814 /// result.
2815 /// zext Opnd to Ty.
2816 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2817 : TypePromotionAction(InsertPt) {
2818 IRBuilder<> Builder(InsertPt);
2819 Builder.SetCurrentDebugLocation(DebugLoc());
2820 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2821 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2822 }
2823
2824 /// Get the built value.
2825 Value *getBuiltValue() { return Val; }
2826
2827 /// Remove the built instruction.
2828 void undo() override {
2829 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2830 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2831 IVal->eraseFromParent();
2832 }
2833 };
2834
2835 /// Mutate an instruction to another type.
2836 class TypeMutator : public TypePromotionAction {
2837 /// Record the original type.
2838 Type *OrigTy;
2839
2840 public:
2841 /// Mutate the type of \p Inst into \p NewTy.
2842 TypeMutator(Instruction *Inst, Type *NewTy)
2843 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2844 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2845 << "\n");
2846 Inst->mutateType(NewTy);
2847 }
2848
2849 /// Mutate the instruction back to its original type.
2850 void undo() override {
2851 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2852 << "\n");
2853 Inst->mutateType(OrigTy);
2854 }
2855 };
2856
2857 /// Replace the uses of an instruction by another instruction.
2858 class UsesReplacer : public TypePromotionAction {
2859 /// Helper structure to keep track of the replaced uses.
2860 struct InstructionAndIdx {
2861 /// The instruction using the instruction.
2862 Instruction *Inst;
2863
2864 /// The index where this instruction is used for Inst.
2865 unsigned Idx;
2866
2867 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2868 : Inst(Inst), Idx(Idx) {}
2869 };
2870
2871 /// Keep track of the original uses (pair Instruction, Index).
2872 SmallVector<InstructionAndIdx, 4> OriginalUses;
2873 /// Keep track of the debug users.
2874 SmallVector<DbgValueInst *, 1> DbgValues;
2875
2876 /// Keep track of the new value so that we can undo it by replacing
2877 /// instances of the new value with the original value.
2878 Value *New;
2879
2880 using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2881
2882 public:
2883 /// Replace all the use of \p Inst by \p New.
2884 UsesReplacer(Instruction *Inst, Value *New)
2885 : TypePromotionAction(Inst), New(New) {
2886 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2887 << "\n");
2888 // Record the original uses.
2889 for (Use &U : Inst->uses()) {
2890 Instruction *UserI = cast<Instruction>(U.getUser());
2891 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2892 }
2893 // Record the debug uses separately. They are not in the instruction's
2894 // use list, but they are replaced by RAUW.
2895 findDbgValues(DbgValues, Inst);
2896
2897 // Now, we can replace the uses.
2898 Inst->replaceAllUsesWith(New);
2899 }
2900
2901 /// Reassign the original uses of Inst to Inst.
2902 void undo() override {
2903 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2904 for (InstructionAndIdx &Use : OriginalUses)
2905 Use.Inst->setOperand(Use.Idx, Inst);
2906 // RAUW has replaced all original uses with references to the new value,
2907 // including the debug uses. Since we are undoing the replacements,
2908 // the original debug uses must also be reinstated to maintain the
2909 // correctness and utility of debug value instructions.
2910 for (auto *DVI : DbgValues)
2911 DVI->replaceVariableLocationOp(New, Inst);
2912 }
2913 };
2914
2915 /// Remove an instruction from the IR.
2916 class InstructionRemover : public TypePromotionAction {
2917 /// Original position of the instruction.
2918 InsertionHandler Inserter;
2919
2920 /// Helper structure to hide all the link to the instruction. In other
2921 /// words, this helps to do as if the instruction was removed.
2922 OperandsHider Hider;
2923
2924 /// Keep track of the uses replaced, if any.
2925 UsesReplacer *Replacer = nullptr;
2926
2927 /// Keep track of instructions removed.
2928 SetOfInstrs &RemovedInsts;
2929
2930 public:
2931 /// Remove all references to \p Inst and optionally replace all its
2932 /// uses with New.
2933 /// \p RemovedInsts Keep track of the instructions removed by this Action.
2934 /// \pre If !Inst->use_empty(), then New != nullptr
2935 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2936 Value *New = nullptr)
2937 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2938 RemovedInsts(RemovedInsts) {
2939 if (New)
2940 Replacer = new UsesReplacer(Inst, New);
2941 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2942 RemovedInsts.insert(Inst);
2943 /// The instructions removed here will be freed after completing
2944 /// optimizeBlock() for all blocks as we need to keep track of the
2945 /// removed instructions during promotion.
2946 Inst->removeFromParent();
2947 }
2948
2949 ~InstructionRemover() override { delete Replacer; }
2950
2951 /// Resurrect the instruction and reassign it to the proper uses if a
2952 /// new value was provided when building this action.
2953 void undo() override {
2954 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2955 Inserter.insert(Inst);
2956 if (Replacer)
2957 Replacer->undo();
2958 Hider.undo();
2959 RemovedInsts.erase(Inst);
2960 }
2961 };
2962
2963public:
2964 /// Restoration point.
2965 /// The restoration point is a pointer to an action instead of an iterator
2966 /// because the iterator may be invalidated but not the pointer.
2967 using ConstRestorationPt = const TypePromotionAction *;
2968
2969 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2970 : RemovedInsts(RemovedInsts) {}
2971
2972 /// Advocate every change made in this transaction. Return true if any change
2973 /// happened.
2974 bool commit();
2975
2976 /// Undo all the changes made after the given point.
2977 void rollback(ConstRestorationPt Point);
2978
2979 /// Get the current restoration point.
2980 ConstRestorationPt getRestorationPoint() const;
2981
2982 /// \name API for IR modification with state keeping to support rollback.
2983 /// @{
2984 /// Same as Instruction::setOperand.
2985 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2986
2987 /// Same as Instruction::eraseFromParent.
2988 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2989
2990 /// Same as Value::replaceAllUsesWith.
2991 void replaceAllUsesWith(Instruction *Inst, Value *New);
2992
2993 /// Same as Value::mutateType.
2994 void mutateType(Instruction *Inst, Type *NewTy);
2995
2996 /// Same as IRBuilder::createTrunc.
2997 Value *createTrunc(Instruction *Opnd, Type *Ty);
2998
2999 /// Same as IRBuilder::createSExt.
3000 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3001
3002 /// Same as IRBuilder::createZExt.
3003 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3004
3005 /// Same as Instruction::moveBefore.
3006 void moveBefore(Instruction *Inst, Instruction *Before);
3007 /// @}
3008
3009private:
3010 /// The ordered list of actions made so far.
3011 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3012
3013 using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3014
3015 SetOfInstrs &RemovedInsts;
3016};
3017
3018} // end anonymous namespace
3019
3020void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3021 Value *NewVal) {
3022 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3023 Inst, Idx, NewVal));
3024}
3025
3026void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3027 Value *NewVal) {
3028 Actions.push_back(
3029 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3030 Inst, RemovedInsts, NewVal));
3031}
3032
3033void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3034 Value *New) {
3035 Actions.push_back(
3036 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3037}
3038
3039void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3040 Actions.push_back(
3041 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3042}
3043
3044Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
3045 Type *Ty) {
3046 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3047 Value *Val = Ptr->getBuiltValue();
3048 Actions.push_back(std::move(Ptr));
3049 return Val;
3050}
3051
3052Value *TypePromotionTransaction::createSExt(Instruction *Inst,
3053 Value *Opnd, Type *Ty) {
3054 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3055 Value *Val = Ptr->getBuiltValue();
3056 Actions.push_back(std::move(Ptr));
3057 return Val;
3058}
3059
3060Value *TypePromotionTransaction::createZExt(Instruction *Inst,
3061 Value *Opnd, Type *Ty) {
3062 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3063 Value *Val = Ptr->getBuiltValue();
3064 Actions.push_back(std::move(Ptr));
3065 return Val;
3066}
3067
3068void TypePromotionTransaction::moveBefore(Instruction *Inst,
3069 Instruction *Before) {
3070 Actions.push_back(
3071 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
3072 Inst, Before));
3073}
3074
3075TypePromotionTransaction::ConstRestorationPt
3076TypePromotionTransaction::getRestorationPoint() const {
3077 return !Actions.empty() ? Actions.back().get() : nullptr;
3078}
3079
3080bool TypePromotionTransaction::commit() {
3081 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3082 Action->commit();
3083 bool Modified = !Actions.empty();
3084 Actions.clear();
3085 return Modified;
3086}
3087
3088void TypePromotionTransaction::rollback(
3089 TypePromotionTransaction::ConstRestorationPt Point) {
3090 while (!Actions.empty() && Point != Actions.back().get()) {
3091 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3092 Curr->undo();
3093 }
3094}
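
Taken together, each helper above applies its IR change in its constructor and records itself so the change can be undone later. A minimal usage sketch of the transaction API, not from the source, assuming Trunc is an existing Instruction*, NewTy and WidenedVal come from surrounding promotion logic, Profitable is a flag computed by the caller, and SetOfInstrs is the instruction-set typedef used throughout this file:

SetOfInstrs RemovedInsts;
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt Start = TPT.getRestorationPoint();
TPT.mutateType(Trunc, NewTy);          // applied immediately, recorded as an action
TPT.setOperand(Trunc, 0, WidenedVal);  // likewise recorded
if (Profitable)
  TPT.commit();                        // keep the changes, clear the action log
else
  TPT.rollback(Start);                 // undo both actions, newest first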
3095
3096namespace {
3097
3098/// A helper class for matching addressing modes.
3099///
3100/// This encapsulates the logic for matching the target-legal addressing modes.
3101class AddressingModeMatcher {
3102 SmallVectorImpl<Instruction*> &AddrModeInsts;
3103 const TargetLowering &TLI;
3104 const TargetRegisterInfo &TRI;
3105 const DataLayout &DL;
3106 const LoopInfo &LI;
3107 const std::function<const DominatorTree &()> getDTFn;
3108
3109 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3110 /// the memory instruction that we're computing this address for.
3111 Type *AccessTy;
3112 unsigned AddrSpace;
3113 Instruction *MemoryInst;
3114
3115 /// This is the addressing mode that we're building up. This is
3116 /// part of the return value of this addressing mode matching stuff.
3117 ExtAddrMode &AddrMode;
3118
3119 /// The instructions inserted by other CodeGenPrepare optimizations.
3120 const SetOfInstrs &InsertedInsts;
3121
3122 /// A map from the instructions to their type before promotion.
3123 InstrToOrigTy &PromotedInsts;
3124
3125 /// The ongoing transaction where every action should be registered.
3126 TypePromotionTransaction &TPT;
3127
3128 // A GEP which has too large offset to be folded into the addressing mode.
3129 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3130
3131 /// This is set to true when we should not do profitability checks.
3132 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3133 bool IgnoreProfitability;
3134
3135 /// True if we are optimizing for size.
3136 bool OptSize;
3137
3138 ProfileSummaryInfo *PSI;
3139 BlockFrequencyInfo *BFI;
3140
3141 AddressingModeMatcher(
3142 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3143 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3144 const std::function<const DominatorTree &()> getDTFn,
3145 Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM,
3146 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3147 TypePromotionTransaction &TPT,
3148 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3149 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3150 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3151 DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
3152 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3153 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3154 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3155 IgnoreProfitability = false;
3156 }
3157
3158public:
3159 /// Find the maximal addressing mode that a load/store of V can fold,
3160 /// given an access type of AccessTy. This returns a list of involved
3161 /// instructions in AddrModeInsts.
3162 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3163 /// optimizations.
3164 /// \p PromotedInsts maps the instructions to their type before promotion.
3165 /// \p TPT The ongoing transaction where every action should be registered.
3166 static ExtAddrMode
3167 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3168 SmallVectorImpl<Instruction *> &AddrModeInsts,
3169 const TargetLowering &TLI, const LoopInfo &LI,
3170 const std::function<const DominatorTree &()> getDTFn,
3171 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3172 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3173 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3174 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3175 ExtAddrMode Result;
3176
3177 bool Success = AddressingModeMatcher(
3178 AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result,
3179 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
3180 BFI).matchAddr(V, 0);
3181 (void)Success; assert(Success && "Couldn't select *anything*?");
3182 return Result;
3183 }
3184
3185private:
3186 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3187 bool matchAddr(Value *Addr, unsigned Depth);
3188 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3189 bool *MovedAway = nullptr);
3190 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3191 ExtAddrMode &AMBefore,
3192 ExtAddrMode &AMAfter);
3193 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3194 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3195 Value *PromotedOperand) const;
3196};
3197
3198class PhiNodeSet;
3199
3200/// An iterator for PhiNodeSet.
3201class PhiNodeSetIterator {
3202 PhiNodeSet * const Set;
3203 size_t CurrentIndex = 0;
3204
3205public:
3206 /// The constructor. Start should point to either a valid element, or be equal
3207 /// to the size of the underlying SmallVector of the PhiNodeSet.
3208 PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
3209 PHINode * operator*() const;
3210 PhiNodeSetIterator& operator++();
3211 bool operator==(const PhiNodeSetIterator &RHS) const;
3212 bool operator!=(const PhiNodeSetIterator &RHS) const;
3213};
3214
3215/// Keeps a set of PHINodes.
3216///
3217/// This is a minimal set implementation for a specific use case:
3218/// It is very fast when there are very few elements, but also provides good
3219/// performance when there are many. It is similar to SmallPtrSet, but also
3220/// provides iteration by insertion order, which is deterministic and stable
3221/// across runs. It is also similar to SmallSetVector, but provides removing
3222/// elements in O(1) time. This is achieved by not actually removing the element
3223/// from the underlying vector, so comes at the cost of using more memory, but
3224/// that is fine, since PhiNodeSets are used as short lived objects.
3225class PhiNodeSet {
3226 friend class PhiNodeSetIterator;
3227
3228 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3229 using iterator = PhiNodeSetIterator;
3230
3231 /// Keeps the elements in the order of their insertion in the underlying
3232 /// vector. To achieve constant time removal, it never deletes any element.
3233 SmallVector<PHINode *, 32> NodeList;
3234
3235 /// Keeps the elements in the underlying set implementation. This (and not the
3236 /// NodeList defined above) is the source of truth on whether an element
3237 /// is actually in the collection.
3238 MapType NodeMap;
3239
3240 /// Points to the first valid (not deleted) element when the set is not empty
3241 /// and the value is not zero. Equals to the size of the underlying vector
3242 /// when the set is empty. When the value is 0, as in the beginning, the
3243 /// first element may or may not be valid.
3244 size_t FirstValidElement = 0;
3245
3246public:
3247 /// Inserts a new element to the collection.
3248 /// \returns true if the element is actually added, i.e. was not in the
3249 /// collection before the operation.
3250 bool insert(PHINode *Ptr) {
3251 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3252 NodeList.push_back(Ptr);
3253 return true;
3254 }
3255 return false;
3256 }
3257
3258 /// Removes the element from the collection.
3259 /// \returns whether the element is actually removed, i.e. was in the
3260 /// collection before the operation.
3261 bool erase(PHINode *Ptr) {
3262 if (NodeMap.erase(Ptr)) {
3263 SkipRemovedElements(FirstValidElement);
3264 return true;
3265 }
3266 return false;
3267 }
3268
3269 /// Removes all elements and clears the collection.
3270 void clear() {
3271 NodeMap.clear();
3272 NodeList.clear();
3273 FirstValidElement = 0;
3274 }
3275
3276 /// \returns an iterator that will iterate the elements in the order of
3277 /// insertion.
3278 iterator begin() {
3279 if (FirstValidElement == 0)
3280 SkipRemovedElements(FirstValidElement);
3281 return PhiNodeSetIterator(this, FirstValidElement);
3282 }
3283
3284 /// \returns an iterator that points to the end of the collection.
3285 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3286
3287 /// Returns the number of elements in the collection.
3288 size_t size() const {
3289 return NodeMap.size();
3290 }
3291
3292 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3293 size_t count(PHINode *Ptr) const {
3294 return NodeMap.count(Ptr);
3295 }
3296
3297private:
3298 /// Updates the CurrentIndex so that it will point to a valid element.
3299 ///
3300 /// If the element of NodeList at CurrentIndex is valid, it does not
3301 /// change it. If there are no more valid elements, it updates CurrentIndex
3302 /// to point to the end of the NodeList.
3303 void SkipRemovedElements(size_t &CurrentIndex) {
3304 while (CurrentIndex < NodeList.size()) {
3305 auto it = NodeMap.find(NodeList[CurrentIndex]);
3306 // If the element has been deleted and added again later, NodeMap will
3307 // point to a different index, so CurrentIndex will still be invalid.
3308 if (it != NodeMap.end() && it->second == CurrentIndex)
3309 break;
3310 ++CurrentIndex;
3311 }
3312 }
3313};
3314
3315PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3316 : Set(Set), CurrentIndex(Start) {}
3317
3318PHINode * PhiNodeSetIterator::operator*() const {
3319 assert(CurrentIndex < Set->NodeList.size() &&
3320 "PhiNodeSet access out of range");
3321 return Set->NodeList[CurrentIndex];
3322}
3323
3324PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
3325 assert(CurrentIndex < Set->NodeList.size() &&
3326 "PhiNodeSet access out of range");
3327 ++CurrentIndex;
3328 Set->SkipRemovedElements(CurrentIndex);
3329 return *this;
3330}
3331
3332bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3333 return CurrentIndex == RHS.CurrentIndex;
3334}
3335
3336bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3337 return !((*this) == RHS);
3338}
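
A minimal sketch of the PhiNodeSet contract described above, not from the source, assuming P1, P2 and P3 are existing PHINode*s: erase() only drops the NodeMap entry, and iteration lazily skips the stale NodeList slot while preserving insertion order.

PhiNodeSet Set;
Set.insert(P1);
Set.insert(P2);
Set.insert(P3);
Set.erase(P2);                 // O(1): only the NodeMap entry is removed
for (PHINode *P : Set)         // visits P1 then P3, in insertion order,
  (void)P;                     //   skipping the slot left behind by P2
assert(Set.count(P2) == 0 && Set.size() == 2);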
3339
3340/// Keep track of simplification of Phi nodes.
3341 /// Accept the set of all phi nodes and erase a phi node from this set
3342 /// if it is simplified.
3343class SimplificationTracker {
3344 DenseMap<Value *, Value *> Storage;
3345 const SimplifyQuery &SQ;
3346 // Tracks newly created Phi nodes. The elements are iterated by insertion
3347 // order.
3348 PhiNodeSet AllPhiNodes;
3349 // Tracks newly created Select nodes.
3350 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3351
3352public:
3353 SimplificationTracker(const SimplifyQuery &sq)
3354 : SQ(sq) {}
3355
3356 Value *Get(Value *V) {
3357 do {
3358 auto SV = Storage.find(V);
3359 if (SV == Storage.end())
3360 return V;
3361 V = SV->second;
3362 } while (true);
3363 }
3364
3365 Value *Simplify(Value *Val) {
3366 SmallVector<Value *, 32> WorkList;
3367 SmallPtrSet<Value *, 32> Visited;
3368 WorkList.push_back(Val);
3369 while (!WorkList.empty()) {
3370 auto *P = WorkList.pop_back_val();
3371 if (!Visited.insert(P).second)
3372 continue;
3373 if (auto *PI = dyn_cast<Instruction>(P))
3374 if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
3375 for (auto *U : PI->users())
3376 WorkList.push_back(cast<Value>(U));
3377 Put(PI, V);
3378 PI->replaceAllUsesWith(V);
3379 if (auto *PHI = dyn_cast<PHINode>(PI))
3380 AllPhiNodes.erase(PHI);
3381 if (auto *Select = dyn_cast<SelectInst>(PI))
3382 AllSelectNodes.erase(Select);
3383 PI->eraseFromParent();
3384 }
3385 }
3386 return Get(Val);
3387 }
3388
3389 void Put(Value *From, Value *To) {
3390 Storage.insert({ From, To });
3391 }
3392
3393 void ReplacePhi(PHINode *From, PHINode *To) {
3394 Value* OldReplacement = Get(From);
3395 while (OldReplacement != From) {
35: Assuming 'OldReplacement' is not equal to 'From'
36: Loop condition is true. Entering loop body
39: Assuming 'OldReplacement' is not equal to 'From'
40: Loop condition is true. Entering loop body
43: Assuming 'OldReplacement' is equal to 'From'
44: Loop condition is false. Execution continues on line 3400
3396 From = To;
41: Null pointer value stored to 'From'
3397 To = dyn_cast<PHINode>(OldReplacement);
37: Assuming 'OldReplacement' is not a 'PHINode'
38: Null pointer value stored to 'To'
42: Assuming 'OldReplacement' is a 'PHINode'
3398 OldReplacement = Get(From);
3399 }
3400 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
45: Assuming the condition is true
46: '?' condition is true
3401 Put(From, To);
3402 From->replaceAllUsesWith(To);
47: Called C++ object pointer is null
3403 AllPhiNodes.erase(From);
3404 From->eraseFromParent();
3405 }
3406
3407 PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
3408
3409 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3410
3411 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3412
3413 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3414
3415 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3416
3417 void destroyNewNodes(Type *CommonType) {
3418 // For safe erasing, replace the uses with dummy value first.
3419 auto *Dummy = UndefValue::get(CommonType);
3420 for (auto *I : AllPhiNodes) {
3421 I->replaceAllUsesWith(Dummy);
3422 I->eraseFromParent();
3423 }
3424 AllPhiNodes.clear();
3425 for (auto *I : AllSelectNodes) {
3426 I->replaceAllUsesWith(Dummy);
3427 I->eraseFromParent();
3428 }
3429 AllSelectNodes.clear();
3430 }
3431};
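
The warning this report is about comes from the replacement-chasing loop in ReplacePhi above. A standalone sketch of that loop shape, not the source itself, with Lookup standing in for Get; on the reported path the first Lookup result is not a PHINode:

static void replaceChainSketch(PHINode *From, PHINode *To,
                               llvm::function_ref<Value *(Value *)> Lookup) {
  Value *OldReplacement = Lookup(From);      // step 35: not equal to From
  while (OldReplacement != From) {
    From = To;                               // 2nd pass (step 41): To was null
    To = dyn_cast<PHINode>(OldReplacement);  // 1st pass (step 38): not a PHI, so null
    OldReplacement = Lookup(From);
  }
  // Loop exit (step 44): From can be null here while To is non-null, and with
  // NDEBUG the assert is compiled out, so ReplacePhi's subsequent call
  // From->replaceAllUsesWith(To) at line 3402 is the flagged null dereference.
}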
3432
3433/// A helper class for combining addressing modes.
3434class AddressingModeCombiner {
3435 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3436 typedef std::pair<PHINode *, PHINode *> PHIPair;
3437
3438private:
3439 /// The addressing modes we've collected.
3440 SmallVector<ExtAddrMode, 16> AddrModes;
3441
3442 /// The field in which the AddrModes differ, when we have more than one.
3443 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3444
3445 /// Are the AddrModes that we have all just equal to their original values?
3446 bool AllAddrModesTrivial = true;
3447
3448 /// Common Type for all different fields in addressing modes.
3449 Type *CommonType;
3450
3451 /// SimplifyQuery for simplifyInstruction utility.
3452 const SimplifyQuery &SQ;
3453
3454 /// Original Address.
3455 Value *Original;
3456
3457public:
3458 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3459 : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
3460
3461 /// Get the combined AddrMode
3462 const ExtAddrMode &getAddrMode() const {
3463 return AddrModes[0];
3464 }
3465
3466 /// Add a new AddrMode if it's compatible with the AddrModes we already
3467 /// have.
3468 /// \return True iff we succeeded in doing so.
3469 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3470 // Take note of whether we have any non-trivial AddrModes: we need to detect
3471 // when all AddrModes are trivial, since then we would introduce a phi or
3472 // select which just duplicates what's already there.
3473 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3474
3475 // If this is the first addrmode then everything is fine.
3476 if (AddrModes.empty()) {
3477 AddrModes.emplace_back(NewAddrMode);
3478 return true;
3479 }
3480
3481 // Figure out how different this is from the other address modes, which we
3482 // can do just by comparing against the first one given that we only care
3483 // about the cumulative difference.
3484 ExtAddrMode::FieldName ThisDifferentField =
3485 AddrModes[0].compare(NewAddrMode);
3486 if (DifferentField == ExtAddrMode::NoField)
3487 DifferentField = ThisDifferentField;
3488 else if (DifferentField != ThisDifferentField)
3489 DifferentField = ExtAddrMode::MultipleFields;
3490
3491 // If NewAddrMode differs in more than one dimension we cannot handle it.
3492 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3493
3494 // If Scale Field is different then we reject.
3495 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3496
3497 // We must also reject the case where the base offset differs and the
3498 // scale reg is not null: we cannot handle it, because the merge of the
3499 // different offsets would have to be used as the ScaleReg.
3500 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3501 !NewAddrMode.ScaledReg);
3502
3503 // We must also reject the case where the GV differs and a BaseReg is
3504 // installed, because we want to use the base reg as a merge of the GV values.
3505 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3506 !NewAddrMode.HasBaseReg);
3507
3508 // Even if NewAddrMode is the same we still need to collect it, because the
3509 // original value is different, and later we will need all the original
3510 // values as anchors when finding the common Phi node.
3511 if (CanHandle)
3512 AddrModes.emplace_back(NewAddrMode);
3513 else
3514 AddrModes.clear();
3515
3516 return CanHandle;
3517 }
3518
3519 /// Combine the addressing modes we've collected into a single
3520 /// addressing mode.
3521 /// \return True iff we successfully combined them or we only had one so
3522 /// didn't need to combine them anyway.
3523 bool combineAddrModes() {
3524 // If we have no AddrModes then they can't be combined.
3525 if (AddrModes.size() == 0)
13: Assuming the condition is false
14: Taking false branch
3526 return false;
3527
3528 // A single AddrMode can trivially be combined.
3529 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
15: Assuming the condition is false
16: Assuming field 'DifferentField' is not equal to NoField
17: Taking false branch
3530 return true;
3531
3532 // If the AddrModes we collected are all just equal to the value they are
3533 // derived from then combining them wouldn't do anything useful.
3534 if (AllAddrModesTrivial)
18: Assuming field 'AllAddrModesTrivial' is false
19: Taking false branch
3535 return false;
3536
3537 if (!addrModeCombiningAllowed())
20: Assuming the condition is false
21: Taking false branch
3538 return false;
3539
3540 // Build a map from <original value, basic block where we saw it> to the
3541 // value of the base register.
3542 // Bail out if there is no common type.
3543 FoldAddrToValueMapping Map;
3544 if (!initializeMap(Map))
22: Taking false branch
3545 return false;
3546
3547 Value *CommonValue = findCommon(Map);
23: Calling 'AddressingModeCombiner::findCommon'
3548 if (CommonValue)
3549 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3550 return CommonValue != nullptr;
3551 }
3552
3553private:
3554 /// Initialize Map with anchor values. For each address seen, we record the
3555 /// value of the differing field in that address.
3556 /// At the same time we find a common type for the differing fields, which we
3557 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3558 /// Return false if no common type is found.
3559 bool initializeMap(FoldAddrToValueMapping &Map) {
3560 // Keep track of keys where the value is null. We will need to replace it
3561 // with constant null when we know the common type.
3562 SmallVector<Value *, 2> NullValue;
3563 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3564 for (auto &AM : AddrModes) {
3565 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3566 if (DV) {
3567 auto *Type = DV->getType();
3568 if (CommonType && CommonType != Type)
3569 return false;
3570 CommonType = Type;
3571 Map[AM.OriginalValue] = DV;
3572 } else {
3573 NullValue.push_back(AM.OriginalValue);
3574 }
3575 }
3576 assert(CommonType && "At least one non-null value must be!");
3577 for (auto *V : NullValue)
3578 Map[V] = Constant::getNullValue(CommonType);
3579 return true;
3580 }
3581
3582 /// We have a mapping between value A and another value B, where B was a
3583 /// field in the addressing mode represented by A. We also have an original
3584 /// value C representing the address we start with. Traversing from C through
3585 /// phis and selects, we ended up with the A's in the map. This utility tries
3586 /// to find a value V which is a field in addressing mode C such that, by
3587 /// traversing through phi nodes and selects, we end up at the corresponding
3588 /// B values in the map. The utility will create new Phis/Selects if needed.
3589 // The simple example looks as follows:
3590 // BB1:
3591 // p1 = b1 + 40
3592 // br cond BB2, BB3
3593 // BB2:
3594 // p2 = b2 + 40
3595 // br BB3
3596 // BB3:
3597 // p = phi [p1, BB1], [p2, BB2]
3598 // v = load p
3599 // Map is
3600 // p1 -> b1
3601 // p2 -> b2
3602 // Request is
3603 // p -> ?
3604 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3605 Value *findCommon(FoldAddrToValueMapping &Map) {
3606 // Tracks the simplification of newly created phi nodes. The reason we use
3607 // this mapping is because we will add new created Phi nodes in AddrToBase.
3608 // Simplification of Phi nodes is recursive, so some Phi node may
3609 // be simplified after we added it to AddrToBase. In reality this
3610 // simplification is possible only if original phi/selects were not
3611 // simplified yet.
3612 // Using this mapping we can find the current value in AddrToBase.
3613 SimplificationTracker ST(SQ);
3614
3615 // First step, DFS to create PHI nodes for all intermediate blocks.
3616 // Also fill traverse order for the second step.
3617 SmallVector<Value *, 32> TraverseOrder;
3618 InsertPlaceholders(Map, TraverseOrder, ST);
3619
3620 // Second Step, fill new nodes by merged values and simplify if possible.
3621 FillPlaceholders(Map, TraverseOrder, ST);
3622
3623 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
24: Assuming the condition is false
3624 ST.destroyNewNodes(CommonType);
3625 return nullptr;
3626 }
3627
3628 // Now we'd like to match the new Phi nodes to existing ones.
3629 unsigned PhiNotMatchedCount = 0;
3630 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
25: Calling 'AddressingModeCombiner::MatchPhiSet'
3631 ST.destroyNewNodes(CommonType);
3632 return nullptr;
3633 }
3634
3635 auto *Result = ST.Get(Map.find(Original)->second);
3636 if (Result) {
3637 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3638 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3639 }
3640 return Result;
3641 }
3642
3643 /// Try to match PHI node to Candidate.
3644 /// Matcher tracks the matched Phi nodes.
3645 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3646 SmallSetVector<PHIPair, 8> &Matcher,
3647 PhiNodeSet &PhiNodesToMatch) {
3648 SmallVector<PHIPair, 8> WorkList;
3649 Matcher.insert({ PHI, Candidate });
3650 SmallSet<PHINode *, 8> MatchedPHIs;
3651 MatchedPHIs.insert(PHI);
3652 WorkList.push_back({ PHI, Candidate });
3653 SmallSet<PHIPair, 8> Visited;
3654 while (!WorkList.empty()) {
3655 auto Item = WorkList.pop_back_val();
3656 if (!Visited.insert(Item).second)
3657 continue;
3658 // We iterate over all incoming values of the Phi to compare them.
3659 // If the values are different, both of them are Phis, the first one is a
3660 // Phi we added (subject to match), and both of them are in the same basic
3661 // block, then we can match our pair if the values match. So we state that
3662 // these values match and add the pair to the work list to verify that.
3663 for (auto B : Item.first->blocks()) {
3664 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3665 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3666 if (FirstValue == SecondValue)
3667 continue;
3668
3669 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3670 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3671
3672 // If one of them is not a Phi, or
3673 // the first one is not a Phi node from the set we'd like to match, or
3674 // the Phi nodes are from different basic blocks, then
3675 // we will not be able to match.
3676 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3677 FirstPhi->getParent() != SecondPhi->getParent())
3678 return false;
3679
3680 // If we already matched them then continue.
3681 if (Matcher.count({ FirstPhi, SecondPhi }))
3682 continue;
3683 // So the values are different and do not match. So we need them to
3684 // match. (But we register no more than one match per PHI node, so that
3685 // we won't later try to replace them twice.)
3686 if (MatchedPHIs.insert(FirstPhi).second)
3687 Matcher.insert({ FirstPhi, SecondPhi });
3688 // But we must check it.
3689 WorkList.push_back({ FirstPhi, SecondPhi });
3690 }
3691 }
3692 return true;
3693 }
3694
3695 /// For the given set of PHI nodes (in the SimplificationTracker) try
3696 /// to find their equivalents.
3697 /// Returns false if this matching fails and creation of new Phi is disabled.
3698 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3699 unsigned &PhiNotMatchedCount) {
3700 // Matched and PhiNodesToMatch iterate their elements in a deterministic
3701 // order, so the replacements (ReplacePhi) are also done in a deterministic
3702 // order.
3703 SmallSetVector<PHIPair, 8> Matched;
3704 SmallPtrSet<PHINode *, 8> WillNotMatch;
3705 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3706 while (PhiNodesToMatch.size()) {
26: Loop condition is true. Entering loop body
3707 PHINode *PHI = *PhiNodesToMatch.begin();
3708
3709 // Add ourselves; if no Phi node in the basic block matches, we do not match.
3710 WillNotMatch.clear();
3711 WillNotMatch.insert(PHI);
3712
3713 // Traverse all Phis until we find an equivalent or fail to do so.
3714 bool IsMatched = false;
3715 for (auto &P : PHI->getParent()->phis()) {
3716 if (&P == PHI)
27: Assuming the condition is false
28: Taking false branch
3717 continue;
3718 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
29: Assuming 'IsMatched' is true
30: Taking true branch
3719 break;
31: Execution continues on line 3727
3720 // If it does not match, collect all Phi nodes from matcher.
3721 // If we end up with no match, then all these Phi nodes will not match
3722 // later.
3723 for (auto M : Matched)
3724 WillNotMatch.insert(M.first);
3725 Matched.clear();
3726 }
3727 if (IsMatched) {
31.1: 'IsMatched' is true
32: Taking true branch
3728 // Replace all matched values and erase them.
3729 for (auto MV : Matched)
33: Assuming '__begin4' is not equal to '__end4'
3730 ST.ReplacePhi(MV.first, MV.second);
34: Calling 'SimplificationTracker::ReplacePhi'
3731 Matched.clear();
3732 continue;
3733 }
3734 // If we are not allowed to create new nodes then bail out.
3735 if (!AllowNewPhiNodes)
3736 return false;
3737 // Just remove all seen values in matcher. They will not match anything.
3738 PhiNotMatchedCount += WillNotMatch.size();
3739 for (auto *P : WillNotMatch)
3740 PhiNodesToMatch.erase(P);
3741 }
3742 return true;
3743 }
3744 /// Fill the placeholders with values from predecessors and simplify them.
3745 void FillPlaceholders(FoldAddrToValueMapping &Map,
3746 SmallVectorImpl<Value *> &TraverseOrder,
3747 SimplificationTracker &ST) {
3748 while (!TraverseOrder.empty()) {
3749 Value *Current = TraverseOrder.pop_back_val();
3750 assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3751 Value *V = Map[Current];
3752
3753 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3754 // CurrentValue also must be Select.
3755 auto *CurrentSelect = cast<SelectInst>(Current);
3756 auto *TrueValue = CurrentSelect->getTrueValue();
3757 assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3758 Select->setTrueValue(ST.Get(Map[TrueValue]));
3759 auto *FalseValue = CurrentSelect->getFalseValue();
3760 assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3761 Select->setFalseValue(ST.Get(Map[FalseValue]));
3762 } else {
3763 // Must be a Phi node then.
3764 auto *PHI = cast<PHINode>(V);
3765 // Fill the Phi node with values from predecessors.
3766 for (auto *B : predecessors(PHI->getParent())) {
3767 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
3768 assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3769 PHI->addIncoming(ST.Get(Map[PV]), B);
3770 }
3771 }
3772 Map[Current] = ST.Simplify(V);
3773 }
3774 }
3775
3776 /// Starting from the original value, recursively iterates over the def-use
3777 /// chain up to known ending values represented in a map. For each traversed
3778 /// phi/select it inserts a placeholder Phi or Select.
3779 /// Reports all newly created Phi/Select nodes by adding them to the set.
3780 /// Also reports the order in which the values have been traversed.
3781 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3782 SmallVectorImpl<Value *> &TraverseOrder,
3783 SimplificationTracker &ST) {
3784 SmallVector<Value *, 32> Worklist;
3785 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3786 "Address must be a Phi or Select node");
3787 auto *Dummy = UndefValue::get(CommonType);
3788 Worklist.push_back(Original);
3789 while (!Worklist.empty()) {
3790 Value *Current = Worklist.pop_back_val();
3791 // if it is already visited or it is an ending value then skip it.
3792 if (Map.find(Current) != Map.end())
3793 continue;
3794 TraverseOrder.push_back(Current);
3795
3796 // CurrentValue must be a Phi node or select. All others must be covered
3797 // by anchors.
3798 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3799 // Is it OK to get metadata from OrigSelect?!
3800 // Create a Select placeholder with dummy value.
3801 SelectInst *Select = SelectInst::Create(
3802 CurrentSelect->getCondition(), Dummy, Dummy,
3803 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3804 Map[Current] = Select;
3805 ST.insertNewSelect(Select);
3806 // We are interested in True and False values.
3807 Worklist.push_back(CurrentSelect->getTrueValue());
3808 Worklist.push_back(CurrentSelect->getFalseValue());
3809 } else {
3810 // It must be a Phi node then.
3811 PHINode *CurrentPhi = cast<PHINode>(Current);
3812 unsigned PredCount = CurrentPhi->getNumIncomingValues();
3813 PHINode *PHI =
3814 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3815 Map[Current] = PHI;
3816 ST.insertNewPhi(PHI);
3817 append_range(Worklist, CurrentPhi->incoming_values());
3818 }
3819 }
3820 }
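// Illustration (names and types are only for the example, not from the file):
// for an address that is a select, the placeholder created above looks roughly
// like this, assuming CommonType is i64*:
//   %addr      = select i1 %c, i64* %p, i64* %q        ; original address
//   %addr.sunk = select i1 %c, i64* undef, i64* undef  ; placeholder
// The undef operands are stand-ins; the filling step shown earlier in this
// listing replaces them with the mapped values once every traversed
// phi/select has a placeholder of its own.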
3821
3822 bool addrModeCombiningAllowed() {
3823 if (DisableComplexAddrModes)
3824 return false;
3825 switch (DifferentField) {
3826 default:
3827 return false;
3828 case ExtAddrMode::BaseRegField:
3829 return AddrSinkCombineBaseReg;
3830 case ExtAddrMode::BaseGVField:
3831 return AddrSinkCombineBaseGV;
3832 case ExtAddrMode::BaseOffsField:
3833 return AddrSinkCombineBaseOffs;
3834 case ExtAddrMode::ScaledRegField:
3835 return AddrSinkCombineScaledReg;
3836 }
3837 }
3838};
3839} // end anonymous namespace
3840
3841/// Try adding ScaleReg*Scale to the current addressing mode.
3842/// Return true and update AddrMode if this addr mode is legal for the target,
3843/// false if not.
3844bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3845 unsigned Depth) {
3846 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3847 // mode. Just process that directly.
3848 if (Scale == 1)
3849 return matchAddr(ScaleReg, Depth);
3850
3851 // If the scale is 0, it takes nothing to add this.
3852 if (Scale == 0)
3853 return true;
3854
3855 // If we already have a scale of this value, we can add to it, otherwise, we
3856 // need an available scale field.
3857 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3858 return false;
3859
3860 ExtAddrMode TestAddrMode = AddrMode;
3861
3862 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3863 // [A+B + A*7] -> [B+A*8].
3864 TestAddrMode.Scale += Scale;
3865 TestAddrMode.ScaledReg = ScaleReg;
3866
3867 // If the new address isn't legal, bail out.
3868 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3869 return false;
3870
3871 // It was legal, so commit it.
3872 AddrMode = TestAddrMode;
3873
3874 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3875 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3876 // X*Scale + C*Scale to addr mode. If we found an available IV increment, do not
3877 // go any further: we can reuse it and cannot eliminate it.
3878 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3879 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3880 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
3881 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
3882 TestAddrMode.InBounds = false;
3883 TestAddrMode.ScaledReg = AddLHS;
3884 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
3885
3886 // If this addressing mode is legal, commit it and remember that we folded
3887 // this instruction.
3888 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3889 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3890 AddrMode = TestAddrMode;
3891 return true;
3892 }
3893 // Restore status quo.
3894 TestAddrMode = AddrMode;
3895 }
3896
3897 // If this is an add recurrence with a constant step, return the increment
3898 // instruction and the canonicalized step.
3899 auto GetConstantStep = [this](const Value * V)
3900 ->Optional<std::pair<Instruction *, APInt> > {
3901 auto *PN = dyn_cast<PHINode>(V);
3902 if (!PN)
3903 return None;
3904 auto IVInc = getIVIncrement(PN, &LI);
3905 if (!IVInc)
3906 return None;
3907 // TODO: The result of the intrinsics above is two's complement. However, when
3908 // the IV increment is expressed as add or sub, iv.next is potentially a poison value.
3909 // If it has nuw or nsw flags, we need to make sure that these flags are
3910 // inferrable at the point of the memory instruction. Otherwise we are replacing
3911 // a well-defined two's complement computation with poison. Currently, to avoid
3912 // the potentially complex analysis needed to prove this, we reject such cases.
3913 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
3914 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
3915 return None;
3916 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
3917 return std::make_pair(IVInc->first, ConstantStep->getValue());
3918 return None;
3919 };
3920
3921 // Try to account for the following special case:
3922 // 1. ScaleReg is an induction variable;
3923 // 2. We use it with non-zero offset;
3924 // 3. IV's increment is available at the point of memory instruction.
3925 //
3926 // In this case, we may reuse the IV increment instead of the IV Phi to
3927 // achieve the following advantages:
3928 // 1. If the IV step matches the offset, we will have no need for the offset;
3929 // 2. Even if they don't match, we will reduce the overlap of the live IV
3930 // and the IV increment, which will potentially lead to better register
3931 // assignment.
3932 if (AddrMode.BaseOffs) {
3933 if (auto IVStep = GetConstantStep(ScaleReg)) {
3934 Instruction *IVInc = IVStep->first;
3935 // The following assert is important to ensure a lack of infinite loops.
3936 // This transform is (intentionally) the inverse of the one just above.
3937 // If they don't agree on the definition of an increment, we'd alternate
3938 // back and forth indefinitely.
3939 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
3940 APInt Step = IVStep->second;
3941 APInt Offset = Step * AddrMode.Scale;
3942 if (Offset.isSignedIntN(64)) {
3943 TestAddrMode.InBounds = false;
3944 TestAddrMode.ScaledReg = IVInc;
3945 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
3946 // If this addressing mode is legal, commit it..
3947 // (Note that we defer the (expensive) domtree base legality check
3948 // to the very last possible point.)
3949 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
3950 getDTFn().dominates(IVInc, MemoryInst)) {
3951 AddrModeInsts.push_back(cast<Instruction>(IVInc));
3952 AddrMode = TestAddrMode;
3953 return true;
3954 }
3955 // Restore status quo.
3956 TestAddrMode = AddrMode;
3957 }
3958 }
3959 }
3960
3961 // Otherwise, just return what we have.
3962 return true;
3963}
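// Illustration (register and value names are only for the example): if
// AddrMode already holds {ScaledReg = %x, Scale = 4} and we match another use
// of %x with Scale = 3, the code above produces {ScaledReg = %x, Scale = 7}.
// If %x is itself
//   %x = add i64 %base, 12
// the follow-up fold rewrites the mode to {ScaledReg = %base, Scale = 7,
// BaseOffs += 12 * 7}, provided %x is not an IV increment we want to reuse and
// the target reports the resulting mode as legal.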
3964
3965/// This is a little filter, which returns true if an addressing computation
3966/// involving I might be folded into a load/store accessing it.
3967/// This doesn't need to be perfect, but needs to accept at least
3968/// the set of instructions that MatchOperationAddr can.
3969static bool MightBeFoldableInst(Instruction *I) {
3970 switch (I->getOpcode()) {
3971 case Instruction::BitCast:
3972 case Instruction::AddrSpaceCast:
3973 // Don't touch identity bitcasts.
3974 if (I->getType() == I->getOperand(0)->getType())
3975 return false;
3976 return I->getType()->isIntOrPtrTy();
3977 case Instruction::PtrToInt:
3978 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3979 return true;
3980 case Instruction::IntToPtr:
3981 // We know the input is intptr_t, so this is foldable.
3982 return true;
3983 case Instruction::Add:
3984 return true;
3985 case Instruction::Mul:
3986 case Instruction::Shl:
3987 // Can only handle X*C and X << C.
3988 return isa<ConstantInt>(I->getOperand(1));
3989 case Instruction::GetElementPtr:
3990 return true;
3991 default:
3992 return false;
3993 }
3994}
3995
3996/// Check whether or not \p Val is a legal instruction for \p TLI.
3997/// \note \p Val is assumed to be the product of some type promotion.
3998/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3999/// to be legal, as the non-promoted value would have had the same state.
4000static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4001 const DataLayout &DL, Value *Val) {
4002 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4003 if (!PromotedInst)
4004 return false;
4005 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4006 // If the ISDOpcode is undefined, it was undefined before the promotion.
4007 if (!ISDOpcode)
4008 return true;
4009 // Otherwise, check if the promoted instruction is legal or not.
4010 return TLI.isOperationLegalOrCustom(
4011 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4012}
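// Illustration: a promoted "add i64 %a, %b" maps to ISD::ADD on i64, which a
// 64-bit target typically reports as legal, so the check above accepts it; an
// operation whose ISD opcode the target marks as Expand fails the
// isOperationLegalOrCustom check and the helper returns false.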
4013
4014namespace {
4015
4016 /// Helper class to perform type promotion.
4017class TypePromotionHelper {
4018 /// Utility function to add a promoted instruction \p ExtOpnd to
4019 /// \p PromotedInsts and record the type of extension we have seen.
4020 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4021 Instruction *ExtOpnd,
4022 bool IsSExt) {
4023 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4024 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4025 if (It != PromotedInsts.end()) {
4026 // If the new extension is same as original, the information in
4027 // PromotedInsts[ExtOpnd] is still correct.
4028 if (It->second.getInt() == ExtTy)
4029 return;
4030
4031 // Now that the new extension is different from the old extension, we
4032 // invalidate the type information by setting the extension type to
4033 // BothExtension.
4034 ExtTy = BothExtension;
4035 }
4036 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4037 }
4038
4039 /// Utility function to query the original type of instruction \p Opnd
4040 /// with a matched extension type. If the extension doesn't match, we
4041 /// cannot use the information we had on the original type.
4042 /// BothExtension doesn't match any extension type.
4043 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4044 Instruction *Opnd,
4045 bool IsSExt) {
4046 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4047 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4048 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4049 return It->second.getPointer();
4050 return nullptr;
4051 }
4052
4053 /// Utility function to check whether or not a sign or zero extension
4054 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4055 /// either using the operands of \p Inst or promoting \p Inst.
4056 /// The type of the extension is defined by \p IsSExt.
4057 /// In other words, check if:
4058 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4059 /// #1 Promotion applies:
4060 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4061 /// #2 Operand reuses:
4062 /// ext opnd1 to ConsideredExtType.
4063 /// \p PromotedInsts maps the instructions to their type before promotion.
4064 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4065 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4066
4067 /// Utility function to determine if \p OpIdx should be promoted when
4068 /// promoting \p Inst.
4069 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4070 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4071 }
4072
4073 /// Utility function to promote the operand of \p Ext when this
4074 /// operand is a promotable trunc or sext or zext.
4075 /// \p PromotedInsts maps the instructions to their type before promotion.
4076 /// \p CreatedInstsCost[out] contains the cost of all instructions
4077 /// created to promote the operand of Ext.
4078 /// Newly added extensions are inserted in \p Exts.
4079 /// Newly added truncates are inserted in \p Truncs.
4080 /// Should never be called directly.
4081 /// \return The promoted value which is used instead of Ext.
4082 static Value *promoteOperandForTruncAndAnyExt(
4083 Instruction *Ext, TypePromotionTransaction &TPT,
4084 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4085 SmallVectorImpl<Instruction *> *Exts,
4086 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4087
4088 /// Utility function to promote the operand of \p Ext when this
4089 /// operand is promotable and is not a supported trunc or sext.
4090 /// \p PromotedInsts maps the instructions to their type before promotion.
4091 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4092 /// created to promote the operand of Ext.
4093 /// Newly added extensions are inserted in \p Exts.
4094 /// Newly added truncates are inserted in \p Truncs.
4095 /// Should never be called directly.
4096 /// \return The promoted value which is used instead of Ext.
4097 static Value *promoteOperandForOther(Instruction *Ext,
4098 TypePromotionTransaction &TPT,
4099 InstrToOrigTy &PromotedInsts,
4100 unsigned &CreatedInstsCost,
4101 SmallVectorImpl<Instruction *> *Exts,
4102 SmallVectorImpl<Instruction *> *Truncs,
4103 const TargetLowering &TLI, bool IsSExt);
4104
4105 /// \see promoteOperandForOther.
4106 static Value *signExtendOperandForOther(
4107 Instruction *Ext, TypePromotionTransaction &TPT,
4108 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4109 SmallVectorImpl<Instruction *> *Exts,
4110 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4111 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4112 Exts, Truncs, TLI, true);
4113 }
4114
4115 /// \see promoteOperandForOther.
4116 static Value *zeroExtendOperandForOther(
4117 Instruction *Ext, TypePromotionTransaction &TPT,
4118 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4119 SmallVectorImpl<Instruction *> *Exts,
4120 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4121 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4122 Exts, Truncs, TLI, false);
4123 }
4124
4125public:
4126 /// Type for the utility function that promotes the operand of Ext.
4127 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4128 InstrToOrigTy &PromotedInsts,
4129 unsigned &CreatedInstsCost,
4130 SmallVectorImpl<Instruction *> *Exts,
4131 SmallVectorImpl<Instruction *> *Truncs,
4132 const TargetLowering &TLI);
4133
4134 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4135 /// action to promote the operand of \p Ext instead of using Ext.
4136 /// \return NULL if no promotable action is possible with the current
4137 /// sign extension.
4138 /// \p InsertedInsts keeps track of all the instructions inserted by the
4139 /// other CodeGenPrepare optimizations. This information is important
4140 /// because we do not want to promote these instructions as CodeGenPrepare
4141 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4142 /// \p PromotedInsts maps the instructions to their type before promotion.
4143 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4144 const TargetLowering &TLI,
4145 const InstrToOrigTy &PromotedInsts);
4146};
4147
4148} // end anonymous namespace
4149
4150bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4151 Type *ConsideredExtType,
4152 const InstrToOrigTy &PromotedInsts,
4153 bool IsSExt) {
4154 // The promotion helper does not know how to deal with vector types yet.
4155 // To be able to fix that, we would need to fix the places where we
4156 // statically extend, e.g., constants and such.
4157 if (Inst->getType()->isVectorTy())
4158 return false;
4159
4160 // We can always get through zext.
4161 if (isa<ZExtInst>(Inst))
4162 return true;
4163
4164 // sext(sext) is ok too.
4165 if (IsSExt && isa<SExtInst>(Inst))
4166 return true;
4167
4168 // We can get through binary operator, if it is legal. In other words, the
4169 // binary operator must have a nuw or nsw flag.
4170 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
4171 if (isa_and_nonnull<OverflowingBinaryOperator>(BinOp) &&
4172 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4173 (IsSExt && BinOp->hasNoSignedWrap())))
4174 return true;
4175
4176 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4177 if ((Inst->getOpcode() == Instruction::And ||
4178 Inst->getOpcode() == Instruction::Or))
4179 return true;
4180
4181 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4182 if (Inst->getOpcode() == Instruction::Xor) {
4183 const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
4184 // Make sure it is not a NOT.
4185 if (Cst && !Cst->getValue().isAllOnesValue())
4186 return true;
4187 }
4188
4189 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4190 // It may change a poisoned value into a regular value, like
4191 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4192 // poisoned value regular value
4193 // It should be OK since undef covers valid value.
4194 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4195 return true;
4196
4197 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4198 // It may change a poisoned value into a regular value, like
4199 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4200 // poisoned value regular value
4201 // It should be OK since undef covers valid value.
4202 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4203 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4204 if (ExtInst->hasOneUse()) {
4205 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4206 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4207 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4208 if (Cst &&
4209 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4210 return true;
4211 }
4212 }
4213 }
4214
4215 // Check if we can do the following simplification.
4216 // ext(trunc(opnd)) --> ext(opnd)
4217 if (!isa<TruncInst>(Inst))
4218 return false;
4219
4220 Value *OpndVal = Inst->getOperand(0);
4221 // Check if we can use this operand in the extension.
4222 // If the type is larger than the result type of the extension, we cannot.
4223 if (!OpndVal->getType()->isIntegerTy() ||
4224 OpndVal->getType()->getIntegerBitWidth() >
4225 ConsideredExtType->getIntegerBitWidth())
4226 return false;
4227
4228 // If the operand of the truncate is not an instruction, we will not have
4229 // any information on the dropped bits.
4230 // (Actually we could for constant but it is not worth the extra logic).
4231 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4232 if (!Opnd)
4233 return false;
4234
4235 // Check if the source of the type is narrow enough.
4236 // I.e., check that trunc just drops extended bits of the same kind of
4237 // the extension.
4238 // #1 get the type of the operand and check the kind of the extended bits.
4239 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4240 if (OpndType)
4241 ;
4242 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4243 OpndType = Opnd->getOperand(0)->getType();
4244 else
4245 return false;
4246
4247 // #2 check that the truncate just drops extended bits.
4248 return Inst->getType()->getIntegerBitWidth() >=
4249 OpndType->getIntegerBitWidth();
4250}
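// Illustration (assumed IR, not from the file): the final trunc case above
// accepts a pattern such as
//   %w = sext i32 %v to i64
//   %t = trunc i64 %w to i32
//   %e = sext i32 %t to i64
// because the truncate only drops bits produced by a matching sign extension,
// so %e can simply reuse %w instead of keeping the trunc/ext pair alive.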
4251
4252TypePromotionHelper::Action TypePromotionHelper::getAction(
4253 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4254 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4255 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4256 "Unexpected instruction type");
4257 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4258 Type *ExtTy = Ext->getType();
4259 bool IsSExt = isa<SExtInst>(Ext);
4260 // If the operand of the extension is not an instruction, we cannot
4261 // get through.
4262 // If it is, check whether we can get through.
4263 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4264 return nullptr;
4265
4266 // Do not promote if the operand has been added by codegenprepare.
4267 // Otherwise, it means we are undoing an optimization that is likely to be
4268 // redone, thus causing potential infinite loop.
4269 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4270 return nullptr;
4271
4272 // SExt or Trunc instructions.
4273 // Return the related handler.
4274 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4275 isa<ZExtInst>(ExtOpnd))
4276 return promoteOperandForTruncAndAnyExt;
4277
4278 // Regular instruction.
4279 // Abort early if we will have to insert non-free instructions.
4280 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4281 return nullptr;
4282 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4283}
4284
4285Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4286 Instruction *SExt, TypePromotionTransaction &TPT,
4287 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4288 SmallVectorImpl<Instruction *> *Exts,
4289 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4290 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4291 // get through it and this method should not be called.
4292 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4293 Value *ExtVal = SExt;
4294 bool HasMergedNonFreeExt = false;
4295 if (isa<ZExtInst>(SExtOpnd)) {
4296 // Replace s|zext(zext(opnd))
4297 // => zext(opnd).
4298 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4299 Value *ZExt =
4300 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4301 TPT.replaceAllUsesWith(SExt, ZExt);
4302 TPT.eraseInstruction(SExt);
4303 ExtVal = ZExt;
4304 } else {
4305 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4306 // => z|sext(opnd).
4307 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4308 }
4309 CreatedInstsCost = 0;
4310
4311 // Remove dead code.
4312 if (SExtOpnd->use_empty())
4313 TPT.eraseInstruction(SExtOpnd);
4314
4315 // Check if the extension is still needed.
4316 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4317 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4318 if (ExtInst) {
4319 if (Exts)
4320 Exts->push_back(ExtInst);
4321 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4322 }
4323 return ExtVal;
4324 }
4325
4326 // At this point we have: ext ty opnd to ty.
4327 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4328 Value *NextVal = ExtInst->getOperand(0);
4329 TPT.eraseInstruction(ExtInst, NextVal);
4330 return NextVal;
4331}
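// Illustration (assumed IR): for
//   %z = zext i16 %v to i32
//   %s = sext i32 %z to i64
// the helper above builds "%z2 = zext i16 %v to i64", replaces all uses of %s
// with %z2, erases %s, and drops %z once it becomes dead; CreatedInstsCost then
// records whether a new non-free extension was introduced.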
4332
4333Value *TypePromotionHelper::promoteOperandForOther(
4334 Instruction *Ext, TypePromotionTransaction &TPT,
4335 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4336 SmallVectorImpl<Instruction *> *Exts,
4337 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4338 bool IsSExt) {
4339 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4340 // get through it and this method should not be called.
4341 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4342 CreatedInstsCost = 0;
4343 if (!ExtOpnd->hasOneUse()) {
4344 // ExtOpnd will be promoted.
4345 // All its uses, but Ext, will need to use a truncated value of the
4346 // promoted version.
4347 // Create the truncate now.
4348 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4349 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4350 // Insert it just after the definition.
4351 ITrunc->moveAfter(ExtOpnd);
4352 if (Truncs)
4353 Truncs->push_back(ITrunc);
4354 }
4355
4356 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4357 // Restore the operand of Ext (which has been replaced by the previous call
4358 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4359 TPT.setOperand(Ext, 0, ExtOpnd);
4360 }
4361
4362 // Get through the Instruction:
4363 // 1. Update its type.
4364 // 2. Replace the uses of Ext by Inst.
4365 // 3. Extend each operand that needs to be extended.
4366
4367 // Remember the original type of the instruction before promotion.
4368 // This is useful to know that the high bits are sign extended bits.
4369 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4370 // Step #1.
4371 TPT.mutateType(ExtOpnd, Ext->getType());
4372 // Step #2.
4373 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4374 // Step #3.
4375 Instruction *ExtForOpnd = Ext;
4376
4377 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Propagate Ext to operands\n"
; } } while (false)
;
4378 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4379 ++OpIdx) {
4380 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Operand:\n" << *
(ExtOpnd->getOperand(OpIdx)) << '\n'; } } while (false
)
;
4381 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4382 !shouldExtOperand(ExtOpnd, OpIdx)) {
4383 LLVM_DEBUG(dbgs() << "No need to propagate\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "No need to propagate\n"
; } } while (false)
;
4384 continue;
4385 }
4386 // Check if we can statically extend the operand.
4387 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4388 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4389 LLVM_DEBUG(dbgs() << "Statically extend\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Statically extend\n"; }
} while (false)
;
4390 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4391 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4392 : Cst->getValue().zext(BitWidth);
4393 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4394 continue;
4395 }
4396 // UndefValue are typed, so we have to statically sign extend them.
4397 if (isa<UndefValue>(Opnd)) {
4398 LLVM_DEBUG(dbgs() << "Statically extend\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Statically extend\n"; }
} while (false)
;
4399 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4400 continue;
4401 }
4402
4403 // Otherwise we have to explicitly sign extend the operand.
4404 // Check if Ext was reused to extend an operand.
4405 if (!ExtForOpnd) {
4406 // If yes, create a new one.
4407 LLVM_DEBUG(dbgs() << "More operands to ext\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "More operands to ext\n"
; } } while (false)
;
4408 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
4409 : TPT.createZExt(Ext, Opnd, Ext->getType());
4410 if (!isa<Instruction>(ValForExtOpnd)) {
4411 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4412 continue;
4413 }
4414 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
4415 }
4416 if (Exts)
4417 Exts->push_back(ExtForOpnd);
4418 TPT.setOperand(ExtForOpnd, 0, Opnd);
4419
4420 // Move the sign extension before the insertion point.
4421 TPT.moveBefore(ExtForOpnd, ExtOpnd);
4422 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
4423 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
4424 // If more sext are required, new instructions will have to be created.
4425 ExtForOpnd = nullptr;
4426 }
4427 if (ExtForOpnd == Ext) {
4428 LLVM_DEBUG(dbgs() << "Extension is useless now\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Extension is useless now\n"
; } } while (false)
;
4429 TPT.eraseInstruction(Ext);
4430 }
4431 return ExtOpnd;
4432}
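// Illustration (assumed IR): promoting through an nsw add such as
//   %a = add nsw i32 %x, 1
//   %s = sext i32 %a to i64
// mutates %a to operate on i64 in place, extends its operands (the constant is
// extended statically, %x gets an extension, reusing the original %s where
// possible), and redirects users of %s to the widened %a. If %a has other
// users, a trunc back to i32 is inserted for them first.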
4433
4434/// Check whether or not promoting an instruction to a wider type is profitable.
4435/// \p NewCost gives the cost of extension instructions created by the
4436/// promotion.
4437/// \p OldCost gives the cost of extension instructions before the promotion
4438 /// plus the number of instructions that have been
4439 /// matched in the addressing mode thanks to the promotion.
4440/// \p PromotedOperand is the value that has been promoted.
4441/// \return True if the promotion is profitable, false otherwise.
4442bool AddressingModeMatcher::isPromotionProfitable(
4443 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4444 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "OldCost: " << OldCost
<< "\tNewCost: " << NewCost << '\n'; } } while
(false)
4445 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "OldCost: " << OldCost
<< "\tNewCost: " << NewCost << '\n'; } } while
(false)
;
4446 // The cost of the new extensions is greater than the cost of the
4447 // old extension plus what we folded.
4448 // This is not profitable.
4449 if (NewCost > OldCost)
4450 return false;
4451 if (NewCost < OldCost)
4452 return true;
4453 // The promotion is neutral but it may help folding the sign extension in
4454 // loads for instance.
4455 // Check that we did not create an illegal instruction.
4456 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4457}
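// Illustration: if the promotion created one non-free extension (NewCost = 1)
// while the original ext was non-free and one extra instruction ended up folded
// into the addressing mode (OldCost = 1 + 1 = 2), NewCost < OldCost and the
// promotion is considered profitable; on a tie, the legality of the promoted
// instruction decides.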
4458
4459/// Given an instruction or constant expr, see if we can fold the operation
4460/// into the addressing mode. If so, update the addressing mode and return
4461/// true, otherwise return false without modifying AddrMode.
4462/// If \p MovedAway is not NULL, it contains the information of whether or
4463/// not AddrInst has to be folded into the addressing mode on success.
4464 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
4465/// because it has been moved away.
4466/// Thus AddrInst must not be added in the matched instructions.
4467/// This state can happen when AddrInst is a sext, since it may be moved away.
4468/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4469/// not be referenced anymore.
4470bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4471 unsigned Depth,
4472 bool *MovedAway) {
4473 // Avoid exponential behavior on extremely deep expression trees.
4474 if (Depth >= 5) return false;
4475
4476 // By default, all matched instructions stay in place.
4477 if (MovedAway)
4478 *MovedAway = false;
4479
4480 switch (Opcode) {
4481 case Instruction::PtrToInt:
4482 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4483 return matchAddr(AddrInst->getOperand(0), Depth);
4484 case Instruction::IntToPtr: {
4485 auto AS = AddrInst->getType()->getPointerAddressSpace();
4486 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4487 // This inttoptr is a no-op if the integer type is pointer sized.
4488 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4489 return matchAddr(AddrInst->getOperand(0), Depth);
4490 return false;
4491 }
4492 case Instruction::BitCast:
4493 // BitCast is always a noop, and we can handle it as long as it is
4494 // int->int or pointer->pointer (we don't want int<->fp or something).
4495 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4496 // Don't touch identity bitcasts. These were probably put here by LSR,
4497 // and we don't want to mess around with them. Assume it knows what it
4498 // is doing.
4499 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4500 return matchAddr(AddrInst->getOperand(0), Depth);
4501 return false;
4502 case Instruction::AddrSpaceCast: {
4503 unsigned SrcAS
4504 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4505 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4506 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
4507 return matchAddr(AddrInst->getOperand(0), Depth);
4508 return false;
4509 }
4510 case Instruction::Add: {
4511 // Check to see if we can merge in the RHS then the LHS. If so, we win.
4512 ExtAddrMode BackupAddrMode = AddrMode;
4513 unsigned OldSize = AddrModeInsts.size();
4514 // Start a transaction at this point.
4515 // The LHS may match but not the RHS.
4516 // Therefore, we need a higher level restoration point to undo partially
4517 // matched operation.
4518 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4519 TPT.getRestorationPoint();
4520
4521 AddrMode.InBounds = false;
4522 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
4523 matchAddr(AddrInst->getOperand(0), Depth+1))
4524 return true;
4525
4526 // Restore the old addr mode info.
4527 AddrMode = BackupAddrMode;
4528 AddrModeInsts.resize(OldSize);
4529 TPT.rollback(LastKnownGood);
4530
4531 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
4532 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
4533 matchAddr(AddrInst->getOperand(1), Depth+1))
4534 return true;
4535
4536 // Otherwise we definitely can't merge the ADD in.
4537 AddrMode = BackupAddrMode;
4538 AddrModeInsts.resize(OldSize);
4539 TPT.rollback(LastKnownGood);
4540 break;
4541 }
4542 //case Instruction::Or:
4543 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4544 //break;
4545 case Instruction::Mul:
4546 case Instruction::Shl: {
4547 // Can only handle X*C and X << C.
4548 AddrMode.InBounds = false;
4549 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4550 if (!RHS || RHS->getBitWidth() > 64)
4551 return false;
4552 int64_t Scale = RHS->getSExtValue();
4553 if (Opcode == Instruction::Shl)
4554 Scale = 1LL << Scale;
4555
4556 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4557 }
4558 case Instruction::GetElementPtr: {
4559 // Scan the GEP. We check whether it contains constant offsets and at most
4560 // one variable offset.
4561 int VariableOperand = -1;
4562 unsigned VariableScale = 0;
4563
4564 int64_t ConstantOffset = 0;
4565 gep_type_iterator GTI = gep_type_begin(AddrInst);
4566 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4567 if (StructType *STy = GTI.getStructTypeOrNull()) {
4568 const StructLayout *SL = DL.getStructLayout(STy);
4569 unsigned Idx =
4570 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4571 ConstantOffset += SL->getElementOffset(Idx);
4572 } else {
4573 TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
4574 if (TS.isNonZero()) {
4575 // The optimisations below currently only work for fixed offsets.
4576 if (TS.isScalable())
4577 return false;
4578 int64_t TypeSize = TS.getFixedSize();
4579 if (ConstantInt *CI =
4580 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4581 const APInt &CVal = CI->getValue();
4582 if (CVal.getMinSignedBits() <= 64) {
4583 ConstantOffset += CVal.getSExtValue() * TypeSize;
4584 continue;
4585 }
4586 }
4587 // We only allow one variable index at the moment.
4588 if (VariableOperand != -1)
4589 return false;
4590
4591 // Remember the variable index.
4592 VariableOperand = i;
4593 VariableScale = TypeSize;
4594 }
4595 }
4596 }
4597
4598 // A common case is for the GEP to only do a constant offset. In this case,
4599 // just add it to the disp field and check validity.
4600 if (VariableOperand == -1) {
4601 AddrMode.BaseOffs += ConstantOffset;
4602 if (ConstantOffset == 0 ||
4603 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4604 // Check to see if we can fold the base pointer in too.
4605 if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4606 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4607 AddrMode.InBounds = false;
4608 return true;
4609 }
4610 } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4611 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4612 ConstantOffset > 0) {
4613 // Record GEPs with non-zero offsets as candidates for splitting in the
4614 // event that the offset cannot fit into the r+i addressing mode.
4615 // Simple and common case that only one GEP is used in calculating the
4616 // address for the memory access.
4617 Value *Base = AddrInst->getOperand(0);
4618 auto *BaseI = dyn_cast<Instruction>(Base);
4619 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4620 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4621 (BaseI && !isa<CastInst>(BaseI) &&
4622 !isa<GetElementPtrInst>(BaseI))) {
4623 // Make sure the parent block allows inserting non-PHI instructions
4624 // before the terminator.
4625 BasicBlock *Parent =
4626 BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4627 if (!Parent->getTerminator()->isEHPad())
4628 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4629 }
4630 }
4631 AddrMode.BaseOffs -= ConstantOffset;
4632 return false;
4633 }
4634
4635 // Save the valid addressing mode in case we can't match.
4636 ExtAddrMode BackupAddrMode = AddrMode;
4637 unsigned OldSize = AddrModeInsts.size();
4638
4639 // See if the scale and offset amount is valid for this target.
4640 AddrMode.BaseOffs += ConstantOffset;
4641 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4642 AddrMode.InBounds = false;
4643
4644 // Match the base operand of the GEP.
4645 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4646 // If it couldn't be matched, just stuff the value in a register.
4647 if (AddrMode.HasBaseReg) {
4648 AddrMode = BackupAddrMode;
4649 AddrModeInsts.resize(OldSize);
4650 return false;
4651 }
4652 AddrMode.HasBaseReg = true;
4653 AddrMode.BaseReg = AddrInst->getOperand(0);
4654 }
4655
4656 // Match the remaining variable portion of the GEP.
4657 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4658 Depth)) {
4659 // If it couldn't be matched, try stuffing the base into a register
4660 // instead of matching it, and retrying the match of the scale.
4661 AddrMode = BackupAddrMode;
4662 AddrModeInsts.resize(OldSize);
4663 if (AddrMode.HasBaseReg)
4664 return false;
4665 AddrMode.HasBaseReg = true;
4666 AddrMode.BaseReg = AddrInst->getOperand(0);
4667 AddrMode.BaseOffs += ConstantOffset;
4668 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4669 VariableScale, Depth)) {
4670 // If even that didn't work, bail.
4671 AddrMode = BackupAddrMode;
4672 AddrModeInsts.resize(OldSize);
4673 return false;
4674 }
4675 }
4676
4677 return true;
4678 }
4679 case Instruction::SExt:
4680 case Instruction::ZExt: {
4681 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4682 if (!Ext)
4683 return false;
4684
4685 // Try to move this ext out of the way of the addressing mode.
4686 // Ask for a method for doing so.
4687 TypePromotionHelper::Action TPH =
4688 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4689 if (!TPH)
4690 return false;
4691
4692 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4693 TPT.getRestorationPoint();
4694 unsigned CreatedInstsCost = 0;
4695 unsigned ExtCost = !TLI.isExtFree(Ext);
4696 Value *PromotedOperand =
4697 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4698 // SExt has been moved away.
4699 // Thus either it will be rematched later in the recursive calls or it is
4700 // gone. Anyway, we must not fold it into the addressing mode at this point.
4701 // E.g.,
4702 // op = add opnd, 1
4703 // idx = ext op
4704 // addr = gep base, idx
4705 // is now:
4706 // promotedOpnd = ext opnd <- no match here
4707 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4708 // addr = gep base, op <- match
4709 if (MovedAway)
4710 *MovedAway = true;
4711
4712 assert(PromotedOperand &&
4713 "TypePromotionHelper should have filtered out those cases");
4714
4715 ExtAddrMode BackupAddrMode = AddrMode;
4716 unsigned OldSize = AddrModeInsts.size();
4717
4718 if (!matchAddr(PromotedOperand, Depth) ||
4719 // The total of the new cost is equal to the cost of the created
4720 // instructions.
4721 // The total of the old cost is equal to the cost of the extension plus
4722 // what we have saved in the addressing mode.
4723 !isPromotionProfitable(CreatedInstsCost,
4724 ExtCost + (AddrModeInsts.size() - OldSize),
4725 PromotedOperand)) {
4726 AddrMode = BackupAddrMode;
4727 AddrModeInsts.resize(OldSize);
4728 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Sign extension does not pay off: rollback\n"
; } } while (false)
;
4729 TPT.rollback(LastKnownGood);
4730 return false;
4731 }
4732 return true;
4733 }
4734 }
4735 return false;
4736}
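// Illustration (assumed target with [base + index*scale + disp] addressing):
//   %p = getelementptr inbounds i32, i32* %base, i64 %i
//   %v = load i32, i32* %p
// is matched by the GEP case above as {BaseReg = %base, ScaledReg = %i,
// Scale = 4, BaseOffs = 0}, assuming i32 has an alloc size of 4 bytes and the
// target reports that mode as legal for the load.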
4737
4738/// If we can, try to add the value of 'Addr' into the current addressing mode.
4739/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4740/// unmodified. This assumes that Addr is either a pointer type or intptr_t
4741/// for the target.
4742///
4743bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4744 // Start a transaction at this point that we will rollback if the matching
4745 // fails.
4746 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4747 TPT.getRestorationPoint();
4748 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4749 if (CI->getValue().isSignedIntN(64)) {
4750 // Fold in immediates if legal for the target.
4751 AddrMode.BaseOffs += CI->getSExtValue();
4752 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4753 return true;
4754 AddrMode.BaseOffs -= CI->getSExtValue();
4755 }
4756 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4757 // If this is a global variable, try to fold it into the addressing mode.
4758 if (!AddrMode.BaseGV) {
4759 AddrMode.BaseGV = GV;
4760 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4761 return true;
4762 AddrMode.BaseGV = nullptr;
4763 }
4764 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4765 ExtAddrMode BackupAddrMode = AddrMode;
4766 unsigned OldSize = AddrModeInsts.size();
4767
4768 // Check to see if it is possible to fold this operation.
4769 bool MovedAway = false;
4770 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4771 // This instruction may have been moved away. If so, there is nothing
4772 // to check here.
4773 if (MovedAway)
4774 return true;
4775 // Okay, it's possible to fold this. Check to see if it is actually
4776 // *profitable* to do so. We use a simple cost model to avoid increasing
4777 // register pressure too much.
4778 if (I->hasOneUse() ||
4779 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4780 AddrModeInsts.push_back(I);
4781 return true;
4782 }
4783
4784 // It isn't profitable to do this, roll back.
4785 //cerr << "NOT FOLDING: " << *I;
4786 AddrMode = BackupAddrMode;
4787 AddrModeInsts.resize(OldSize);
4788 TPT.rollback(LastKnownGood);
4789 }
4790 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4791 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4792 return true;
4793 TPT.rollback(LastKnownGood);
4794 } else if (isa<ConstantPointerNull>(Addr)) {
4795 // Null pointer gets folded without affecting the addressing mode.
4796 return true;
4797 }
4798
4799 // Worst case, the target should support [reg] addressing modes. :)
4800 if (!AddrMode.HasBaseReg) {
4801 AddrMode.HasBaseReg = true;
4802 AddrMode.BaseReg = Addr;
4803 // Still check for legality in case the target supports [imm] but not [i+r].
4804 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4805 return true;
4806 AddrMode.HasBaseReg = false;
4807 AddrMode.BaseReg = nullptr;
4808 }
4809
4810 // If the base register is already taken, see if we can do [r+r].
4811 if (AddrMode.Scale == 0) {
4812 AddrMode.Scale = 1;
4813 AddrMode.ScaledReg = Addr;
4814 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4815 return true;
4816 AddrMode.Scale = 0;
4817 AddrMode.ScaledReg = nullptr;
4818 }
4819 // Couldn't match.
4820 TPT.rollback(LastKnownGood);
4821 return false;
4822}
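// Illustration: if %addr is an opaque value that cannot be decomposed further,
// the fallback above first tries {BaseReg = %addr} ("[reg]") and then, if the
// base register slot is already occupied or that form is rejected, tries
// {ScaledReg = %addr, Scale = 1} ("[r + r]"), rolling back to LastKnownGood
// when neither form is legal for the target.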
4823
4824/// Check to see if all uses of OpVal by the specified inline asm call are due
4825/// to memory operands. If so, return true, otherwise return false.
4826static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4827 const TargetLowering &TLI,
4828 const TargetRegisterInfo &TRI) {
4829 const Function *F = CI->getFunction();
4830 TargetLowering::AsmOperandInfoVector TargetConstraints =
4831 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
4832
4833 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4834 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4835
4836 // Compute the constraint code and ConstraintType to use.
4837 TLI.ComputeConstraintToUse(OpInfo, SDValue());
4838
4839 // If this asm operand is our Value*, and if it isn't an indirect memory
4840 // operand, we can't fold it!
4841 if (OpInfo.CallOperandVal == OpVal &&
4842 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4843 !OpInfo.isIndirect))
4844 return false;
4845 }
4846
4847 return true;
4848}
4849
4850// Max number of memory uses to look at before aborting the search to conserve
4851// compile time.
4852static constexpr int MaxMemoryUsesToScan = 20;
4853
4854/// Recursively walk all the uses of I until we find a memory use.
4855/// If we find an obviously non-foldable instruction, return true.
4856/// Add the ultimately found memory instructions to MemoryUses.
4857static bool FindAllMemoryUses(
4858 Instruction *I,
4859 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4860 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4861 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
4862 BlockFrequencyInfo *BFI, int SeenInsts = 0) {
4863 // If we already considered this instruction, we're done.
4864 if (!ConsideredInsts.insert(I).second)
4865 return false;
4866
4867 // If this is an obviously unfoldable instruction, bail out.
4868 if (!MightBeFoldableInst(I))
4869 return true;
4870
4871 // Loop over all the uses, recursively processing them.
4872 for (Use &U : I->uses()) {
4873 // Conservatively return true if we're seeing a large number or a deep chain
4874 // of users. This avoids excessive compilation times in pathological cases.
4875 if (SeenInsts++ >= MaxMemoryUsesToScan)
4876 return true;
4877
4878 Instruction *UserI = cast<Instruction>(U.getUser());
4879 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4880 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4881 continue;
4882 }
4883
4884 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4885 unsigned opNo = U.getOperandNo();
4886 if (opNo != StoreInst::getPointerOperandIndex())
4887 return true; // Storing addr, not into addr.
4888 MemoryUses.push_back(std::make_pair(SI, opNo));
4889 continue;
4890 }
4891
4892 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4893 unsigned opNo = U.getOperandNo();
4894 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4895 return true; // Storing addr, not into addr.
4896 MemoryUses.push_back(std::make_pair(RMW, opNo));
4897 continue;
4898 }
4899
4900 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4901 unsigned opNo = U.getOperandNo();
4902 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4903 return true; // Storing addr, not into addr.
4904 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4905 continue;
4906 }
4907
4908 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4909 if (CI->hasFnAttr(Attribute::Cold)) {
4910 // If this is a cold call, we can sink the addressing calculation into
4911 // the cold path. See optimizeCallInst
4912 bool OptForSize = OptSize ||
4913 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
4914 if (!OptForSize)
4915 continue;
4916 }
4917
4918 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
4919 if (!IA) return true;
4920
4921 // If this is a memory operand, we're cool, otherwise bail out.
4922 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4923 return true;
4924 continue;
4925 }
4926
4927 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
4928 PSI, BFI, SeenInsts))
4929 return true;
4930 }
4931
4932 return false;
4933}
4934
4935/// Return true if Val is already known to be live at the use site that we're
4936/// folding it into. If so, there is no cost to include it in the addressing
4937/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4938/// instruction already.
4939bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4940 Value *KnownLive2) {
4941 // If Val is either of the known-live values, we know it is live!
4942 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4943 return true;
4944
4945 // All values other than instructions and arguments (e.g. constants) are live.
4946 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4947
4948 // If Val is a constant sized alloca in the entry block, it is live, this is
4949 // true because it is just a reference to the stack/frame pointer, which is
4950 // live for the whole function.
4951 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4952 if (AI->isStaticAlloca())
4953 return true;
4954
4955 // Check to see if this value is already used in the memory instruction's
4956 // block. If so, it's already live into the block at the very least, so we
4957 // can reasonably fold it.
4958 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4959}
4960
4961/// It is possible for the addressing mode of the machine to fold the specified
4962/// instruction into a load or store that ultimately uses it.
4963/// However, the specified instruction has multiple uses.
4964/// Given this, it may actually increase register pressure to fold it
4965/// into the load. For example, consider this code:
4966///
4967/// X = ...
4968/// Y = X+1
4969/// use(Y) -> nonload/store
4970/// Z = Y+1
4971/// load Z
4972///
4973/// In this case, Y has multiple uses, and can be folded into the load of Z
4974/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4975/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4976/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4977/// number of computations either.
4978///
4979/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4980/// X was live across 'load Z' for other reasons, we actually *would* want to
4981/// fold the addressing mode in the Z case. This would make Y die earlier.
4982bool AddressingModeMatcher::
4983isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4984 ExtAddrMode &AMAfter) {
4985 if (IgnoreProfitability) return true;
4986
4987 // AMBefore is the addressing mode before this instruction was folded into it,
4988 // and AMAfter is the addressing mode after the instruction was folded. Get
4989 // the set of registers referenced by AMAfter and subtract out those
4990 // referenced by AMBefore: this is the set of values which folding in this
4991 // address extends the lifetime of.
4992 //
4993 // Note that there are only two potential values being referenced here,
4994 // BaseReg and ScaleReg (global addresses are always available, as are any
4995 // folded immediates).
4996 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4997
4998 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4999 // lifetime wasn't extended by adding this instruction.
5000 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5001 BaseReg = nullptr;
5002 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5003 ScaledReg = nullptr;
5004
5005 // If folding this instruction (and its subexprs) didn't extend any live
5006 // ranges, we're ok with it.
5007 if (!BaseReg && !ScaledReg)
5008 return true;
5009
5010 // If all uses of this instruction can have the address mode sunk into them,
5011 // we can remove the addressing mode and effectively trade one live register
5012 // for another (at worst.) In this context, folding an addressing mode into
5013 // the use is just a particularly nice way of sinking it.
5014 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
5015 SmallPtrSet<Instruction*, 16> ConsideredInsts;
5016 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5017 PSI, BFI))
5018 return false; // Has a non-memory, non-foldable use!
5019
5020 // Now that we know that all uses of this instruction are part of a chain of
5021 // computation involving only operations that could theoretically be folded
5022 // into a memory use, loop over each of these memory operation uses and see
5023 // if they could *actually* fold the instruction. The assumption is that
5024 // addressing modes are cheap and that duplicating the computation involved
5025 // many times is worthwhile, even on a fastpath. For sinking candidates
5026 // (i.e. cold call sites), this serves as a way to prevent excessive code
5027 // growth since most architectures have some reasonable small and fast way to
5028 // compute an effective address. (i.e LEA on x86)
5029 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
5030 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
5031 Instruction *User = MemoryUses[i].first;
5032 unsigned OpNo = MemoryUses[i].second;
5033
5034 // Get the access type of this use. If the use isn't a pointer, we don't
5035 // know what it accesses.
5036 Value *Address = User->getOperand(OpNo);
5037 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
5038 if (!AddrTy)
5039 return false;
5040 Type *AddressAccessTy = AddrTy->getElementType();
5041 unsigned AS = AddrTy->getAddressSpace();
5042
5043 // Do a match against the root of this address, ignoring profitability. This
5044 // will tell us if the addressing mode for the memory operation will
5045 // *actually* cover the shared instruction.
5046 ExtAddrMode Result;
5047 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5048 0);
5049 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5050 TPT.getRestorationPoint();
5051 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5052 AddressAccessTy, AS, MemoryInst, Result,
5053 InsertedInsts, PromotedInsts, TPT,
5054 LargeOffsetGEP, OptSize, PSI, BFI);
5055 Matcher.IgnoreProfitability = true;
5056 bool Success = Matcher.matchAddr(Address, 0);
5057 (void)Success; assert(Success && "Couldn't select *anything*?");
5058
5059 // The match was to check the profitability, the changes made are not
5060 // part of the original matcher. Therefore, they should be dropped
5061 // otherwise the original matcher will not present the right state.
5062 TPT.rollback(LastKnownGood);
5063
5064 // If the match didn't cover I, then it won't be shared by it.
5065 if (!is_contained(MatchedAddrModeInsts, I))
5066 return false;
5067
5068 MatchedAddrModeInsts.clear();
5069 }
5070
5071 return true;
5072}
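
A minimal, self-contained sketch of the "every use must be a foldable memory use" idea exercised by the loop above. It is not the pass's own FindAllMemoryUses (which also walks through casts, inline asm, and nested address computations); the helper name and its restriction to direct load/store uses are illustrative assumptions.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"

// Hypothetical helper: record (instruction, operand index) for every load or
// store that uses Addr directly, and reject any other kind of use.
static bool collectSimpleMemoryUses(
    llvm::Instruction *Addr,
    llvm::SmallVectorImpl<std::pair<llvm::Instruction *, unsigned>> &Uses) {
  for (llvm::Use &U : Addr->uses()) {
    auto *UserInst = llvm::dyn_cast<llvm::Instruction>(U.getUser());
    if (auto *LI = llvm::dyn_cast_or_null<llvm::LoadInst>(UserInst)) {
      Uses.push_back({LI, U.getOperandNo()});
      continue;
    }
    if (auto *SI = llvm::dyn_cast_or_null<llvm::StoreInst>(UserInst)) {
      // Only the pointer operand may reference Addr; storing the address as a
      // value would keep it live in a register anyway.
      if (U.getOperandNo() != llvm::StoreInst::getPointerOperandIndex())
        return false;
      Uses.push_back({SI, U.getOperandNo()});
      continue;
    }
    return false; // A non-memory use defeats the "fold into all uses" argument.
  }
  return true;
}
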
5073
5074/// Return true if the specified values are defined in a
5075/// different basic block than BB.
5076static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5077 if (Instruction *I = dyn_cast<Instruction>(V))
5078 return I->getParent() != BB;
5079 return false;
5080}
5081
5082 /// Sink addressing mode computation immediately before MemoryInst if doing so
5083 /// can be done without increasing register pressure. The need for the
5084 /// register pressure constraint means this can end up being an all-or-nothing
5085/// decision for all uses of the same addressing computation.
5086///
5087/// Load and Store Instructions often have addressing modes that can do
5088/// significant amounts of computation. As such, instruction selection will try
5089/// to get the load or store to do as much computation as possible for the
5090/// program. The problem is that isel can only see within a single block. As
5091/// such, we sink as much legal addressing mode work into the block as possible.
5092///
5093/// This method is used to optimize both load/store and inline asms with memory
5094/// operands. It's also used to sink addressing computations feeding into cold
5095/// call sites into their (cold) basic block.
5096///
5097/// The motivation for handling sinking into cold blocks is that doing so can
5098/// both enable other address mode sinking (by satisfying the register pressure
5099/// constraint above), and reduce register pressure globally (by removing the
5100 /// addressing mode computation from the fast path entirely).
5101bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5102 Type *AccessTy, unsigned AddrSpace) {
5103 Value *Repl = Addr;
5104
5105 // Try to collapse single-value PHI nodes. This is necessary to undo
5106 // unprofitable PRE transformations.
5107 SmallVector<Value*, 8> worklist;
5108 SmallPtrSet<Value*, 16> Visited;
5109 worklist.push_back(Addr);
5110
5111 // Use a worklist to iteratively look through PHI and select nodes, and
5112 // ensure that the addressing modes obtained from the non-PHI/select roots of
5113 // the graph are compatible.
5114 bool PhiOrSelectSeen = false;
5115 SmallVector<Instruction*, 16> AddrModeInsts;
5116 const SimplifyQuery SQ(*DL, TLInfo);
5117 AddressingModeCombiner AddrModes(SQ, Addr);
5118 TypePromotionTransaction TPT(RemovedInsts);
5119 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5120 TPT.getRestorationPoint();
5121 while (!worklist.empty()) {
1
Loop condition is true. Entering loop body
5122 Value *V = worklist.back();
5123 worklist.pop_back();
5124
5125 // We allow traversing cyclic Phi nodes.
5126 // In case of success after this loop we ensure that traversing through
5127 // Phi nodes ends up with all cases to compute address of the form
5128 // BaseGV + Base + Scale * Index + Offset
5129 // where Scale and Offset are constants and BaseGV, Base and Index
5130 // are exactly the same Values in all cases.
5131 // It means that BaseGV, Scale and Offset dominate our memory instruction
5132 // and have the same value as they had in address computation represented
5133 // as Phi. So we can safely sink address computation to memory instruction.
5134 if (!Visited.insert(V).second)
2
Assuming field 'second' is true
3
Taking false branch
5135 continue;
5136
5137 // For a PHI node, push all of its incoming values.
5138 if (PHINode *P
4.1
'P' is null
= dyn_cast<PHINode>(V)) {
4
Assuming 'V' is not a 'PHINode'
5
Taking false branch
5139 append_range(worklist, P->incoming_values());
5140 PhiOrSelectSeen = true;
5141 continue;
5142 }
5143 // Similar for select.
5144 if (SelectInst *SI
6.1
'SI' is null
= dyn_cast<SelectInst>(V)) {
6
Assuming 'V' is not a 'SelectInst'
7
Taking false branch
5145 worklist.push_back(SI->getFalseValue());
5146 worklist.push_back(SI->getTrueValue());
5147 PhiOrSelectSeen = true;
5148 continue;
5149 }
5150
5151 // For non-PHIs, determine the addressing mode being computed. Note that
5152 // the result may differ depending on what other uses our candidate
5153 // addressing instructions might have.
5154 AddrModeInsts.clear();
5155 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5156 0);
5157 // Defer the query (and possible computation) of the dom tree to the point of
5158 // actual use. It's expected that most address matches don't actually need
5159 // the domtree.
5160 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5161 Function *F = MemoryInst->getParent()->getParent();
5162 return this->getDT(*F);
5163 };
5164 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5165 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5166 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5167 BFI.get());
5168
5169 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5170 if (GEP && !NewGEPBases.count(GEP)) {
8
Assuming 'GEP' is null
9
Taking false branch
5171 // If splitting the underlying data structure can reduce the offset of a
5172 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5173 // previously split data structures.
5174 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5175 if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
5176 LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
5177 }
5178
5179 NewAddrMode.OriginalValue = V;
5180 if (!AddrModes.addNewAddrMode(NewAddrMode))
10
Taking true branch
5181 break;
11
Execution continues on line 5187
5182 }
5183
5184 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5185 // or we have multiple but either couldn't combine them or combining them
5186 // wouldn't do anything useful, bail out now.
5187 if (!AddrModes.combineAddrModes()) {
12
Calling 'AddressingModeCombiner::combineAddrModes'
5188 TPT.rollback(LastKnownGood);
5189 return false;
5190 }
5191 bool Modified = TPT.commit();
5192
5193 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5194 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5195
5196 // If all the instructions matched are already in this BB, don't do anything.
5197 // If we saw a Phi node then it is definitely not local, and if we saw a select
5198 // then we want to push the address calculation past it even if it's already
5199 // in this BB.
5200 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5201 return IsNonLocalValue(V, MemoryInst->getParent());
5202 })) {
5203 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: Found local addrmode: "
<< AddrMode << "\n"; } } while (false)
5204 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: Found local addrmode: "
<< AddrMode << "\n"; } } while (false)
;
5205 return Modified;
5206 }
5207
5208 // Insert this computation right after this user. Since our caller is
5209 // scanning from the top of the BB to the bottom, reuses of the expr are
5210 // guaranteed to happen later.
5211 IRBuilder<> Builder(MemoryInst);
5212
5213 // Now that we have determined the addressing expression we want to use and
5214 // know that we have to sink it into this block, check to see if we have already
5215 // done this for some other load/store instr in this block. If so, reuse
5216 // the computation. Before attempting reuse, check if the address is valid
5217 // as it may have been erased.
5218
5219 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5220
5221 Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5222 if (SunkAddr) {
5223 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: Reusing nonlocal addrmode: "
<< AddrMode << " for " << *MemoryInst <<
"\n"; } } while (false)
5224 << " for " << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: Reusing nonlocal addrmode: "
<< AddrMode << " for " << *MemoryInst <<
"\n"; } } while (false)
;
5225 if (SunkAddr->getType() != Addr->getType())
5226 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5227 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5228 SubtargetInfo->addrSinkUsingGEPs())) {
5229 // By default, we use the GEP-based method when AA is used later. This
5230 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5231 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: "
<< AddrMode << " for " << *MemoryInst <<
"\n"; } } while (false)
5232 << " for " << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: "
<< AddrMode << " for " << *MemoryInst <<
"\n"; } } while (false)
;
5233 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5234 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5235
5236 // First, find the pointer.
5237 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5238 ResultPtr = AddrMode.BaseReg;
5239 AddrMode.BaseReg = nullptr;
5240 }
5241
5242 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5243 // We can't add more than one pointer together, nor can we scale a
5244 // pointer (both of which seem meaningless).
5245 if (ResultPtr || AddrMode.Scale != 1)
5246 return Modified;
5247
5248 ResultPtr = AddrMode.ScaledReg;
5249 AddrMode.Scale = 0;
5250 }
5251
5252 // It is only safe to sign extend the BaseReg if we know that the math
5253 // required to create it did not overflow before we extend it. Since
5254 // the original IR value was tossed in favor of a constant back when
5255 // the AddrMode was created we need to bail out gracefully if widths
5256 // do not match instead of extending it.
5257 //
5258 // (See below for code to add the scale.)
5259 if (AddrMode.Scale) {
5260 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5261 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5262 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5263 return Modified;
5264 }
5265
5266 if (AddrMode.BaseGV) {
5267 if (ResultPtr)
5268 return Modified;
5269
5270 ResultPtr = AddrMode.BaseGV;
5271 }
5272
5273 // If the real base value actually came from an inttoptr, then the matcher
5274 // will look through it and provide only the integer value. In that case,
5275 // use it here.
5276 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5277 if (!ResultPtr && AddrMode.BaseReg) {
5278 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5279 "sunkaddr");
5280 AddrMode.BaseReg = nullptr;
5281 } else if (!ResultPtr && AddrMode.Scale == 1) {
5282 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5283 "sunkaddr");
5284 AddrMode.Scale = 0;
5285 }
5286 }
5287
5288 if (!ResultPtr &&
5289 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
5290 SunkAddr = Constant::getNullValue(Addr->getType());
5291 } else if (!ResultPtr) {
5292 return Modified;
5293 } else {
5294 Type *I8PtrTy =
5295 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
5296 Type *I8Ty = Builder.getInt8Ty();
5297
5298 // Start with the base register. Do this first so that subsequent address
5299 // matching finds it last, which will prevent it from trying to match it
5300 // as the scaled value in case it happens to be a mul. That would be
5301 // problematic if we've sunk a different mul for the scale, because then
5302 // we'd end up sinking both muls.
5303 if (AddrMode.BaseReg) {
5304 Value *V = AddrMode.BaseReg;
5305 if (V->getType() != IntPtrTy)
5306 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5307
5308 ResultIndex = V;
5309 }
5310
5311 // Add the scale value.
5312 if (AddrMode.Scale) {
5313 Value *V = AddrMode.ScaledReg;
5314 if (V->getType() == IntPtrTy) {
5315 // done.
5316 } else {
5317 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5318 cast<IntegerType>(V->getType())->getBitWidth() &&
5319 "We can't transform if ScaledReg is too narrow");
5320 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5321 }
5322
5323 if (AddrMode.Scale != 1)
5324 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5325 "sunkaddr");
5326 if (ResultIndex)
5327 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5328 else
5329 ResultIndex = V;
5330 }
5331
5332 // Add in the Base Offset if present.
5333 if (AddrMode.BaseOffs) {
5334 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5335 if (ResultIndex) {
5336 // We need to add this separately from the scale above to help with
5337 // SDAG consecutive load/store merging.
5338 if (ResultPtr->getType() != I8PtrTy)
5339 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5340 ResultPtr =
5341 AddrMode.InBounds
5342 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5343 "sunkaddr")
5344 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5345 }
5346
5347 ResultIndex = V;
5348 }
5349
5350 if (!ResultIndex) {
5351 SunkAddr = ResultPtr;
5352 } else {
5353 if (ResultPtr->getType() != I8PtrTy)
5354 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5355 SunkAddr =
5356 AddrMode.InBounds
5357 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
5358 "sunkaddr")
5359 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
5360 }
5361
5362 if (SunkAddr->getType() != Addr->getType())
5363 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5364 }
5365 } else {
5366 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5367 // non-integral pointers, so in that case bail out now.
5368 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5369 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5370 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5371 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5372 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5373 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5374 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5375 (AddrMode.BaseGV &&
5376 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5377 return Modified;
5378
5379 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: "
<< AddrMode << " for " << *MemoryInst <<
"\n"; } } while (false)
5380 << " for " << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: "
<< AddrMode << " for " << *MemoryInst <<
"\n"; } } while (false)
;
5381 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5382 Value *Result = nullptr;
5383
5384 // Start with the base register. Do this first so that subsequent address
5385 // matching finds it last, which will prevent it from trying to match it
5386 // as the scaled value in case it happens to be a mul. That would be
5387 // problematic if we've sunk a different mul for the scale, because then
5388 // we'd end up sinking both muls.
5389 if (AddrMode.BaseReg) {
5390 Value *V = AddrMode.BaseReg;
5391 if (V->getType()->isPointerTy())
5392 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5393 if (V->getType() != IntPtrTy)
5394 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5395 Result = V;
5396 }
5397
5398 // Add the scale value.
5399 if (AddrMode.Scale) {
5400 Value *V = AddrMode.ScaledReg;
5401 if (V->getType() == IntPtrTy) {
5402 // done.
5403 } else if (V->getType()->isPointerTy()) {
5404 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5405 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5406 cast<IntegerType>(V->getType())->getBitWidth()) {
5407 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5408 } else {
5409 // It is only safe to sign extend the BaseReg if we know that the math
5410 // required to create it did not overflow before we extend it. Since
5411 // the original IR value was tossed in favor of a constant back when
5412 // the AddrMode was created we need to bail out gracefully if widths
5413 // do not match instead of extending it.
5414 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5415 if (I && (Result != AddrMode.BaseReg))
5416 I->eraseFromParent();
5417 return Modified;
5418 }
5419 if (AddrMode.Scale != 1)
5420 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5421 "sunkaddr");
5422 if (Result)
5423 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5424 else
5425 Result = V;
5426 }
5427
5428 // Add in the BaseGV if present.
5429 if (AddrMode.BaseGV) {
5430 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
5431 if (Result)
5432 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5433 else
5434 Result = V;
5435 }
5436
5437 // Add in the Base Offset if present.
5438 if (AddrMode.BaseOffs) {
5439 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5440 if (Result)
5441 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5442 else
5443 Result = V;
5444 }
5445
5446 if (!Result)
5447 SunkAddr = Constant::getNullValue(Addr->getType());
5448 else
5449 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5450 }
5451
5452 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5453 // Store the newly computed address into the cache. In the case we reused a
5454 // value, this should be idempotent.
5455 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5456
5457 // If we have no uses, recursively delete the value and all dead instructions
5458 // using it.
5459 if (Repl->use_empty()) {
5460 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
5461 RecursivelyDeleteTriviallyDeadInstructions(
5462 Repl, TLInfo, nullptr,
5463 [&](Value *V) { removeAllAssertingVHReferences(V); });
5464 });
5465 }
5466 ++NumMemoryInsts;
5467 return true;
5468}
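
A stripped-down, hypothetical variant of the GEP-based branch above, assuming BasePtr, Index, Scale, and Offset are all present: it rebuilds BasePtr + Scale*Index + Offset as i8 GEPs immediately before the memory instruction. The real code additionally handles missing components, non-integral pointers, and reuse of previously sunk addresses.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

static llvm::Value *emitSunkAddress(llvm::Instruction *MemoryInst,
                                    llvm::Value *BasePtr, llvm::Value *Index,
                                    int64_t Scale, int64_t Offset) {
  llvm::IRBuilder<> Builder(MemoryInst);
  const llvm::DataLayout &DL = MemoryInst->getModule()->getDataLayout();
  llvm::Type *IntPtrTy = DL.getIntPtrType(BasePtr->getType());
  llvm::Type *I8Ty = Builder.getInt8Ty();
  llvm::Type *I8PtrTy =
      Builder.getInt8PtrTy(BasePtr->getType()->getPointerAddressSpace());

  // Scale the index in the pointer-sized integer type.
  llvm::Value *Idx = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
  if (Scale != 1)
    Idx = Builder.CreateMul(Idx, llvm::ConstantInt::get(IntPtrTy, Scale),
                            "sunkaddr");

  // Base + Scale*Index as an i8 GEP, then the constant offset as a second GEP.
  llvm::Value *Ptr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
  Ptr = Builder.CreateGEP(I8Ty, Ptr, Idx, "sunkaddr");
  if (Offset)
    Ptr = Builder.CreateGEP(I8Ty, Ptr,
                            llvm::ConstantInt::get(IntPtrTy, Offset),
                            "sunkaddr");
  // The caller may still need a pointer cast back to the original address type.
  return Ptr;
}
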
5469
5470/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
5471/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
5472/// only handle a 2 operand GEP in the same basic block or a splat constant
5473/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
5474/// index.
5475///
5476/// If the existing GEP has a vector base pointer that is splat, we can look
5477/// through the splat to find the scalar pointer. If we can't find a scalar
5478/// pointer there's nothing we can do.
5479///
5480/// If we have a GEP with more than 2 indices where the middle indices are all
5481/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
5482///
5483/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
5484/// followed by a GEP with an all zeroes vector index. This will enable
5485/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
5486/// zero index.
5487bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
5488 Value *Ptr) {
5489 Value *NewAddr;
5490
5491 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
5492 // Don't optimize GEPs that don't have indices.
5493 if (!GEP->hasIndices())
5494 return false;
5495
5496 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
5497 // FIXME: We should support this by sinking the GEP.
5498 if (MemoryInst->getParent() != GEP->getParent())
5499 return false;
5500
5501 SmallVector<Value *, 2> Ops(GEP->operands());
5502
5503 bool RewriteGEP = false;
5504
5505 if (Ops[0]->getType()->isVectorTy()) {
5506 Ops[0] = getSplatValue(Ops[0]);
5507 if (!Ops[0])
5508 return false;
5509 RewriteGEP = true;
5510 }
5511
5512 unsigned FinalIndex = Ops.size() - 1;
5513
5514 // Ensure all indices but the last are 0.
5515 // FIXME: This isn't strictly required. All that's required is that they are
5516 // all scalars or splats.
5517 for (unsigned i = 1; i < FinalIndex; ++i) {
5518 auto *C = dyn_cast<Constant>(Ops[i]);
5519 if (!C)
5520 return false;
5521 if (isa<VectorType>(C->getType()))
5522 C = C->getSplatValue();
5523 auto *CI = dyn_cast_or_null<ConstantInt>(C);
5524 if (!CI || !CI->isZero())
5525 return false;
5526 // Scalarize the index if needed.
5527 Ops[i] = CI;
5528 }
5529
5530 // Try to scalarize the final index.
5531 if (Ops[FinalIndex]->getType()->isVectorTy()) {
5532 if (Value *V = getSplatValue(Ops[FinalIndex])) {
5533 auto *C = dyn_cast<ConstantInt>(V);
5534 // Don't scalarize all zeros vector.
5535 if (!C || !C->isZero()) {
5536 Ops[FinalIndex] = V;
5537 RewriteGEP = true;
5538 }
5539 }
5540 }
5541
5542 // If we made any changes or we have extra operands, we need to generate
5543 // new instructions.
5544 if (!RewriteGEP && Ops.size() == 2)
5545 return false;
5546
5547 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5548
5549 IRBuilder<> Builder(MemoryInst);
5550
5551 Type *SourceTy = GEP->getSourceElementType();
5552 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
5553
5554 // If the final index isn't a vector, emit a scalar GEP containing all ops
5555 // and a vector GEP with all zeroes final index.
5556 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
5557 NewAddr = Builder.CreateGEP(SourceTy, Ops[0],
5558 makeArrayRef(Ops).drop_front());
5559 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5560 NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
5561 } else {
5562 Value *Base = Ops[0];
5563 Value *Index = Ops[FinalIndex];
5564
5565 // Create a scalar GEP if there are more than 2 operands.
5566 if (Ops.size() != 2) {
5567 // Replace the last index with 0.
5568 Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
5569 Base = Builder.CreateGEP(SourceTy, Base,
5570 makeArrayRef(Ops).drop_front());
5571 }
5572
5573 // Now create the GEP with scalar pointer and vector index.
5574 NewAddr = Builder.CreateGEP(Base, Index);
5575 }
5576 } else if (!isa<Constant>(Ptr)) {
5577 // Not a GEP, maybe it's a splat and we can create a GEP to enable
5578 // SelectionDAGBuilder to use it as a uniform base.
5579 Value *V = getSplatValue(Ptr);
5580 if (!V)
5581 return false;
5582
5583 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5584
5585 IRBuilder<> Builder(MemoryInst);
5586
5587 // Emit a vector GEP with a scalar pointer and all 0s vector index.
5588 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
5589 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5590 NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy));
5591 } else {
5592 // Constant, SelectionDAGBuilder knows to check if it's a splat.
5593 return false;
5594 }
5595
5596 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
5597
5598 // If we have no uses, recursively delete the value and all dead instructions
5599 // using it.
5600 if (Ptr->use_empty())
5601 RecursivelyDeleteTriviallyDeadInstructions(
5602 Ptr, TLInfo, nullptr,
5603 [&](Value *V) { removeAllAssertingVHReferences(V); });
5604
5605 return true;
5606}
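
A hypothetical helper mirroring the splat-pointer case above, assuming typed (non-opaque) pointers as in this snapshot: recover the scalar pointer from a splat vector of pointers and rebuild the address as a GEP with a scalar base and an all-zero vector index, which is the shape SelectionDAGBuilder can treat as a uniform base.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *rebuildUniformBase(llvm::Instruction *Gather,
                                       llvm::Value *VecPtr,
                                       const llvm::DataLayout &DL) {
  llvm::Value *Scalar = llvm::getSplatValue(VecPtr);
  if (!Scalar)
    return nullptr; // Not a splat; nothing we can do here.
  auto NumElts =
      llvm::cast<llvm::VectorType>(VecPtr->getType())->getElementCount();
  llvm::IRBuilder<> Builder(Gather);
  llvm::Type *ScalarIdxTy = DL.getIndexType(Scalar->getType());
  auto *VecIdxTy = llvm::VectorType::get(ScalarIdxTy, NumElts);
  // Scalar base, all-zero vector index.
  return Builder.CreateGEP(Scalar->getType()->getPointerElementType(), Scalar,
                           llvm::Constant::getNullValue(VecIdxTy));
}
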
5607
5608/// If there are any memory operands, use OptimizeMemoryInst to sink their
5609/// address computing into the block when possible / profitable.
5610bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5611 bool MadeChange = false;
5612
5613 const TargetRegisterInfo *TRI =
5614 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5615 TargetLowering::AsmOperandInfoVector TargetConstraints =
5616 TLI->ParseConstraints(*DL, TRI, *CS);
5617 unsigned ArgNo = 0;
5618 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5619 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
5620
5621 // Compute the constraint code and ConstraintType to use.
5622 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5623
5624 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5625 OpInfo.isIndirect) {
5626 Value *OpVal = CS->getArgOperand(ArgNo++);
5627 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5628 } else if (OpInfo.Type == InlineAsm::isInput)
5629 ArgNo++;
5630 }
5631
5632 return MadeChange;
5633}
5634
5635/// Check if all the uses of \p Val are equivalent (or free) zero or
5636/// sign extensions.
5637static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5638 assert(!Val->use_empty() && "Input must have at least one use");
5639 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5640 bool IsSExt = isa<SExtInst>(FirstUser);
5641 Type *ExtTy = FirstUser->getType();
5642 for (const User *U : Val->users()) {
5643 const Instruction *UI = cast<Instruction>(U);
5644 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5645 return false;
5646 Type *CurTy = UI->getType();
5647 // Same input and output types: Same instruction after CSE.
5648 if (CurTy == ExtTy)
5649 continue;
5650
5651 // If IsSExt is true, we are in this situation:
5652 // a = Val
5653 // b = sext ty1 a to ty2
5654 // c = sext ty1 a to ty3
5655 // Assuming ty2 is shorter than ty3, this could be turned into:
5656 // a = Val
5657 // b = sext ty1 a to ty2
5658 // c = sext ty2 b to ty3
5659 // However, the last sext is not free.
5660 if (IsSExt)
5661 return false;
5662
5663 // This is a ZExt, maybe this is free to extend from one type to another.
5664 // In that case, we would not account for a different use.
5665 Type *NarrowTy;
5666 Type *LargeTy;
5667 if (ExtTy->getScalarType()->getIntegerBitWidth() >
5668 CurTy->getScalarType()->getIntegerBitWidth()) {
5669 NarrowTy = CurTy;
5670 LargeTy = ExtTy;
5671 } else {
5672 NarrowTy = ExtTy;
5673 LargeTy = CurTy;
5674 }
5675
5676 if (!TLI.isZExtFree(NarrowTy, LargeTy))
5677 return false;
5678 }
5679 // All uses are the same or can be derived from one another for free.
5680 return true;
5681}
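
For the simplest situation the predicate above accepts, a tiny sketch: if every user of the value is a zext, the check succeeds without ever consulting TLI.isZExtFree. The helper name is hypothetical.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"

static bool allUsersAreZExt(const llvm::Value *Val) {
  return llvm::all_of(Val->users(), [](const llvm::User *U) {
    return llvm::isa<llvm::ZExtInst>(U);
  });
}
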
5682
5683/// Try to speculatively promote extensions in \p Exts and continue
5684/// promoting through newly promoted operands recursively as far as doing so is
5685/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
5686/// When some promotion happened, \p TPT contains the proper state to revert
5687/// them.
5688///
5689/// \return true if some promotion happened, false otherwise.
5690bool CodeGenPrepare::tryToPromoteExts(
5691 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
5692 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
5693 unsigned CreatedInstsCost) {
5694 bool Promoted = false;
5695
5696 // Iterate over all the extensions to try to promote them.
5697 for (auto *I : Exts) {
5698 // Early check if we directly have ext(load).
5699 if (isa<LoadInst>(I->getOperand(0))) {
5700 ProfitablyMovedExts.push_back(I);
5701 continue;
5702 }
5703
5704 // Check whether or not we want to do any promotion. The reason we have
5705 // this check inside the for loop is to catch the case where an extension
5706 // is directly fed by a load, because in such a case the extension can be moved
5707 // up without any promotion on its operands.
5708 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
5709 return false;
5710
5711 // Get the action to perform the promotion.
5712 TypePromotionHelper::Action TPH =
5713 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
5714 // Check if we can promote.
5715 if (!TPH) {
5716 // Save the current extension as we cannot move up through its operand.
5717 ProfitablyMovedExts.push_back(I);
5718 continue;
5719 }
5720
5721 // Save the current state.
5722 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5723 TPT.getRestorationPoint();
5724 SmallVector<Instruction *, 4> NewExts;
5725 unsigned NewCreatedInstsCost = 0;
5726 unsigned ExtCost = !TLI->isExtFree(I);
5727 // Promote.
5728 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
5729 &NewExts, nullptr, *TLI);
5730 assert(PromotedVal &&
5731 "TypePromotionHelper should have filtered out those cases");
5732
5733 // We can merge only one extension into a load.
5734 // Therefore, if we have more than 1 new extension we heuristically
5735 // cut this search path, because it means we degrade the code quality.
5736 // With exactly 2, the transformation is neutral, because we will merge
5737 // one extension but leave one. However, we optimistically keep going,
5738 // because the new extension may be removed too.
5739 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5740 // FIXME: It would be possible to propagate a negative value instead of
5741 // conservatively ceiling it to 0.
5742 TotalCreatedInstsCost =
5743 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5744 if (!StressExtLdPromotion &&
5745 (TotalCreatedInstsCost > 1 ||
5746 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5747 // This promotion is not profitable, rollback to the previous state, and
5748 // save the current extension in ProfitablyMovedExts as the latest
5749 // speculative promotion turned out to be unprofitable.
5750 TPT.rollback(LastKnownGood);
5751 ProfitablyMovedExts.push_back(I);
5752 continue;
5753 }
5754 // Continue promoting NewExts as far as doing so is profitable.
5755 SmallVector<Instruction *, 2> NewlyMovedExts;
5756 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5757 bool NewPromoted = false;
5758 for (auto *ExtInst : NewlyMovedExts) {
5759 Instruction *MovedExt = cast<Instruction>(ExtInst);
5760 Value *ExtOperand = MovedExt->getOperand(0);
5761 // If we have reached to a load, we need this extra profitability check
5762 // as it could potentially be merged into an ext(load).
5763 if (isa<LoadInst>(ExtOperand) &&
5764 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5765 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5766 continue;
5767
5768 ProfitablyMovedExts.push_back(MovedExt);
5769 NewPromoted = true;
5770 }
5771
5772 // If none of speculative promotions for NewExts is profitable, rollback
5773 // and save the current extension (I) as the last profitable extension.
5774 if (!NewPromoted) {
5775 TPT.rollback(LastKnownGood);
5776 ProfitablyMovedExts.push_back(I);
5777 continue;
5778 }
5779 // The promotion is profitable.
5780 Promoted = true;
5781 }
5782 return Promoted;
5783}
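
A worked example of the cost gate above, with hypothetical numbers; the helper merely restates the TotalCreatedInstsCost computation.

#include <algorithm>

static long long totalCreatedInstsCost(long long CreatedInstsCost,
                                       unsigned NewCreatedInstsCost,
                                       unsigned ExtCost) {
  return std::max((long long)0,
                  CreatedInstsCost + NewCreatedInstsCost - ExtCost);
}
// totalCreatedInstsCost(0, 1, 1) == 0: the promotion is cost-neutral, so the
// search keeps going through this operand.
// totalCreatedInstsCost(0, 3, 1) == 2: greater than 1, so the speculative
// promotion is rolled back unless stress mode is enabled.
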
5784
5785 /// Merge redundant sexts when one dominates the other.
5786bool CodeGenPrepare::mergeSExts(Function &F) {
5787 bool Changed = false;
5788 for (auto &Entry : ValToSExtendedUses) {
5789 SExts &Insts = Entry.second;
5790 SExts CurPts;
5791 for (Instruction *Inst : Insts) {
5792 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5793 Inst->getOperand(0) != Entry.first)
5794 continue;
5795 bool inserted = false;
5796 for (auto &Pt : CurPts) {
5797 if (getDT(F).dominates(Inst, Pt)) {
5798 Pt->replaceAllUsesWith(Inst);
5799 RemovedInsts.insert(Pt);
5800 Pt->removeFromParent();
5801 Pt = Inst;
5802 inserted = true;
5803 Changed = true;
5804 break;
5805 }
5806 if (!getDT(F).dominates(Pt, Inst))
5807 // Give up if we need to merge in a common dominator as the
5808 // experiments show it is not profitable.
5809 continue;
5810 Inst->replaceAllUsesWith(Pt);
5811 RemovedInsts.insert(Inst);
5812 Inst->removeFromParent();
5813 inserted = true;
5814 Changed = true;
5815 break;
5816 }
5817 if (!inserted)
5818 CurPts.push_back(Inst);
5819 }
5820 }
5821 return Changed;
5822}
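
A minimal two-instruction sketch of the dominance-based merge above; the helper is hypothetical and omits the RemovedInsts bookkeeping the pass performs.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

static bool mergeIfDominated(llvm::DominatorTree &DT, llvm::SExtInst *A,
                             llvm::SExtInst *B) {
  // Only identical sexts of the same value are candidates.
  if (A == B || A->getOperand(0) != B->getOperand(0) ||
      A->getType() != B->getType())
    return false;
  if (!DT.dominates(A, B))
    return false;
  B->replaceAllUsesWith(A); // The dominated copy becomes redundant.
  B->eraseFromParent();
  return true;
}
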
5823
5824 // Split large data structures so that the GEPs accessing them can have
5825 // smaller offsets, allowing them to be sunk to the same blocks as their users.
5826// For example, a large struct starting from %base is split into two parts
5827// where the second part starts from %new_base.
5828//
5829// Before:
5830// BB0:
5831// %base =
5832//
5833// BB1:
5834// %gep0 = gep %base, off0
5835// %gep1 = gep %base, off1
5836// %gep2 = gep %base, off2
5837//
5838// BB2:
5839// %load1 = load %gep0
5840// %load2 = load %gep1
5841// %load3 = load %gep2
5842//
5843// After:
5844// BB0:
5845// %base =
5846// %new_base = gep %base, off0
5847//
5848// BB1:
5849// %new_gep0 = %new_base
5850// %new_gep1 = gep %new_base, off1 - off0
5851// %new_gep2 = gep %new_base, off2 - off0
5852//
5853// BB2:
5854// %load1 = load i32, i32* %new_gep0
5855// %load2 = load i32, i32* %new_gep1
5856// %load3 = load i32, i32* %new_gep2
5857//
5858// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
5859 // their offsets are small enough to fit into the addressing mode.
5860bool CodeGenPrepare::splitLargeGEPOffsets() {
5861 bool Changed = false;
5862 for (auto &Entry : LargeOffsetGEPMap) {
5863 Value *OldBase = Entry.first;
5864 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5865 &LargeOffsetGEPs = Entry.second;
5866 auto compareGEPOffset =
5867 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5868 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5869 if (LHS.first == RHS.first)
5870 return false;
5871 if (LHS.second != RHS.second)
5872 return LHS.second < RHS.second;
5873 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5874 };
5875 // Sort all the GEPs of the same data structure by offset.
5876 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5877 LargeOffsetGEPs.erase(
5878 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5879 LargeOffsetGEPs.end());
5880 // Skip if all the GEPs have the same offsets.
5881 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5882 continue;
5883 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5884 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5885 Value *NewBaseGEP = nullptr;
5886
5887 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
5888 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5889 GetElementPtrInst *GEP = LargeOffsetGEP->first;
5890 int64_t Offset = LargeOffsetGEP->second;
5891 if (Offset != BaseOffset) {
5892 TargetLowering::AddrMode AddrMode;
5893 AddrMode.BaseOffs = Offset - BaseOffset;
5894 // The result type of the GEP might not be the type of the memory
5895 // access.
5896 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5897 GEP->getResultElementType(),
5898 GEP->getAddressSpace())) {
5899 // We need to create a new base if the offset to the current base is
5900 // too large to fit into the addressing mode. So, a very large struct
5901 // may be split into several parts.
5902 BaseGEP = GEP;
5903 BaseOffset = Offset;
5904 NewBaseGEP = nullptr;
5905 }
5906 }
5907
5908 // Generate a new GEP to replace the current one.
5909 LLVMContext &Ctx = GEP->getContext();
5910 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5911 Type *I8PtrTy =
5912 Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5913 Type *I8Ty = Type::getInt8Ty(Ctx);
5914
5915 if (!NewBaseGEP) {
5916 // Create a new base if we don't have one yet. Find the insertion
5917 // pointer for the new base first.
5918 BasicBlock::iterator NewBaseInsertPt;
5919 BasicBlock *NewBaseInsertBB;
5920 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5921 // If the base of the struct is an instruction, the new base will be
5922 // inserted close to it.
5923 NewBaseInsertBB = BaseI->getParent();
5924 if (isa<PHINode>(BaseI))
5925 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5926 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5927 NewBaseInsertBB =
5928 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5929 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5930 } else
5931 NewBaseInsertPt = std::next(BaseI->getIterator());
5932 } else {
5933 // If the current base is an argument or global value, the new base
5934 // will be inserted to the entry block.
5935 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5936 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5937 }
5938 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5939 // Create a new base.
5940 Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5941 NewBaseGEP = OldBase;
5942 if (NewBaseGEP->getType() != I8PtrTy)
5943 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5944 NewBaseGEP =
5945 NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5946 NewGEPBases.insert(NewBaseGEP);
5947 }
5948
5949 IRBuilder<> Builder(GEP);
5950 Value *NewGEP = NewBaseGEP;
5951 if (Offset == BaseOffset) {
5952 if (GEP->getType() != I8PtrTy)
5953 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5954 } else {
5955 // Calculate the new offset for the new GEP.
5956 Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5957 NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5958
5959 if (GEP->getType() != I8PtrTy)
5960 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5961 }
5962 GEP->replaceAllUsesWith(NewGEP);
5963 LargeOffsetGEPID.erase(GEP);
5964 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5965 GEP->eraseFromParent();
5966 Changed = true;
5967 }
5968 }
5969 return Changed;
5970}
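
A hypothetical, minimal version of the per-GEP rewrite above: express a large-offset GEP as the shared i8 "splitgep" base plus (Offset - BaseOffset), then cast back to the original GEP's type.

#include "llvm/IR/IRBuilder.h"

static llvm::Value *rebaseLargeOffsetGEP(llvm::GetElementPtrInst *GEP,
                                         llvm::Value *NewBaseGEP,
                                         llvm::Type *IntPtrTy, int64_t Offset,
                                         int64_t BaseOffset) {
  llvm::IRBuilder<> Builder(GEP);
  llvm::Value *NewGEP = NewBaseGEP;
  if (Offset != BaseOffset) {
    // The new, smaller offset relative to the shared base.
    llvm::Value *Index = llvm::ConstantInt::get(IntPtrTy, Offset - BaseOffset);
    NewGEP = Builder.CreateGEP(Builder.getInt8Ty(), NewBaseGEP, Index);
  }
  if (NewGEP->getType() != GEP->getType())
    NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
  return NewGEP; // Caller would RAUW the old GEP with this value.
}
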
5971
5972bool CodeGenPrepare::optimizePhiType(
5973 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
5974 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
5975 // We are looking for a collection of interconnected phi nodes that together
5976 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
5977 // are of the same type. Convert the whole set of nodes to the type of the
5978 // bitcast.
5979 Type *PhiTy = I->getType();
5980 Type *ConvertTy = nullptr;
5981 if (Visited.count(I) ||
5982 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
5983 return false;
5984
5985 SmallVector<Instruction *, 4> Worklist;
5986 Worklist.push_back(cast<Instruction>(I));
5987 SmallPtrSet<PHINode *, 4> PhiNodes;
5988 PhiNodes.insert(I);
5989 Visited.insert(I);
5990 SmallPtrSet<Instruction *, 4> Defs;
5991 SmallPtrSet<Instruction *, 4> Uses;
5992 // This works by adding extra bitcasts between loads/stores and removing
5993 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
5994 // we can get into a situation where we remove a bitcast in one iteration
5995 // just to add it again in the next. We need to ensure that at least one
5996 // bitcast we remove is anchored to something that will not change back.
5997 bool AnyAnchored = false;
5998
5999 while (!Worklist.empty()) {
6000 Instruction *II = Worklist.pop_back_val();
6001
6002 if (auto *Phi = dyn_cast<PHINode>(II)) {
6003 // Handle Defs, which might also be PHI's
6004 for (Value *V : Phi->incoming_values()) {
6005 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6006 if (!PhiNodes.count(OpPhi)) {
6007 if (Visited.count(OpPhi))
6008 return false;
6009 PhiNodes.insert(OpPhi);
6010 Visited.insert(OpPhi);
6011 Worklist.push_back(OpPhi);
6012 }
6013 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6014 if (!OpLoad->isSimple())
6015 return false;
6016 if (!Defs.count(OpLoad)) {
6017 Defs.insert(OpLoad);
6018 Worklist.push_back(OpLoad);
6019 }
6020 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6021 if (!Defs.count(OpEx)) {
6022 Defs.insert(OpEx);
6023 Worklist.push_back(OpEx);
6024 }
6025 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6026 if (!ConvertTy)
6027 ConvertTy = OpBC->getOperand(0)->getType();
6028 if (OpBC->getOperand(0)->getType() != ConvertTy)
6029 return false;
6030 if (!Defs.count(OpBC)) {
6031 Defs.insert(OpBC);
6032 Worklist.push_back(OpBC);
6033 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6034 !isa<ExtractElementInst>(OpBC->getOperand(0));
6035 }
6036 } else if (!isa<UndefValue>(V)) {
6037 return false;
6038 }
6039 }
6040 }
6041
6042 // Handle uses which might also be phi's
6043 for (User *V : II->users()) {
6044 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6045 if (!PhiNodes.count(OpPhi)) {
6046 if (Visited.count(OpPhi))
6047 return false;
6048 PhiNodes.insert(OpPhi);
6049 Visited.insert(OpPhi);
6050 Worklist.push_back(OpPhi);
6051 }
6052 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6053 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6054 return false;
6055 Uses.insert(OpStore);
6056 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6057 if (!ConvertTy)
6058 ConvertTy = OpBC->getType();
6059 if (OpBC->getType() != ConvertTy)
6060 return false;
6061 Uses.insert(OpBC);
6062 AnyAnchored |=
6063 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6064 } else {
6065 return false;
6066 }
6067 }
6068 }
6069
6070 if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6071 return false;
6072
6073 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Converting " << *
I << "\n and connected nodes to " << *ConvertTy <<
"\n"; } } while (false)
6074 << *ConvertTy << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("codegenprepare")) { dbgs() << "Converting " << *
I << "\n and connected nodes to " << *ConvertTy <<
"\n"; } } while (false)
;
6075
6076 // Create all the new phi nodes of the new type, and bitcast any loads to the
6077 // correct type.
6078 ValueToValueMap ValMap;
6079 ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
6080 for (Instruction *D : Defs) {
6081 if (isa<BitCastInst>(D)) {
6082 ValMap[D] = D->getOperand(0);
6083 DeletedInstrs.insert(D);
6084 } else {
6085 ValMap[D] =
6086 new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
6087 }
6088 }
6089 for (PHINode *Phi : PhiNodes)
6090 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6091 Phi->getName() + ".tc", Phi);
6092 // Pipe together all the PhiNodes.
6093 for (PHINode *Phi : PhiNodes) {
6094 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6095 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6096 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6097 Phi->getIncomingBlock(i));
6098 Visited.insert(NewPhi);
6099 }
6100 // And finally pipe up the stores and bitcasts
6101 for (Instruction *U : Uses) {
6102 if (isa<BitCastInst>(U)) {
6103 DeletedInstrs.insert(U);
6104 U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
6105 } else {
6106 U->setOperand(0,
6107 new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
6108 }
6109 }
6110
6111 // Save the removed phis to be deleted later.
6112 for (PHINode *Phi : PhiNodes)
6113 DeletedInstrs.insert(Phi);
6114 return true;
6115}
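
A hypothetical single-phi sketch of the retyping above: create the phi in the converted type and bridge each incoming value with a fresh bitcast in its predecessor block. The pass instead reuses and removes existing bitcasts and converts whole networks of phis, loads, and stores at once.

#include "llvm/IR/Instructions.h"

static llvm::PHINode *retypePhi(llvm::PHINode *Phi, llvm::Type *ConvertTy) {
  llvm::PHINode *NewPhi = llvm::PHINode::Create(
      ConvertTy, Phi->getNumIncomingValues(), Phi->getName() + ".tc", Phi);
  for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
    llvm::BasicBlock *Pred = Phi->getIncomingBlock(i);
    // Bridge the old-typed incoming value into the new type.
    llvm::Value *Cast = new llvm::BitCastInst(
        Phi->getIncomingValue(i), ConvertTy, "bc", Pred->getTerminator());
    NewPhi->addIncoming(Cast, Pred);
  }
  return NewPhi;
}
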
6116
6117bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6118 if (!OptimizePhiTypes)
6119 return false;
6120
6121 bool Changed = false;
6122 SmallPtrSet<PHINode *, 4> Visited;
6123 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6124
6125 // Attempt to optimize all the phis in the function to the correct type.
6126 for (auto &BB : F)
6127 for (auto &Phi : BB.phis())
6128 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6129
6130 // Remove any old phis that have been converted.
6131 for (auto *I : DeletedInstrs) {
6132 I->replaceAllUsesWith(UndefValue::get(I->getType()));
6133 I->eraseFromParent();
6134 }
6135
6136 return Changed;
6137}
6138
6139 /// Return true if an ext(load) can be formed from an extension in
6140/// \p MovedExts.
6141bool CodeGenPrepare::canFormExtLd(
6142 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6143 Instruction *&Inst, bool HasPromoted) {
6144 for (auto *MovedExtInst : MovedExts) {
6145 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6146 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6147 Inst = MovedExtInst;
6148 break;
6149 }
6150 }
6151 if (!LI)
6152 return false;
6153
6154 // If they're already in the same block, there's nothing to do.
6155 // Make the cheap checks first if we did not promote.
6156 // If we promoted, we need to check if it is indeed profitable.
6157 if (!HasPromoted && LI->getParent() == Inst->getParent())
6158 return false;
6159
6160 return TLI->isExtLoad(LI, Inst, *DL);
6161}
6162
6163/// Move a zext or sext fed by a load into the same basic block as the load,
6164/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6165/// extend into the load.
6166///
6167/// E.g.,
6168/// \code
6169/// %ld = load i32* %addr
6170/// %add = add nuw i32 %ld, 4
6171/// %zext = zext i32 %add to i64
6172 /// \endcode
6173/// =>
6174/// \code
6175/// %ld = load i32* %addr
6176/// %zext = zext i32 %ld to i64
6177/// %add = add nuw i64 %zext, 4
6178 /// \endcode
6179 /// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
6180 /// allows us to match zext(load i32*) to i64.
6181///
6182/// Also, try to promote the computations used to obtain a sign extended
6183/// value used into memory accesses.
6184/// E.g.,
6185/// \code
6186/// a = add nsw i32 b, 3
6187/// d = sext i32 a to i64
6188/// e = getelementptr ..., i64 d
6189/// \endcode
6190/// =>
6191/// \code
6192/// f = sext i32 b to i64
6193/// a = add nsw i64 f, 3
6194/// e = getelementptr ..., i64 a
6195/// \endcode
6196///
6197/// \p Inst[in/out] the extension may be modified during the process if some
6198/// promotions apply.
6199bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6200 bool AllowPromotionWithoutCommonHeader = false;
6201 /// See if it is an interesting sext operation for the address type
6202 /// promotion before trying to promote it, e.g., the ones with the right
6203 /// type and used in memory accesses.
6204 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6205 *Inst, AllowPromotionWithoutCommonHeader);
6206 TypePromotionTransaction TPT(RemovedInsts);
6207 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6208 TPT.getRestorationPoint();
6209 SmallVector<Instruction *, 1> Exts;
6210 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6211 Exts.push_back(Inst);
6212
6213 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6214
6215 // Look for a load being extended.
6216 LoadInst *LI = nullptr;
6217 Instruction *ExtFedByLoad;
6218
6219 // Try to promote a chain of computation if it allows to form an extended
6220 // load.
6221 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6222 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6223 TPT.commit();
6224 // Move the extend into the same block as the load.
6225 ExtFedByLoad->moveAfter(LI);
6226 ++NumExtsMoved;
6227 Inst = ExtFedByLoad;
6228 return true;
6229 }
6230
6231 // Continue promoting SExts if the target considers address type promotion worthwhile.
6232 if (ATPConsiderable &&
6233 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6234 HasPromoted, TPT, SpeculativelyMovedExts))
6235 return true;
6236
6237 TPT.rollback(LastKnownGood);
6238 return false;
6239}
6240
6241// Perform address type promotion if doing so is profitable.
6242// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6243// instructions that sign extended the same initial value. However, if
6244 // AllowPromotionWithoutCommonHeader == true, we consider promoting the
6245 // extension to be profitable on its own.
6246bool CodeGenPrepare::performAddressTypePromotion(
6247 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6248 bool HasPromoted, TypePromotionTransaction &TPT,
6249 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6250 bool Promoted = false;
6251 SmallPtrSet<Instruction *, 1> UnhandledExts;
6252 bool AllSeenFirst = true;
6253 for (auto *I : SpeculativelyMovedExts) {
6254 Value *HeadOfChain = I->getOperand(0);
6255 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6256 SeenChainsForSExt.find(HeadOfChain);
6257 // If there is an unhandled SExt which has the same header, try to promote
6258 // it as well.
6259 if (AlreadySeen != SeenChainsForSExt.end()) {
6260 if (AlreadySeen->second != nullptr)
6261 UnhandledExts.insert(AlreadySeen->second);
6262 AllSeenFirst = false;
6263 }
6264 }
6265
6266 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6267 SpeculativelyMovedExts.size() == 1)) {
6268 TPT.commit();
6269 if (HasPromoted)
6270 Promoted = true;
6271 for (auto *I : SpeculativelyMovedExts) {
6272 Value *HeadOfChain = I->getOperand(0);
6273 SeenChainsForSExt[HeadOfChain] = nullptr;
6274 ValToSExtendedUses[HeadOfChain].push_back(I);
6275 }
6276 // Update Inst since promotion happened.
6277 Inst = SpeculativelyMovedExts.pop_back_val();
6278 } else {
6279 // This is the first chain visited from the header; keep the current chain
6280 // as unhandled. Defer promoting it until we encounter another SExt
6281 // chain derived from the same header.
6282 for (auto *I : SpeculativelyMovedExts) {
6283 Value *HeadOfChain = I->getOperand(0);
6284 SeenChainsForSExt[HeadOfChain] = Inst;
6285 }
6286 return false;
6287 }
6288
6289 if (!AllSeenFirst && !UnhandledExts.empty())
6290 for (auto *VisitedSExt : UnhandledExts) {
6291 if (RemovedInsts.count(VisitedSExt))
6292 continue;
6293 TypePromotionTransaction TPT(RemovedInsts);
6294 SmallVector<Instruction *, 1> Exts;
6295 SmallVector<Instruction *, 2> Chains;
6296 Exts.push_back(VisitedSExt);
6297 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6298 TPT.commit();
6299 if (HasPromoted)
6300 Promoted = true;
6301 for (auto *I : Chains) {
6302 Value *HeadOfChain = I->getOperand(0);
6303 // Mark this as handled.
6304 SeenChainsForSExt[HeadOfChain] = nullptr;
6305 ValToSExtendedUses[HeadOfChain].push_back(I);
6306 }
6307 }
6308 return Promoted;
6309}
6310
6311bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
6312 BasicBlock *DefBB = I->getParent();
6313
6314 // If the result of a {s|z}ext and its source are both live out, rewrite all
6315 // other uses of the source with result of extension.
6316 Value *Src = I->getOperand(0);
6317 if (Src->hasOneUse())
6318 return false;
6319
6320 // Only do this xform if truncating is free.
6321 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
6322 return false;
6323
6324 // Only safe to perform the optimization if the source is also defined in
6325 // this block.
6326 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
6327 return false;
6328
6329 bool DefIsLiveOut = false;
6330 for (User *U : I->users()) {
6331 Instruction *UI = cast<Instruction>(U);
6332
6333 // Figure out which BB this ext is used in.
6334 BasicBlock *UserBB = UI->getParent();
6335 if (UserBB == DefBB) continue;
6336 DefIsLiveOut = true;
6337 break;
6338 }
6339 if (!DefIsLiveOut)
6340 return false;
6341
6342 // Make sure none of the uses are PHI nodes.
6343 for (User *U : Src->users()) {
6344 Instruction *UI = cast<Instruction>(U);
6345 BasicBlock *UserBB = UI->getParent();
6346 if (UserBB == DefBB) continue;
6347 // Be conservative. We don't want this xform to end up introducing
6348 // reloads just before load / store instructions.
6349 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
6350 return false;
6351 }
6352
6353 // InsertedTruncs - Only insert one trunc in each block once.
6354 DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
6355
6356 bool MadeChange = false;
6357 for (Use &U : Src->uses()) {
6358 Instruction *User = cast<Instruction>(U.getUser());
6359
6360 // Figure out which BB this ext is used in.
6361 BasicBlock *UserBB = User->getParent();
6362 if (UserBB == DefBB) continue;
6363
6364 // Both src and def are live in this block. Rewrite the use.
6365 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
6366
6367 if (!InsertedTrunc) {
6368 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
6369 assert(InsertPt != UserBB->end());
6370 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
6371 InsertedInsts.insert(InsertedTrunc);
6372 }
6373
6374 // Replace a use of the {s|z}ext source with a use of the result.
6375 U = InsertedTrunc;
6376 ++NumExtUses;
6377 MadeChange = true;
6378 }
6379
6380 return MadeChange;
6381}
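
A sketch of the rewrite applied to each out-of-block use above: replace a use of the ext's source with a trunc of the ext result inserted at the start of the using block. The helper is hypothetical; the pass caches one trunc per block instead of creating one per use.

#include "llvm/IR/Instructions.h"

static void rewriteUseWithTrunc(llvm::Use &U, llvm::Instruction *Ext) {
  auto *UserInst = llvm::cast<llvm::Instruction>(U.getUser());
  llvm::BasicBlock *UserBB = UserInst->getParent();
  llvm::Value *Src = Ext->getOperand(0);
  // trunc (ext Src) back to Src's type, placed where the use can see it.
  llvm::Instruction *Trunc = new llvm::TruncInst(
      Ext, Src->getType(), "", &*UserBB->getFirstInsertionPt());
  U.set(Trunc);
}
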
6382
6383// Find loads whose uses only use some of the loaded value's bits. Add an "and"
6384// just after the load if the target can fold this into one extload instruction,
6385// with the hope of eliminating some of the other later "and" instructions using
6386// the loaded value. "and"s that are made trivially redundant by the insertion
6387// of the new "and" are removed by this function, while others (e.g. those whose
6388// path from the load goes through a phi) are left for isel to potentially
6389// remove.
6390//
6391// For example:
6392//
6393// b0:
6394// x = load i32
6395// ...
6396// b1:
6397// y = and x, 0xff
6398// z = use y
6399//
6400// becomes:
6401//
6402// b0:
6403// x = load i32
6404// x' = and x, 0xff
6405// ...
6406// b1:
6407// z = use x'
6408//
6409// whereas:
6410//
6411// b0:
6412// x1 = load i32
6413// ...
6414// b1:
6415// x2 = load i32
6416// ...
6417// b2:
6418// x = phi x1, x2
6419// y = and x, 0xff
6420//
6421// becomes (after a call to optimizeLoadExt for each load):
6422//
6423// b0:
6424// x1 = load i32
6425// x1' = and x1, 0xff
6426// ...
6427// b1:
6428// x2 = load i32
6429// x2' = and x2, 0xff
6430// ...
6431// b2:
6432// x = phi x1', x2'
6433// y = and x, 0xff
6434bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
6435 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
6436 return false;
6437
6438 // Skip loads we've already transformed.
6439 if (Load->hasOneUse() &&
6440 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
6441 return false;
6442
6443 // Look at all uses of Load, looking through phis, to determine how many bits
6444 // of the loaded value are needed.
6445 SmallVector<Instruction *, 8> WorkList;
6446 SmallPtrSet<Instruction *, 16> Visited;
6447 SmallVector<Instruction *, 8> AndsToMaybeRemove;
6448 for (auto *U : Load->users())
6449 WorkList.push_back(cast<Instruction>(U));
6450
6451 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
6452 unsigned BitWidth = LoadResultVT.getSizeInBits();
6453 APInt DemandBits(BitWidth, 0);
6454 APInt WidestAndBits(BitWidth, 0);
6455
6456 while (!WorkList.empty()) {
6457 Instruction *I = WorkList.back();
6458 WorkList.pop_back();
6459
6460 // Break use-def graph loops.
6461 if (!Visited.insert(I).second)
6462 continue;
6463
6464 // For a PHI node, push all of its users.
6465 if (auto *Phi = dyn_cast<PHINode>(I)) {
6466 for (auto *U : Phi->users())
6467 WorkList.push_back(cast<Instruction>(U));
6468 continue;
6469 }
6470
6471 switch (I->getOpcode()) {
6472 case Instruction::And: {
6473 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
6474 if (!AndC)
6475 return false;
6476 APInt AndBits = AndC->getValue();
6477 DemandBits |= AndBits;
6478 // Keep track of the widest and mask we see.
6479 if (AndBits.ugt(WidestAndBits))
6480 WidestAndBits = AndBits;
6481 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
6482 AndsToMaybeRemove.push_back(I);
6483 break;
6484 }
6485
6486 case Instruction::Shl: {
6487 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
6488 if (!ShlC)
6489 return false;
6490 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
6491 DemandBits.setLowBits(BitWidth - ShiftAmt);
6492 break;
6493 }
6494
6495 case Instruction::Trunc: {
6496 EVT TruncVT = TLI->getValueType(*DL, I->getType());
6497 unsigned TruncBitWidth = TruncVT.getSizeInBits();
6498 DemandBits.setLowBits(TruncBitWidth);
6499 break;
6500 }
6501
6502 default:
6503 return false;
6504 }
6505 }
6506
6507 uint32_t ActiveBits = DemandBits.getActiveBits();
6508 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
6509 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
6510 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
6511 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
6512 // followed by an AND.
6513 // TODO: Look into removing this restriction by fixing backends to either
6514 // return false for isLoadExtLegal for i1 or have them select this pattern to
6515 // a single instruction.
6516 //
6517 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
6518 // mask, since these are the only ands that will be removed by isel.
6519 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
6520 WidestAndBits != DemandBits)
6521 return false;
6522
6523 LLVMContext &Ctx = Load->getType()->getContext();
6524 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
6525 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
6526
6527 // Reject cases that won't be matched as extloads.
6528 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
6529 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
6530 return false;
6531
6532 IRBuilder<> Builder(Load->getNextNode());
6533 auto *NewAnd = cast<Instruction>(
6534 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
6535 // Mark this instruction as "inserted by CGP", so that other
6536 // optimizations don't touch it.
6537 InsertedInsts.insert(NewAnd);
6538
6539 // Replace all uses of load with new and (except for the use of load in the
6540 // new and itself).
6541 Load->replaceAllUsesWith(NewAnd);
6542 NewAnd->setOperand(0, Load);
6543
6544 // Remove any and instructions that are now redundant.
6545 for (auto *And : AndsToMaybeRemove)
6546 // Check that the and mask is the same as the one we decided to put on the
6547 // new and.
6548 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
6549 And->replaceAllUsesWith(NewAnd);
6550 if (&*CurInstIterator == And)
6551 CurInstIterator = std::next(And->getIterator());
6552 And->eraseFromParent();
6553 ++NumAndUses;
6554 }
6555
6556 ++NumAndsAdded;
6557 return true;
6558}
6559
6560/// Check if V (an operand of a select instruction) is an expensive instruction
6561/// that is only used once.
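/// For example (illustrative): an fdiv feeding only one arm of a select is
/// typically costed as expensive and is safe to speculate, so it is a good
/// candidate to sink into the corresponding branch and thus avoid executing
/// it on the path that does not need it.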
6562static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
6563 auto *I = dyn_cast<Instruction>(V);
6564 // If it's safe to speculatively execute, then it should not have side
6565 // effects; therefore, it's safe to sink and possibly *not* execute.
6566 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
6567 TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
6568 TargetTransformInfo::TCC_Expensive;
6569}
6570
6571/// Returns true if a SelectInst should be turned into an explicit branch.
6572static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
6573 const TargetLowering *TLI,
6574 SelectInst *SI) {
6575 // If even a predictable select is cheap, then a branch can't be cheaper.
6576 if (!TLI->isPredictableSelectExpensive())
6577 return false;
6578
6579 // FIXME: This should use the same heuristics as IfConversion to determine
6580 // whether a select is better represented as a branch.
6581
6582 // If metadata tells us that the select condition is obviously predictable,
6583 // then we want to replace the select with a branch.
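  // For instance, with hypothetical !prof weights {900, 100}: Max/Sum is
  // 900/1000 = 0.9, and if that exceeds the target's predictable-branch
  // threshold this returns true and the select is turned into a branch.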
6584 uint64_t TrueWeight, FalseWeight;
6585 if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
6586 uint64_t Max = std::max(TrueWeight, FalseWeight);
6587 uint64_t Sum = TrueWeight + FalseWeight;
6588 if (Sum != 0) {
6589 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
6590 if (Probability > TTI->getPredictableBranchThreshold())
6591 return true;
6592 }
6593 }
6594
6595 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
6596
6597 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
6598 // comparison condition. If the compare has more than one use, there's
6599 // probably another cmov or setcc around, so it's not worth emitting a branch.
6600 if (!Cmp || !Cmp->hasOneUse())
6601 return false;
6602
6603 // If either operand of the select is expensive and only needed on one side
6604 // of the select, we should form a branch.
6605 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
6606 sinkSelectOperand(TTI, SI->getFalseValue()))
6607 return true;
6608
6609 return false;
6610}
6611
6612/// If \p isTrue is true, return the true value of \p SI, otherwise return
6613/// false value of \p SI. If the true/false value of \p SI is defined by any
6614/// select instructions in \p Selects, look through the defining select
6615/// instruction until the true/false value is not defined in \p Selects.
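/// For example (hypothetical values), given
///   %s1 = select i1 %c, i32 %a, i32 %b
///   %s2 = select i1 %c, i32 %s1, i32 %d
/// with Selects = {%s1, %s2}: the true value of %s2 resolves to %a (looking
/// through %s1), and the false value of %s2 resolves to %d.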
6616static Value *getTrueOrFalseValue(
6617 SelectInst *SI, bool isTrue,
6618 const SmallPtrSet<const Instruction *, 2> &Selects) {
6619 Value *V = nullptr;
6620
6621 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
6622 DefSI = dyn_cast<SelectInst>(V)) {
6623 assert(DefSI->getCondition() == SI->getCondition() &&
6624 "The condition of DefSI does not match with SI");
6625 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
6626 }
6627
6628 assert(V && "Failed to get select true/false value");
6629 return V;
6630}
6631
6632bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
6633 assert(Shift->isShift() && "Expected a shift");
6634
6635 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
6636 // general vector shifts, and (3) the shift amount is a select-of-splatted
6637 // values, hoist the shifts before the select:
6638 // shift Op0, (select Cond, TVal, FVal) -->
6639 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
6640 //
6641 // This is inverting a generic IR transform when we know that the cost of a
6642 // general vector shift is more than the cost of 2 shift-by-scalars.
6643 // We can't do this effectively in SDAG because we may not be able to
6644 // determine if the select operands are splats from within a basic block.
6645 Type *Ty = Shift->getType();
6646 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
6647 return false;
6648 Value *Cond, *TVal, *FVal;
6649 if (!match(Shift->getOperand(1),
6650 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
6651 return false;
6652 if (!isSplatValue(TVal) || !isSplatValue(FVal))
6653 return false;
6654
6655 IRBuilder<> Builder(Shift);
6656 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
6657 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
6658 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
6659 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
6660 Shift->replaceAllUsesWith(NewSel);
6661 Shift->eraseFromParent();
6662 return true;
6663}
6664
6665bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
6666 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
6667 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
6668 "Expected a funnel shift");
6669
6670 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
6671 // than general vector shifts, and (3) the shift amount is select-of-splatted
6672 // values, hoist the funnel shifts before the select:
6673 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
6674 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
6675 //
6676 // This is inverting a generic IR transform when we know that the cost of a
6677 // general vector shift is more than the cost of 2 shift-by-scalars.
6678 // We can't do this effectively in SDAG because we may not be able to
6679 // determine if the select operands are splats from within a basic block.
6680 Type *Ty = Fsh->getType();
6681 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
6682 return false;
6683 Value *Cond, *TVal, *FVal;
6684 if (!match(Fsh->getOperand(2),
6685 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
6686 return false;
6687 if (!isSplatValue(TVal) || !isSplatValue(FVal))
6688 return false;
6689
6690 IRBuilder<> Builder(Fsh);
6691 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
6692 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal });
6693 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal });
6694 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
6695 Fsh->replaceAllUsesWith(NewSel);
6696 Fsh->eraseFromParent();
6697 return true;
6698}
6699
6700/// If we have a SelectInst that will likely profit from branch prediction,
6701/// turn it into a branch.
6702bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
6703 if (DisableSelectToBranch)
6704 return false;
6705
6706 // Find all consecutive select instructions that share the same condition.
6707 SmallVector<SelectInst *, 2> ASI;
6708 ASI.push_back(SI);
6709 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
6710 It != SI->getParent()->end(); ++It) {
6711 SelectInst *I = dyn_cast<SelectInst>(&*It);
6712 if (I && SI->getCondition() == I->getCondition()) {
6713 ASI.push_back(I);
6714 } else {
6715 break;
6716 }
6717 }
6718
6719 SelectInst *LastSI = ASI.back();
6720 // Increment the current iterator to skip the rest of the select instructions,
6721 // because they will either all be lowered to branches or none of them will.
6722 CurInstIterator = std::next(LastSI->getIterator());
6723
6724 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
6725
6726 // Can we convert the 'select' to CF ?
6727 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
6728 return false;
6729
6730 TargetLowering::SelectSupportKind SelectKind;
6731 if (VectorCond)
6732 SelectKind = TargetLowering::VectorMaskSelect;
6733 else if (SI->getType()->isVectorTy())
6734 SelectKind = TargetLowering::ScalarCondVectorVal;
6735 else
6736 SelectKind = TargetLowering::ScalarValSelect;
6737
6738 if (TLI->isSelectSupported(SelectKind) &&
6739 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
6740 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
6741 return false;
6742
6743 // The DominatorTree needs to be rebuilt by any consumers after this
6744 // transformation. We simply reset here rather than setting the ModifiedDT
6745 // flag to avoid restarting the function walk in runOnFunction for each
6746 // select optimized.
6747 DT.reset();
6748
6749 // Transform a sequence like this:
6750 // start:
6751 // %cmp = cmp uge i32 %a, %b
6752 // %sel = select i1 %cmp, i32 %c, i32 %d
6753 //
6754 // Into:
6755 // start:
6756 // %cmp = cmp uge i32 %a, %b
6757 // %cmp.frozen = freeze %cmp
6758 // br i1 %cmp.frozen, label %select.true, label %select.false
6759 // select.true:
6760 // br label %select.end
6761 // select.false:
6762 // br label %select.end
6763 // select.end:
6764 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
6765 //
6766 // %cmp should be frozen, otherwise it may introduce undefined behavior.
6767 // In addition, we may sink instructions that produce %c or %d from
6768 // the entry block into the destination(s) of the new branch.
6769 // If the true or false blocks do not contain a sunken instruction, that
6770 // block and its branch may be optimized away. In that case, one side of the
6771 // first branch will point directly to select.end, and the corresponding PHI
6772 // predecessor block will be the start block.
6773
6774 // First, we split the block containing the select into 2 blocks.
6775 BasicBlock *StartBlock = SI->getParent();
6776 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
6777 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
6778 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
6779
6780 // Delete the unconditional branch that was just created by the split.
6781 StartBlock->getTerminator()->eraseFromParent();
6782
6783 // These are the new basic blocks for the conditional branch.
6784 // At least one will become an actual new basic block.
6785 BasicBlock *TrueBlock = nullptr;
6786 BasicBlock *FalseBlock = nullptr;
6787 BranchInst *TrueBranch = nullptr;
6788 BranchInst *FalseBranch = nullptr;
6789
6790 // Sink expensive instructions into the conditional blocks to avoid executing
6791 // them speculatively.
6792 for (SelectInst *SI : ASI) {
6793 if (sinkSelectOperand(TTI, SI->getTrueValue())) {
6794 if (TrueBlock == nullptr) {
6795 TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
6796 EndBlock->getParent(), EndBlock);
6797 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
6798 TrueBranch->setDebugLoc(SI->getDebugLoc());
6799 }
6800 auto *TrueInst = cast<Instruction>(SI->getTrueValue());
6801 TrueInst->moveBefore(TrueBranch);
6802 }
6803 if (sinkSelectOperand(TTI, SI->getFalseValue())) {
6804 if (FalseBlock == nullptr) {
6805 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
6806 EndBlock->getParent(), EndBlock);
6807 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
6808 FalseBranch->setDebugLoc(SI->getDebugLoc());
6809 }
6810 auto *FalseInst = cast<Instruction>(SI->getFalseValue());
6811 FalseInst->moveBefore(FalseBranch);
6812 }
6813 }
6814
6815 // If there was nothing to sink, then arbitrarily choose the 'false' side
6816 // for a new input value to the PHI.
6817 if (TrueBlock == FalseBlock) {
6818 assert(TrueBlock == nullptr &&
6819 "Unexpected basic block transform while optimizing select");
6820
6821 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
6822 EndBlock->getParent(), EndBlock);
6823 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
6824 FalseBranch->setDebugLoc(SI->getDebugLoc());
6825 }
6826
6827 // Insert the real conditional branch based on the original condition.
6828 // If we did not create a new block for one of the 'true' or 'false' paths
6829 // of the condition, it means that side of the branch goes to the end block
6830 // directly and the path originates from the start block from the point of
6831 // view of the new PHI.
6832 BasicBlock *TT, *FT;
6833 if (TrueBlock == nullptr) {
6834 TT = EndBlock;
6835 FT = FalseBlock;
6836 TrueBlock = StartBlock;
6837 } else if (FalseBlock == nullptr) {
6838 TT = TrueBlock;
6839 FT = EndBlock;
6840 FalseBlock = StartBlock;
6841 } else {
6842 TT = TrueBlock;
6843 FT = FalseBlock;
6844 }
6845 IRBuilder<> IB(SI);
6846 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
6847 IB.CreateCondBr(CondFr, TT, FT, SI);
6848
6849 SmallPtrSet<const Instruction *, 2> INS;
6850 INS.insert(ASI.begin(), ASI.end());
6851 // Use reverse iterator because later select may use the value of the
6852 // earlier select, and we need to propagate value through earlier select
6853 // to get the PHI operand.
6854 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
6855 SelectInst *SI = *It;
6856 // The select itself is replaced with a PHI Node.
6857 PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
6858 PN->takeName(SI);
6859 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
6860 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
6861 PN->setDebugLoc(SI->getDebugLoc());
6862
6863 SI->replaceAllUsesWith(PN);
6864 SI->eraseFromParent();
6865 INS.erase(SI);
6866 ++NumSelectsExpanded;
6867 }
6868
6869 // Instruct OptimizeBlock to skip to the next block.
6870 CurInstIterator = StartBlock->end();
6871 return true;
6872}
6873
6874/// Some targets only accept certain types for splat inputs. For example a VDUP
6875 /// in MVE takes a GPR (integer) register, and the instructions that
6876 /// incorporate a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
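/// A rough sketch of the rewrite performed below (types are illustrative):
///   %i = insertelement <4 x float> undef, float %f, i64 0
///   %s = shufflevector <4 x float> %i, <4 x float> undef, zeroinitializer
/// may become
///   %b = bitcast float %f to i32
///   %v = <4 x i32> splat of %b
///   %s = bitcast <4 x i32> %v to <4 x float>
/// so that the splat input can be kept in a GPR.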
6877bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
6878 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
6879 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
6880 m_Undef(), m_ZeroMask())))
6881 return false;
6882 Type *NewType = TLI->shouldConvertSplatType(SVI);
6883 if (!NewType)
6884 return false;
6885
6886 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
6887 assert(!NewType->isVectorTy() && "Expected a scalar type!");
6888 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
6889 "Expected a type of the same size!");
6890 auto *NewVecType =
6891 FixedVectorType::get(NewType, SVIVecType->getNumElements());
6892
6893 // Create a bitcast (shuffle (insert (bitcast(..))))
6894 IRBuilder<> Builder(SVI->getContext());
6895 Builder.SetInsertPoint(SVI);
6896 Value *BC1 = Builder.CreateBitCast(
6897 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
6898 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
6899 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
6900
6901 SVI->replaceAllUsesWith(BC2);
6902 RecursivelyDeleteTriviallyDeadInstructions(
6903 SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
6904
6905 // Also hoist the bitcast up to its operand if they are not in the same
6906 // block.
6907 if (auto *BCI = dyn_cast<Instruction>(BC1))
6908 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
6909 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
6910 !Op->isTerminator() && !Op->isEHPad())
6911 BCI->moveAfter(Op);
6912
6913 return true;
6914}
6915
6916bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
6917 // If the operands of I can be folded into a target instruction together with
6918 // I, duplicate and sink them.
6919 SmallVector<Use *, 4> OpsToSink;
6920 if (!TLI->shouldSinkOperands(I, OpsToSink))
6921 return false;
6922
6923 // OpsToSink can contain multiple uses in a use chain (e.g.
6924 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
6925 // uses must come first, so we process the ops in reverse order so as to not
6926 // create invalid IR.
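  // For example (hypothetical chain): with OpsToSink = { %s = shufflevector ...,
  // %z = zext %s }, the reverse walk clones %z first (directly above I) and
  // then %s above the clone of %z, so every cloned definition still dominates
  // its cloned use.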
6927 BasicBlock *TargetBB = I->getParent();
6928 bool Changed = false;
6929 SmallVector<Use *, 4> ToReplace;
6930 for (Use *U : reverse(OpsToSink)) {
6931 auto *UI = cast<Instruction>(U->get());
6932 if (UI->getParent() == TargetBB || isa<PHINode>(UI))
6933 continue;
6934 ToReplace.push_back(U);
6935 }
6936
6937 SetVector<Instruction *> MaybeDead;
6938 DenseMap<Instruction *, Instruction *> NewInstructions;
6939 Instruction *InsertPoint = I;
6940 for (Use *U : ToReplace) {
6941 auto *UI = cast<Instruction>(U->get());
6942 Instruction *NI = UI->clone();
6943 NewInstructions[UI] = NI;
6944 MaybeDead.insert(UI);
6945 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
6946 NI->insertBefore(InsertPoint);
6947 InsertPoint = NI;
6948 InsertedInsts.insert(NI);
6949
6950 // Update the use for the new instruction, making sure that we update the
6951 // sunk instruction uses, if it is part of a chain that has already been
6952 // sunk.
6953 Instruction *OldI = cast<Instruction>(U->getUser());
6954 if (NewInstructions.count(OldI))
6955 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
6956 else
6957 U->set(NI);
6958 Changed = true;
6959 }
6960
6961 // Remove instructions that are dead after sinking.
6962 for (auto *I : MaybeDead) {
6963 if (!I->hasNUsesOrMore(1)) {
6964 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
6965 I->eraseFromParent();
6966 }
6967 }
6968
6969 return Changed;
6970}
6971
6972bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
6973 Value *Cond = SI->getCondition();
6974 Type *OldType = Cond->getType();
6975 LLVMContext &Context = Cond->getContext();
6976 MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
6977 unsigned RegWidth = RegType.getSizeInBits();
6978
6979 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
6980 return false;
6981
6982 // If the register width is greater than the type width, expand the condition
6983 // of the switch instruction and each case constant to the width of the
6984 // register. By widening the type of the switch condition, subsequent
6985 // comparisons (for case comparisons) will not need to be extended to the
6986 // preferred register width, so we will potentially eliminate N-1 extends,
6987 // where N is the number of cases in the switch.
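  // Sketch (assuming, for illustration, a target whose preferred register
  // type is i64):
  //   switch i8 %x, ...
  // becomes
  //   %x.wide = zext i8 %x to i64
  //   switch i64 %x.wide, ...   ; each case constant widened the same way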
6988 auto *NewType = Type::getIntNTy(Context, RegWidth);
6989
6990 // Zero-extend the switch condition and case constants unless the switch
6991 // condition is a function argument that is already being sign-extended.
6992 // In that case, we can avoid an unnecessary mask/extension by sign-extending
6993 // everything instead.
6994 Instruction::CastOps ExtType = Instruction::ZExt;
6995 if (auto *Arg = dyn_cast<Argument>(Cond))
6996 if (Arg->hasSExtAttr())
6997 ExtType = Instruction::SExt;
6998
6999 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7000 ExtInst->insertBefore(SI);
7001 ExtInst->setDebugLoc(SI->getDebugLoc());
7002 SI->setCondition(ExtInst);
7003 for (auto Case : SI->cases()) {
7004 APInt NarrowConst = Case.getCaseValue()->getValue();
7005 APInt WideConst = (ExtType == Instruction::ZExt) ?
7006 NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
7007 Case.setValue(ConstantInt::get(Context, WideConst));
7008 }
7009
7010 return true;
7011}
7012
7013
7014namespace {
7015
7016/// Helper class to promote a scalar operation to a vector one.
7017/// This class is used to move downward extractelement transition.
7018/// E.g.,
7019/// a = vector_op <2 x i32>
7020/// b = extractelement <2 x i32> a, i32 0
7021/// c = scalar_op b
7022/// store c
7023///
7024/// =>
7025/// a = vector_op <2 x i32>
7026/// c = vector_op a (equivalent to scalar_op on the related lane)
7027/// * d = extractelement <2 x i32> c, i32 0
7028/// * store d
7029 /// Assuming both extractelement and store can be combined, we get rid of the
7030/// transition.
7031class VectorPromoteHelper {
7032 /// DataLayout associated with the current module.
7033 const DataLayout &DL;
7034
7035 /// Used to perform some checks on the legality of vector operations.
7036 const TargetLowering &TLI;
7037
7038 /// Used to estimate the cost of the promoted chain.
7039 const TargetTransformInfo &TTI;
7040
7041 /// The transition being moved downwards.
7042 Instruction *Transition;
7043
7044 /// The sequence of instructions to be promoted.
7045 SmallVector<Instruction *, 4> InstsToBePromoted;
7046
7047 /// Cost of combining a store and an extract.
7048 unsigned StoreExtractCombineCost;
7049
7050 /// Instruction that will be combined with the transition.
7051 Instruction *CombineInst = nullptr;
7052
7053 /// The instruction that represents the current end of the transition.
7054 /// Since we are faking the promotion until we reach the end of the chain
7055 /// of computation, we need a way to get the current end of the transition.
7056 Instruction *getEndOfTransition() const {
7057 if (InstsToBePromoted.empty())
7058 return Transition;
7059 return InstsToBePromoted.back();
7060 }
7061
7062 /// Return the index of the original value in the transition.
7063 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7064 /// c, is at index 0.
7065 unsigned getTransitionOriginalValueIdx() const {
7066 assert(isa<ExtractElementInst>(Transition) &&
7067 "Other kind of transitions are not supported yet");
7068 return 0;
7069 }
7070
7071 /// Return the index of the index in the transition.
7072 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7073 /// is at index 1.
7074 unsigned getTransitionIdx() const {
7075 assert(isa<ExtractElementInst>(Transition) &&
7076 "Other kind of transitions are not supported yet");
7077 return 1;
7078 }
7079
7080 /// Get the type of the transition.
7081 /// This is the type of the original value.
7082 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7083 /// transition is <2 x i32>.
7084 Type *getTransitionType() const {
7085 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7086 }
7087
7088 /// Promote \p ToBePromoted by moving \p Def downward through.
7089 /// I.e., we have the following sequence:
7090 /// Def = Transition <ty1> a to <ty2>
7091 /// b = ToBePromoted <ty2> Def, ...
7092 /// =>
7093 /// b = ToBePromoted <ty1> a, ...
7094 /// Def = Transition <ty1> ToBePromoted to <ty2>
7095 void promoteImpl(Instruction *ToBePromoted);
7096
7097 /// Check whether or not it is profitable to promote all the
7098 /// instructions enqueued to be promoted.
7099 bool isProfitableToPromote() {
7100 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7101 unsigned Index = isa<ConstantInt>(ValIdx)
7102 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7103 : -1;
7104 Type *PromotedType = getTransitionType();
7105
7106 StoreInst *ST = cast<StoreInst>(CombineInst);
7107 unsigned AS = ST->getPointerAddressSpace();
7108 // Check if this store is supported.
7109 if (!TLI.allowsMisalignedMemoryAccesses(
7110 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7111 ST->getAlign())) {
7112 // If this is not supported, there is no way we can combine
7113 // the extract with the store.
7114 return false;
7115 }
7116
7117 // The scalar chain of computation has to pay for the transition
7118 // scalar to vector.
7119 // The vector chain has to account for the combining cost.
7120 InstructionCost ScalarCost =
7121 TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
7122 InstructionCost VectorCost = StoreExtractCombineCost;
7123 enum TargetTransformInfo::TargetCostKind CostKind =
7124 TargetTransformInfo::TCK_RecipThroughput;
7125 for (const auto &Inst : InstsToBePromoted) {
7126 // Compute the cost.
7127 // By construction, all instructions being promoted are arithmetic ones.
7128 // Moreover, one argument is a constant that can be viewed as a splat
7129 // constant.
7130 Value *Arg0 = Inst->getOperand(0);
7131 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7132 isa<ConstantFP>(Arg0);
7133 TargetTransformInfo::OperandValueKind Arg0OVK =
7134 IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
7135 : TargetTransformInfo::OK_AnyValue;
7136 TargetTransformInfo::OperandValueKind Arg1OVK =
7137 !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
7138 : TargetTransformInfo::OK_AnyValue;
7139 ScalarCost += TTI.getArithmeticInstrCost(
7140 Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK);
7141 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7142 CostKind,
7143 Arg0OVK, Arg1OVK);
7144 }
7145 LLVM_DEBUG(
7146 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7147 << ScalarCost << "\nVector: " << VectorCost << '\n');
7148 return ScalarCost > VectorCost;
7149 }
7150
7151 /// Generate a constant vector with \p Val with the same
7152 /// number of elements as the transition.
7153 /// \p UseSplat defines whether or not \p Val should be replicated
7154 /// across the whole vector.
7155 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7156 /// otherwise we generate a vector with as many undef as possible:
7157 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
7158 /// used at the index of the extract.
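  /// For example (illustrative values): for a <4 x i32> transition with
  /// Val == 7 and ExtractIdx == 1, the non-splat form is
  /// <i32 undef, i32 7, i32 undef, i32 undef>, whereas the splat form is
  /// <i32 7, i32 7, i32 7, i32 7>.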
7159 Value *getConstantVector(Constant *Val, bool UseSplat) const {
7160 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7161 if (!UseSplat) {
7162 // If we cannot determine where the constant must be, we have to
7163 // use a splat constant.
7164 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7165 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7166 ExtractIdx = CstVal->getSExtValue();
7167 else
7168 UseSplat = true;
7169 }
7170
7171 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
7172 if (UseSplat)
7173 return ConstantVector::getSplat(EC, Val);
7174
7175 if (!EC.isScalable()) {
7176 SmallVector<Constant *, 4> ConstVec;
7177 UndefValue *UndefVal = UndefValue::get(Val->getType());
7178 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
7179 if (Idx == ExtractIdx)
7180 ConstVec.push_back(Val);
7181 else
7182 ConstVec.push_back(UndefVal);
7183 }
7184 return ConstantVector::get(ConstVec);
7185 } else
7186 llvm_unreachable(
7187 "Generate scalable vector for non-splat is unimplemented");
7188 }
7189
7190 /// Check if promoting the operand at \p OperandIdx in \p Use to a vector
7191 /// type can trigger undefined behavior.
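  /// For example (illustrative): when promoting the divisor of
  ///   %r = udiv i32 %x, %C
  /// the unused vector lanes must not be filled with undef, since an undef
  /// lane could be zero; a splat of %C is used for those lanes instead.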
7192 static bool canCauseUndefinedBehavior(const Instruction *Use,
7193 unsigned OperandIdx) {
7194 // It is not safe to introduce undef when the operand is on
7195 // the right-hand side of a division-like instruction.
7196 if (OperandIdx != 1)
7197 return false;
7198 switch (Use->getOpcode()) {
7199 default:
7200 return false;
7201 case Instruction::SDiv:
7202 case Instruction::UDiv:
7203 case Instruction::SRem:
7204 case Instruction::URem:
7205 return true;
7206 case Instruction::FDiv:
7207 case Instruction::FRem:
7208 return !Use->hasNoNaNs();
7209 }
7210 llvm_unreachable(nullptr);
7211 }
7212
7213public:
7214 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
7215 const TargetTransformInfo &TTI, Instruction *Transition,
7216 unsigned CombineCost)
7217 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
7218 StoreExtractCombineCost(CombineCost) {
7219 assert(Transition && "Do not know how to promote null");
7220 }
7221
7222 /// Check if we can promote \p ToBePromoted to \p Type.
7223 bool canPromote(const Instruction *ToBePromoted) const {
7224 // We could support CastInst too.
7225 return isa<BinaryOperator>(ToBePromoted);
7226 }
7227
7228 /// Check if it is profitable to promote \p ToBePromoted
7229 /// by moving downward the transition through.
7230 bool shouldPromote(const Instruction *ToBePromoted) const {
7231 // Promote only if all the operands can be statically expanded.
7232 // Indeed, we do not want to introduce any new kind of transitions.
7233 for (const Use &U : ToBePromoted->operands()) {
7234 const Value *Val = U.get();
7235 if (Val == getEndOfTransition()) {
7236 // If the use is a division and the transition is on the rhs,
7237 // we cannot promote the operation, otherwise we may create a
7238 // division by zero.
7239 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
7240 return false;
7241 continue;
7242 }
7243 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
7244 !isa<ConstantFP>(Val))
7245 return false;
7246 }
7247 // Check that the resulting operation is legal.
7248 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
7249 if (!ISDOpcode)
7250 return false;
7251 return StressStoreExtract ||
7252 TLI.isOperationLegalOrCustom(
7253 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
7254 }
7255
7256 /// Check whether or not \p Use can be combined
7257 /// with the transition.
7258 /// I.e., is it possible to do Use(Transition) => AnotherUse?
7259 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
7260
7261 /// Record \p ToBePromoted as part of the chain to be promoted.
7262 void enqueueForPromotion(Instruction *ToBePromoted) {
7263 InstsToBePromoted.push_back(ToBePromoted);
7264 }
7265
7266 /// Set the instruction that will be combined with the transition.
7267 void recordCombineInstruction(Instruction *ToBeCombined) {
7268 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
7269 CombineInst = ToBeCombined;
7270 }
7271
7272 /// Promote all the instructions enqueued for promotion if it is
7273 /// profitable.
7274 /// \return True if the promotion happened, false otherwise.
7275 bool promote() {
7276 // Check if there is something to promote.
7277 // Right now, if we do not have anything to combine with,
7278 // we assume the promotion is not profitable.
7279 if (InstsToBePromoted.empty() || !CombineInst)
7280 return false;
7281
7282 // Check cost.
7283 if (!StressStoreExtract && !isProfitableToPromote())
7284 return false;
7285
7286 // Promote.
7287 for (auto &ToBePromoted : InstsToBePromoted)
7288 promoteImpl(ToBePromoted);
7289 InstsToBePromoted.clear();
7290 return true;
7291 }
7292};
7293
7294} // end anonymous namespace
7295
7296void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
7297 // At this point, we know that all the operands of ToBePromoted but Def
7298 // can be statically promoted.
7299 // For Def, we need to use its parameter in ToBePromoted:
7300 // b = ToBePromoted ty1 a
7301 // Def = Transition ty1 b to ty2
7302 // Move the transition down.
7303 // 1. Replace all uses of the promoted operation by the transition.
7304 // = ... b => = ... Def.
7305 assert(ToBePromoted->getType() == Transition->getType() &&
7306 "The type of the result of the transition does not match "
7307 "the final type");
7308 ToBePromoted->replaceAllUsesWith(Transition);
7309 // 2. Update the type of the uses.
7310 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
7311 Type *TransitionTy = getTransitionType();
7312 ToBePromoted->mutateType(TransitionTy);
7313 // 3. Update all the operands of the promoted operation with promoted
7314 // operands.
7315 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
7316 for (Use &U : ToBePromoted->operands()) {
7317 Value *Val = U.get();
7318 Value *NewVal = nullptr;
7319 if (Val == Transition)
7320 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
7321 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
7322 isa<ConstantFP>(Val)) {
7323 // Use a splat constant if it is not safe to use undef.
7324 NewVal = getConstantVector(
7325 cast<Constant>(Val),
7326 isa<UndefValue>(Val) ||
7327 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
7328 } else
7329 llvm_unreachable("Did you modified shouldPromote and forgot to update "::llvm::llvm_unreachable_internal("Did you modified shouldPromote and forgot to update "
"this?", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 7330)
7330 "this?")::llvm::llvm_unreachable_internal("Did you modified shouldPromote and forgot to update "
"this?", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/CodeGenPrepare.cpp"
, 7330)
;
7331 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
7332 }
7333 Transition->moveAfter(ToBePromoted);
7334 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
7335}
7336
7337/// Some targets can do store(extractelement) with one instruction.
7338/// Try to push the extractelement towards the stores when the target
7339/// has this feature and this is profitable.
7340bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
7341 unsigned CombineCost = std::numeric_limits<unsigned>::max();
7342 if (DisableStoreExtract ||
7343 (!StressStoreExtract &&
7344 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
7345 Inst->getOperand(1), CombineCost)))
7346 return false;
7347
7348 // At this point we know that Inst is a vector to scalar transition.
7349 // Try to move it down the def-use chain, until:
7350 // - We can combine the transition with its single use
7351 // => we got rid of the transition.
7352 // - We escape the current basic block
7353 // => we would need to check that we are moving it to a cheaper place and
7354 // we do not do that for now.
7355 BasicBlock *Parent = Inst->getParent();
7356 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
7357 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
7358 // If the transition has more than one use, assume this is not going to be
7359 // beneficial.
7360 while (Inst->hasOneUse()) {
7361 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
7362 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
7363
7364 if (ToBePromoted->getParent() != Parent) {
7365 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
7366 << ToBePromoted->getParent()->getName()
7367 << ") than the transition (" << Parent->getName()
7368 << ").\n");
7369 return false;
7370 }
7371
7372 if (VPH.canCombine(ToBePromoted)) {
7373 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
7374 << "will be combined with: " << *ToBePromoted << '\n');
7375 VPH.recordCombineInstruction(ToBePromoted);
7376 bool Changed = VPH.promote();
7377 NumStoreExtractExposed += Changed;
7378 return Changed;
7379 }
7380
7381 LLVM_DEBUG(dbgs() << "Try promoting.\n");
7382 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
7383 return false;
7384
7385 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
7386
7387 VPH.enqueueForPromotion(ToBePromoted);
7388 Inst = ToBePromoted;
7389 }
7390 return false;
7391}
7392
7393/// For the instruction sequence of store below, F and I values
7394/// are bundled together as an i64 value before being stored into memory.
7395/// Sometimes it is more efficient to generate separate stores for F and I,
7396/// which can remove the bitwise instructions or sink them to colder places.
7397///
7398/// (store (or (zext (bitcast F to i32) to i64),
7399/// (shl (zext I to i64), 32)), addr) -->
7400/// (store F, addr) and (store I, addr+4)
7401///
7402/// Similarly, splitting for other merged store can also be beneficial, like:
7403/// For pair of {i32, i32}, i64 store --> two i32 stores.
7404/// For pair of {i32, i16}, i64 store --> two i32 stores.
7405/// For pair of {i16, i16}, i32 store --> two i16 stores.
7406/// For pair of {i16, i8}, i32 store --> two i16 stores.
7407/// For pair of {i8, i8}, i16 store --> two i8 stores.
7408///
7409/// We allow each target to determine specifically which kind of splitting is
7410/// supported.
7411///
7412/// The store patterns are commonly seen from the simple code snippet below
7413 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
7414/// void goo(const std::pair<int, float> &);
7415/// hoo() {
7416/// ...
7417/// goo(std::make_pair(tmp, ftmp));
7418/// ...
7419/// }
7420///
7421/// Although we already have similar splitting in DAG Combine, we duplicate
7422 /// it in CodeGenPrepare to catch the case in which the pattern spans
7423 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
7424/// during code expansion.
7425static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
7426 const TargetLowering &TLI) {
7427 // Handle simple but common cases only.
7428 Type *StoreType = SI.getValueOperand()->getType();
7429
7430 // The code below assumes shifting a value by <number of bits>,
7431 // whereas scalable vectors would have to be shifted by
7432 // <2log(vscale) + number of bits> in order to store the
7433 // low/high parts. Bailing out for now.
7434 if (isa<ScalableVectorType>(StoreType))
7435 return false;
7436
7437 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
7438 DL.getTypeSizeInBits(StoreType) == 0)
7439 return false;
7440
7441 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
7442 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
7443 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
7444 return false;
7445
7446 // Don't split the store if it is volatile.
7447 if (SI.isVolatile())
7448 return false;
7449
7450 // Match the following patterns:
7451 // (store (or (zext LValue to i64),
7452 // (shl (zext HValue to i64), 32)), HalfValBitSize)
7453 // or
7454 // (store (or (shl (zext HValue to i64), 32),
7455 // (zext LValue to i64)), HalfValBitSize)
7456 // Expect both operands of the OR and the first operand of the SHL to have
7457 // only one use.
7458 Value *LValue, *HValue;
7459 if (!match(SI.getValueOperand(),
7460 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
7461 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
7462 m_SpecificInt(HalfValBitSize))))))
7463 return false;
7464
7465 // Check LValue and HValue are int with size less or equal than 32.
7466 if (!LValue->getType()->isIntegerTy() ||
7467 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
7468 !HValue->getType()->isIntegerTy() ||
7469 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
7470 return false;
7471
7472 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
7473 // as the input of target query.
7474 auto *LBC = dyn_cast<BitCastInst>(LValue);
7475 auto *HBC = dyn_cast<BitCastInst>(HValue);
7476 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
7477 : EVT::getEVT(LValue->getType());
7478 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
7479 : EVT::getEVT(HValue->getType());
7480 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
7481 return false;
7482
7483 // Start to split store.
7484 IRBuilder<> Builder(SI.getContext());
7485 Builder.SetInsertPoint(&SI);
7486
7487 // If LValue/HValue is a bitcast in another BB, create a new one in the
7488 // current BB so it may be merged with the split stores by the DAG combiner.
7489 if (LBC && LBC->getParent() != SI.getParent())
7490 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
7491 if (HBC && HBC->getParent() != SI.getParent())
7492 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
7493
7494 bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
7495 auto CreateSplitStore = [&](Value *V, bool Upper) {
7496 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
7497 Value *Addr = Builder.CreateBitCast(
7498 SI.getOperand(1),
7499 SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
7500 Align Alignment = SI.getAlign();
7501 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
7502 if (IsOffsetStore) {
7503 Addr = Builder.CreateGEP(
7504 SplitStoreType, Addr,
7505 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
7506
7507 // When splitting the store in half, naturally one half will retain the
7508 // alignment of the original wider store, regardless of whether it was
7509 // over-aligned or not, while the other will require adjustment.
7510 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
7511 }
7512 Builder.CreateAlignedStore(V, Addr, Alignment);
7513 };
7514
7515 CreateSplitStore(LValue, false);
7516 CreateSplitStore(HValue, true);
7517
7518 // Delete the old store.
7519 SI.eraseFromParent();
7520 return true;
7521}
7522
7523// Return true if the GEP has two operands, the first operand is of a sequential
7524// type, and the second operand is a constant.
7525static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
7526 gep_type_iterator I = gep_type_begin(*GEP);
7527 return GEP->getNumOperands() == 2 &&
7528 I.isSequential() &&
7529 isa<ConstantInt>(GEP->getOperand(1));
7530}
7531
7532// Try unmerging GEPs to reduce liveness interference (register pressure) across
7533// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
7534// reducing liveness interference across those edges benefits global register
7535// allocation. Currently handles only certain cases.
7536//
7537// For example, unmerge %GEPI and %UGEPI as below.
7538//
7539// ---------- BEFORE ----------
7540// SrcBlock:
7541// ...
7542// %GEPIOp = ...
7543// ...
7544// %GEPI = gep %GEPIOp, Idx
7545// ...
7546// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
7547// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
7548 // (* %GEPIOp is alive on the indirectbr edges only because it's used by
7549// %UGEPI)
7550//
7551// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
7552// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
7553// ...
7554//
7555// DstBi:
7556// ...
7557// %UGEPI = gep %GEPIOp, UIdx
7558// ...
7559// ---------------------------
7560//
7561// ---------- AFTER ----------
7562// SrcBlock:
7563// ... (same as above)
7564// (* %GEPI is still alive on the indirectbr edges)
7565// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
7566// unmerging)
7567// ...
7568//
7569// DstBi:
7570// ...
7571// %UGEPI = gep %GEPI, (UIdx-Idx)
7572// ...
7573// ---------------------------
7574//
7575// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
7576// no longer alive on them.
7577//
7578 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
7579// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
7580 // not to disable further simplifications and optimizations as a result of GEP
7581// merging.
7582//
7583// Note this unmerging may increase the length of the data flow critical path
7584// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
7585// between the register pressure and the length of data-flow critical
7586// path. Restricting this to the uncommon IndirectBr case would minimize the
7587// impact of potentially longer critical path, if any, and the impact on compile
7588// time.
7589static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
7590 const TargetTransformInfo *TTI) {
7591 BasicBlock *SrcBlock = GEPI->getParent();
7592 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
7593 // (non-IndirectBr) cases exit early here.
7594 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
7595 return false;
7596 // Check that GEPI is a simple gep with a single constant index.
7597 if (!GEPSequentialConstIndexed(GEPI))
7598 return false;
7599 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
7600 // Check that GEPI is a cheap one.
7601 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
7602 TargetTransformInfo::TCK_SizeAndLatency)
7603 > TargetTransformInfo::TCC_Basic)
7604 return false;
7605 Value *GEPIOp = GEPI->getOperand(0);
7606 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
7607 if (!isa<Instruction>(GEPIOp))
7608 return false;
7609 auto *GEPIOpI = cast<Instruction>(GEPIOp);
7610 if (GEPIOpI->getParent() != SrcBlock)
7611 return false;
7612 // Check that GEP is used outside the block, meaning it's alive on the
7613 // IndirectBr edge(s).
7614 if (find_if(GEPI->users(), [&](User *Usr) {
7615 if (auto *I = dyn_cast<Instruction>(Usr)) {
7616 if (I->getParent() != SrcBlock) {
7617 return true;
7618 }
7619 }
7620 return false;
7621 }) == GEPI->users().end())
7622 return false;
7623 // The second elements of the GEP chains to be unmerged.
7624 std::vector<GetElementPtrInst *> UGEPIs;
7625  // Check each user of GEPIOp to see if unmerging would make GEPIOp not alive
7626  // on IndirectBr edges.
7627 for (User *Usr : GEPIOp->users()) {
7628 if (Usr == GEPI) continue;
7629 // Check if Usr is an Instruction. If not, give up.
7630 if (!isa<Instruction>(Usr))
7631 return false;
7632 auto *UI = cast<Instruction>(Usr);
7633    // If Usr is in the same block as GEPIOp, that is fine; skip it.
7634 if (UI->getParent() == SrcBlock)
7635 continue;
7636 // Check if Usr is a GEP. If not, give up.
7637 if (!isa<GetElementPtrInst>(Usr))
7638 return false;
7639 auto *UGEPI = cast<GetElementPtrInst>(Usr);
7640 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
7641 // the pointer operand to it. If so, record it in the vector. If not, give
7642 // up.
7643 if (!GEPSequentialConstIndexed(UGEPI))
7644 return false;
7645 if (UGEPI->getOperand(0) != GEPIOp)
7646 return false;
7647 if (GEPIIdx->getType() !=
7648 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
7649 return false;
7650 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
7651 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
7652 TargetTransformInfo::TCK_SizeAndLatency)
7653 > TargetTransformInfo::TCC_Basic)
7654 return false;
7655 UGEPIs.push_back(UGEPI);
7656 }
7657 if (UGEPIs.size() == 0)
7658 return false;
7659  // Check the materializing cost of (UIdx - Idx).
7660 for (GetElementPtrInst *UGEPI : UGEPIs) {
7661 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
7662 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
7663 unsigned ImmCost =
7664 TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
7665 TargetTransformInfo::TCK_SizeAndLatency);
7666 if (ImmCost > TargetTransformInfo::TCC_Basic)
7667 return false;
7668 }
7669 // Now unmerge between GEPI and UGEPIs.
7670 for (GetElementPtrInst *UGEPI : UGEPIs) {
7671 UGEPI->setOperand(0, GEPI);
7672 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
7673 Constant *NewUGEPIIdx =
7674 ConstantInt::get(GEPIIdx->getType(),
7675 UGEPIIdx->getValue() - GEPIIdx->getValue());
7676 UGEPI->setOperand(1, NewUGEPIIdx);
7677 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
7678 // inbounds to avoid UB.
7679 if (!GEPI->isInBounds()) {
7680 UGEPI->setIsInBounds(false);
7681 }
7682 }
7683 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
7684 // alive on IndirectBr edges).
7685  assert(find_if(GEPIOp->users(), [&](User *Usr) {
7686           return cast<Instruction>(Usr)->getParent() != SrcBlock;
7687         }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
7688 return true;
7689}
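// As a small worked example (illustrative values only): if SrcBlock contains
//   %GEPI = getelementptr i8, i8* %GEPIOp, i64 4
// and a destination block of the indirectbr contains
//   %UGEPI = getelementptr i8, i8* %GEPIOp, i64 12
// then the unmerging loop above rewrites the latter to
//   %UGEPI = getelementptr i8, i8* %GEPI, i64 8   ; 8 == UIdx - Idx
// so only %GEPI, not %GEPIOp, needs to stay live across the indirectbr edges.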
7690
7691bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
7692 // Bail out if we inserted the instruction to prevent optimizations from
7693 // stepping on each other's toes.
7694 if (InsertedInsts.count(I))
7695 return false;
7696
7697 // TODO: Move into the switch on opcode below here.
7698 if (PHINode *P = dyn_cast<PHINode>(I)) {
7699 // It is possible for very late stage optimizations (such as SimplifyCFG)
7700 // to introduce PHI nodes too late to be cleaned up. If we detect such a
7701 // trivial PHI, go ahead and zap it here.
7702 if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
7703 LargeOffsetGEPMap.erase(P);
7704 P->replaceAllUsesWith(V);
7705 P->eraseFromParent();
7706 ++NumPHIsElim;
7707 return true;
7708 }
7709 return false;
7710 }
7711
7712 if (CastInst *CI = dyn_cast<CastInst>(I)) {
7713 // If the source of the cast is a constant, then this should have
7714 // already been constant folded. The only reason NOT to constant fold
7715 // it is if something (e.g. LSR) was careful to place the constant
7716    // evaluation in a block other than the one that uses it (e.g. to hoist
7717 // the address of globals out of a loop). If this is the case, we don't
7718 // want to forward-subst the cast.
7719 if (isa<Constant>(CI->getOperand(0)))
7720 return false;
7721
7722 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
7723 return true;
7724
7725 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
7726 /// Sink a zext or sext into its user blocks if the target type doesn't
7727 /// fit in one register
7728 if (TLI->getTypeAction(CI->getContext(),
7729 TLI->getValueType(*DL, CI->getType())) ==
7730 TargetLowering::TypeExpandInteger) {
7731 return SinkCast(CI);
7732 } else {
7733 bool MadeChange = optimizeExt(I);
7734 return MadeChange | optimizeExtUses(I);
7735 }
7736 }
7737 return false;
7738 }
7739
7740 if (auto *Cmp = dyn_cast<CmpInst>(I))
7741 if (optimizeCmp(Cmp, ModifiedDT))
7742 return true;
7743
7744 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
7745 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
7746 bool Modified = optimizeLoadExt(LI);
7747 unsigned AS = LI->getPointerAddressSpace();
7748 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
7749 return Modified;
7750 }
7751
7752 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
7753 if (splitMergedValStore(*SI, *DL, *TLI))
7754 return true;
7755 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
7756 unsigned AS = SI->getPointerAddressSpace();
7757 return optimizeMemoryInst(I, SI->getOperand(1),
7758 SI->getOperand(0)->getType(), AS);
7759 }
7760
7761 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
7762 unsigned AS = RMW->getPointerAddressSpace();
7763 return optimizeMemoryInst(I, RMW->getPointerOperand(),
7764 RMW->getType(), AS);
7765 }
7766
7767 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
7768 unsigned AS = CmpX->getPointerAddressSpace();
7769 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
7770 CmpX->getCompareOperand()->getType(), AS);
7771 }
7772
7773 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
7774
7775 if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking)
7776 return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
7777
7778 // TODO: Move this into the switch on opcode - it handles shifts already.
7779 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
7780 BinOp->getOpcode() == Instruction::LShr)) {
7781 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
7782 if (CI && TLI->hasExtractBitsInsn())
7783 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
7784 return true;
7785 }
7786
7787 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
7788 if (GEPI->hasAllZeroIndices()) {
7789 /// The GEP operand must be a pointer, so must its result -> BitCast
7790 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
7791 GEPI->getName(), GEPI);
7792 NC->setDebugLoc(GEPI->getDebugLoc());
7793 GEPI->replaceAllUsesWith(NC);
7794 GEPI->eraseFromParent();
7795 ++NumGEPsElim;
7796 optimizeInst(NC, ModifiedDT);
7797 return true;
7798 }
7799 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
7800 return true;
7801 }
7802 return false;
7803 }
7804
7805 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
7806    // freeze(icmp a, const) -> icmp (freeze a), const
7807 // This helps generate efficient conditional jumps.
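    // As an illustrative sketch (IR and names invented here), the transform
    // turns
    //   %c = icmp eq i32 %x, 0
    //   %f = freeze i1 %c
    //   br i1 %f, ...
    // into
    //   %x.fr = freeze i32 %x
    //   %c = icmp eq i32 %x.fr, 0
    //   br i1 %c, ...
    // so the branch consumes the compare directly.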
7808 Instruction *CmpI = nullptr;
7809 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
7810 CmpI = II;
7811 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
7812 CmpI = F->getFastMathFlags().none() ? F : nullptr;
7813
7814 if (CmpI && CmpI->hasOneUse()) {
7815 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
7816 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
7817 isa<ConstantPointerNull>(Op0);
7818 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
7819 isa<ConstantPointerNull>(Op1);
7820 if (Const0 || Const1) {
7821 if (!Const0 || !Const1) {
7822 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
7823 F->takeName(FI);
7824 CmpI->setOperand(Const0 ? 1 : 0, F);
7825 }
7826 FI->replaceAllUsesWith(CmpI);
7827 FI->eraseFromParent();
7828 return true;
7829 }
7830 }
7831 return false;
7832 }
7833
7834 if (tryToSinkFreeOperands(I))
7835 return true;
7836
7837 switch (I->getOpcode()) {
7838 case Instruction::Shl:
7839 case Instruction::LShr:
7840 case Instruction::AShr:
7841 return optimizeShiftInst(cast<BinaryOperator>(I));
7842 case Instruction::Call:
7843 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
7844 case Instruction::Select:
7845 return optimizeSelectInst(cast<SelectInst>(I));
7846 case Instruction::ShuffleVector:
7847 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
7848 case Instruction::Switch:
7849 return optimizeSwitchInst(cast<SwitchInst>(I));
7850 case Instruction::ExtractElement:
7851 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
7852 }
7853
7854 return false;
7855}
7856
7857/// Given an OR instruction, check to see if this is a bitreverse
7858/// idiom. If so, insert the new intrinsic and return true.
7859bool CodeGenPrepare::makeBitReverse(Instruction &I) {
7860 if (!I.getType()->isIntegerTy() ||
7861 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
7862 TLI->getValueType(*DL, I.getType(), true)))
7863 return false;
7864
7865 SmallVector<Instruction*, 4> Insts;
7866 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
7867 return false;
7868 Instruction *LastInst = Insts.back();
7869 I.replaceAllUsesWith(LastInst);
7870 RecursivelyDeleteTriviallyDeadInstructions(
7871 &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
7872 return true;
7873}
7874
7875// In this pass we look for GEP and cast instructions that are used
7876// across basic blocks and rewrite them to improve basic-block-at-a-time
7877// selection.
7878bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
7879 SunkAddrs.clear();
7880 bool MadeChange = false;
7881
7882 CurInstIterator = BB.begin();
7883 while (CurInstIterator != BB.end()) {
7884 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
7885 if (ModifiedDT)
7886 return true;
7887 }
7888
7889 bool MadeBitReverse = true;
7890 while (MadeBitReverse) {
7891 MadeBitReverse = false;
7892 for (auto &I : reverse(BB)) {
7893 if (makeBitReverse(I)) {
7894 MadeBitReverse = MadeChange = true;
7895 break;
7896 }
7897 }
7898 }
7899 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
7900
7901 return MadeChange;
7902}
7903
7904// Some CGP optimizations may move or alter what's computed in a block. Check
7905// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
7906bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
7907  assert(isa<DbgValueInst>(I));
7908 DbgValueInst &DVI = *cast<DbgValueInst>(I);
7909
7910 // Does this dbg.value refer to a sunk address calculation?
7911 bool AnyChange = false;
7912 for (Value *Location : DVI.getValues()) {
7913 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
7914 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
7915 if (SunkAddr) {
7916 // Point dbg.value at locally computed address, which should give the best
7917 // opportunity to be accurately lowered. This update may change the type
7918 // of pointer being referred to; however this makes no difference to
7919 // debugging information, and we can't generate bitcasts that may affect
7920 // codegen.
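      // As an illustrative sketch (names invented here): if optimizeMemoryInst
      // sank an address computation such as
      //   %sunkaddr = getelementptr i8, i8* %base, i64 16
      // into this block, a dbg.value whose location is still the original
      // address value gets redirected to %sunkaddr by the call below.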
7921 DVI.replaceVariableLocationOp(Location, SunkAddr);
7922 AnyChange = true;
7923 }
7924 }
7925 return AnyChange;
7926}
7927
7928// A llvm.dbg.value may be using a value before its definition, due to
7929// optimizations in this pass and others. Scan for such dbg.values, and rescue
7930// them by moving the dbg.value to immediately after the value definition.
7931// FIXME: Ideally this should never be necessary, and this has the potential
7932// to re-order dbg.value intrinsics.
7933bool CodeGenPrepare::placeDbgValues(Function &F) {
7934 bool MadeChange = false;
7935 DominatorTree DT(F);
7936
7937 for (BasicBlock &BB : F) {
7938 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
7939 Instruction *Insn = &*BI++;
7940 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
7941 if (!DVI)
7942 continue;
7943
7944 SmallVector<Instruction *, 4> VIs;
7945 for (Value *V : DVI->getValues())
7946 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
7947 VIs.push_back(VI);
7948
7949        // This DVI may depend on multiple instructions, complicating any
7950        // potential sink. This block takes the defensive approach, opting to
7951        // "undef" the DVI if it depends on more than one instruction and any of
7952        // them does not dominate the DVI.
7953 for (Instruction *VI : VIs) {
7954 if (VI->isTerminator())
7955 continue;
7956
7957 // If VI is a phi in a block with an EHPad terminator, we can't insert
7958 // after it.
7959 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
7960 continue;
7961
7962 // If the defining instruction dominates the dbg.value, we do not need
7963 // to move the dbg.value.
7964 if (DT.dominates(VI, DVI))
7965 continue;
7966
7967 // If we depend on multiple instructions and any of them doesn't
7968 // dominate this DVI, we probably can't salvage it: moving it to
7969 // after any of the instructions could cause us to lose the others.
7970 if (VIs.size() > 1) {
7971          LLVM_DEBUG(
7972              dbgs()
7973                  << "Unable to find valid location for Debug Value, undefing:\n"
7974                  << *DVI);
7975 DVI->setUndef();
7976 break;
7977 }
7978
7979        LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
7980                          << *DVI << ' ' << *VI);
7981 DVI->removeFromParent();
7982 if (isa<PHINode>(VI))
7983 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
7984 else
7985 DVI->insertAfter(VI);
7986 MadeChange = true;
7987 ++NumDbgValueMoved;
7988 }
7989 }
7990 }
7991 return MadeChange;
7992}
7993
7994// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
7995// probes can be chained dependencies of other regular DAG nodes and block DAG
7996// combine optimizations.
7997bool CodeGenPrepare::placePseudoProbes(Function &F) {
7998 bool MadeChange = false;
7999 for (auto &Block : F) {
8000    // Move the remaining probes to the beginning of the block.
8001 auto FirstInst = Block.getFirstInsertionPt();
8002 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
8003 ++FirstInst;
8004 BasicBlock::iterator I(FirstInst);
8005 I++;
8006 while (I != Block.end()) {
8007 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
8008 II->moveBefore(&*FirstInst);
8009 MadeChange = true;
8010 }
8011 }
8012 }
8013 return MadeChange;
8014}
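// As an illustrative sketch (IR not taken from this file) of what the loop
// above does: a block such as
//   %x = add i32 %a, %b
//   call void @llvm.pseudoprobe(...)        ; operands elided
//   %y = mul i32 %x, %c
// has the probe call hoisted to just before the first real (non-debug,
// non-pseudo) instruction of the block, so it no longer sits between the add
// and the mul when the block is turned into a selection DAG.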
8015
8016/// Scale down both weights to fit into uint32_t.
8017static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
8018 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
8019 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
8020 NewTrue = NewTrue / Scale;
8021 NewFalse = NewFalse / Scale;
8022}
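// As a worked example (numbers chosen for illustration): with NewTrue = 2^33
// and NewFalse = 2^31, NewMax = 2^33, so Scale = 2^33 / (2^32 - 1) + 1 = 3 and
// the weights become 2863311530 and 715827882; both now fit in uint32_t and
// the original 4:1 ratio is preserved up to integer rounding.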
8023
8024/// Some targets prefer to split a conditional branch like:
8025/// \code
8026/// %0 = icmp ne i32 %a, 0
8027/// %1 = icmp ne i32 %b, 0
8028/// %or.cond = or i1 %0, %1
8029/// br i1 %or.cond, label %TrueBB, label %FalseBB
8030/// \endcode
8031/// into multiple branch instructions like:
8032/// \code
8033/// bb1:
8034/// %0 = icmp ne i32 %a, 0
8035/// br i1 %0, label %TrueBB, label %bb2
8036/// bb2:
8037/// %1 = icmp ne i32 %b, 0
8038/// br i1 %1, label %TrueBB, label %FalseBB
8039/// \endcode
8040/// This usually allows instruction selection to do even further optimizations
8041/// and combine the compare with the branch instruction. Currently this is
8042/// applied for targets which have "cheap" jump instructions.
8043///
8044/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
8045///
8046bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
8047 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
8048 return false;
8049
8050 bool MadeChange = false;
8051 for (auto &BB : F) {
8052 // Does this BB end with the following?
8053 // %cond1 = icmp|fcmp|binary instruction ...
8054 // %cond2 = icmp|fcmp|binary instruction ...
8055    //   %cond.or = or|and i1 %cond1, %cond2
8056    //   br i1 %cond.or, label %dest1, label %dest2
8057 Instruction *LogicOp;
8058 BasicBlock *TBB, *FBB;
8059 if (!match(BB.getTerminator(),
8060 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
8061 continue;
8062
8063 auto *Br1 = cast<BranchInst>(BB.getTerminator());
8064 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
8065 continue;
8066
8068    // The merging of a mostly-empty BB can cause a degenerate branch.
8068 if (TBB == FBB)
8069 continue;
8070
8071 unsigned Opc;
8072 Value *Cond1, *Cond2;
8073 if (match(LogicOp,
8074 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
8075 Opc = Instruction::And;
8076 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
8077 m_OneUse(m_Value(Cond2)))))
8078 Opc = Instruction::Or;
8079 else
8080 continue;
8081
8082 auto IsGoodCond = [](Value *Cond) {
8083 return match(
8084 Cond,
8085 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
8086 m_LogicalOr(m_Value(), m_Value()))));
8087 };
8088 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
8089 continue;
8090
8091    LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
8092
8093 // Create a new BB.
8094 auto *TmpBB =
8095 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
8096 BB.getParent(), BB.getNextNode());
8097
8098    // Update the original basic block: have the branch instruction use the first
8099    // condition directly and remove the no-longer-needed and/or instruction.
8100 Br1->setCondition(Cond1);
8101 LogicOp->eraseFromParent();
8102
8103 // Depending on the condition we have to either replace the true or the
8104 // false successor of the original branch instruction.
8105 if (Opc == Instruction::And)
8106 Br1->setSuccessor(0, TmpBB);
8107 else
8108 Br1->setSuccessor(1, TmpBB);
8109
8110 // Fill in the new basic block.
8111 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
8112 if (auto *I = dyn_cast<Instruction>(Cond2)) {
8113 I->removeFromParent();
8114 I->insertBefore(Br2);
8115 }
8116
8117    // Update PHI nodes in both successors. The original BB needs to be
8118    // replaced in one successor's PHI nodes, because the branch now comes from
8119    // the newly generated BB (TmpBB). In the other successor we need to add one
8120    // incoming edge to the PHI nodes, because both branch instructions now
8121    // target the same successor. Depending on the original branch condition
8122    // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
8123    // we perform the correct update for the PHI nodes.
8124    // This doesn't change the successor order of the just-created branch
8125    // instruction (or any other instruction).
8126 if (Opc == Instruction::Or)
8127 std::swap(TBB, FBB);
8128
8129 // Replace the old BB with the new BB.
8130 TBB->replacePhiUsesWith(&BB, TmpBB);
8131
8132    // Add another incoming edge from the new BB.
8133 for (PHINode &PN : FBB->phis()) {
8134 auto *Val = PN.getIncomingValueForBlock(&BB);
8135 PN.addIncoming(Val, TmpBB);
8136 }
8137
8138 // Update the branch weights (from SelectionDAGBuilder::
8139 // FindMergedConditions).
8140 if (Opc == Instruction::Or) {
8141 // Codegen X | Y as:
8142 // BB1:
8143 // jmp_if_X TBB
8144 // jmp TmpBB
8145 // TmpBB:
8146 // jmp_if_Y TBB
8147 // jmp FBB
8148 //
8149
8150      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
8151 // The requirement is that
8152 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
8153 // = TrueProb for original BB.
8154 // Assuming the original weights are A and B, one choice is to set BB1's
8155 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
8156 // assumes that
8157 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
8158 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
8159 // TmpBB, but the math is more complicated.
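      // For instance (illustrative numbers): with original weights A = 8 and
      // B = 2, Br1 gets weights 8 and 12 and Br2 gets weights 8 and 4, so
      // TrueProb for BB1 + FalseProb for BB1 * TrueProb for TmpBB
      //   = 8/20 + (12/20) * (8/12) = 0.8 = TrueProb for the original BB.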
8160 uint64_t TrueWeight, FalseWeight;
8161 if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
8162 uint64_t NewTrueWeight = TrueWeight;
8163 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
8164 scaleWeights(NewTrueWeight, NewFalseWeight);
8165 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
8166                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
8167
8168 NewTrueWeight = TrueWeight;
8169 NewFalseWeight = 2 * FalseWeight;
8170 scaleWeights(NewTrueWeight, NewFalseWeight);
8171 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
8172                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
8173 }
8174 } else {
8175 // Codegen X & Y as:
8176 // BB1:
8177 // jmp_if_X TmpBB
8178 // jmp FBB
8179 // TmpBB:
8180 // jmp_if_Y TBB
8181 // jmp FBB
8182 //
8183 // This requires creation of TmpBB after CurBB.
8184
8185 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
8186 // The requirement is that
8187 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
8188 // = FalseProb for original BB.
8189 // Assuming the original weights are A and B, one choice is to set BB1's
8190 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
8191 // assumes that
8192 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
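      // For instance (illustrative numbers): with original weights A = 8 and
      // B = 2, Br1 gets weights 18 and 2 and Br2 gets weights 16 and 2, so
      // FalseProb for BB1 + TrueProb for BB1 * FalseProb for TmpBB
      //   = 2/20 + (18/20) * (2/18) = 0.2 = FalseProb for the original BB.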
8193 uint64_t TrueWeight, FalseWeight;
8194 if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
8195 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
8196 uint64_t NewFalseWeight = FalseWeight;
8197 scaleWeights(NewTrueWeight, NewFalseWeight);
8198 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
8199                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
8200
8201 NewTrueWeight = 2 * TrueWeight;
8202 NewFalseWeight = FalseWeight;
8203 scaleWeights(NewTrueWeight, NewFalseWeight);
8204 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
8205                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
8206 }
8207 }
8208
8209 ModifiedDT = true;
8210 MadeChange = true;
8211
8212    LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
8213               TmpBB->dump());
8214 }
8215 return MadeChange;
8216}