Bug Summary

File: lib/Transforms/IPO/GlobalOpt.cpp
Warning: line 2534, column 22
Called C++ object pointer is null
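
For context, a minimal sketch of the pattern this checker flags. This is a hypothetical example, not the flagged expression itself (line 2534 falls outside the excerpt below); dyn_cast and GlobalVariable::isConstant are real LLVM APIs.

    // Hypothetical illustration only: dyn_cast returns nullptr on a type
    // mismatch, so the member call below is a possible null dereference.
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/Support/Casting.h"

    static bool example(llvm::Value *V) {
      llvm::GlobalVariable *GV = llvm::dyn_cast<llvm::GlobalVariable>(V);
      return GV->isConstant(); // warning: called C++ object pointer is null
    }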

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name GlobalOpt.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn361465/build-llvm/lib/Transforms/IPO -I /build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO -I /build/llvm-toolchain-snapshot-9~svn361465/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn361465/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn361465/build-llvm/lib/Transforms/IPO -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn361465=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-05-24-031927-21217-1 -x c++ /build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp -faddrsig
1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass transforms simple global variables that never have their address
10// taken. If obviously true, it marks read/write globals as constant, deletes
11// variables only stored to, etc.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/IPO/GlobalOpt.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/ConstantFolding.h"
25#include "llvm/Analysis/MemoryBuiltins.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/TargetTransformInfo.h"
28#include "llvm/Transforms/Utils/Local.h"
29#include "llvm/BinaryFormat/Dwarf.h"
30#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/CallingConv.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfoMetadata.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/Dominators.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/GetElementPtrTypeIterator.h"
42#include "llvm/IR/GlobalAlias.h"
43#include "llvm/IR/GlobalValue.h"
44#include "llvm/IR/GlobalVariable.h"
45#include "llvm/IR/InstrTypes.h"
46#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/IntrinsicInst.h"
49#include "llvm/IR/Module.h"
50#include "llvm/IR/Operator.h"
51#include "llvm/IR/Type.h"
52#include "llvm/IR/Use.h"
53#include "llvm/IR/User.h"
54#include "llvm/IR/Value.h"
55#include "llvm/IR/ValueHandle.h"
56#include "llvm/Pass.h"
57#include "llvm/Support/AtomicOrdering.h"
58#include "llvm/Support/Casting.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/MathExtras.h"
63#include "llvm/Support/raw_ostream.h"
64#include "llvm/Transforms/IPO.h"
65#include "llvm/Transforms/Utils/CtorUtils.h"
66#include "llvm/Transforms/Utils/Evaluator.h"
67#include "llvm/Transforms/Utils/GlobalStatus.h"
68#include <cassert>
69#include <cstdint>
70#include <utility>
71#include <vector>
72
73using namespace llvm;
74
75#define DEBUG_TYPE "globalopt"
76
77STATISTIC(NumMarked , "Number of globals marked constant");
78STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
79STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
80STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
81STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
82STATISTIC(NumDeleted , "Number of globals deleted");
83STATISTIC(NumGlobUses , "Number of global uses devirtualized");
84STATISTIC(NumLocalized , "Number of globals localized");
85STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
86STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
87STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
88STATISTIC(NumNestRemoved , "Number of nest attributes removed");
89STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
90STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
91STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
92STATISTIC(NumInternalFunc, "Number of internal functions");
93STATISTIC(NumColdCC, "Number of functions marked coldcc");
94
95static cl::opt<bool>
96 EnableColdCCStressTest("enable-coldcc-stress-test",
97 cl::desc("Enable stress test of coldcc by adding "
98 "calling conv to all internal functions."),
99 cl::init(false), cl::Hidden);
100
101static cl::opt<int> ColdCCRelFreq(
102 "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
103 cl::desc(
104 "Maximum block frequency, expressed as a percentage of caller's "
105 "entry frequency, for a call site to be considered cold for enabling"
106 "coldcc"));
107
108/// Is this global variable possibly used by a leak checker as a root? If so,
109/// we might not really want to eliminate the stores to it.
110static bool isLeakCheckerRoot(GlobalVariable *GV) {
111 // A global variable is a root if it is a pointer, or could plausibly contain
112 // a pointer. There are two challenges; one is that we could have a struct
113// that has an inner member which is a pointer. We recurse through the type to
114 // detect these (up to a point). The other is that we may actually be a union
115 // of a pointer and another type, and so our LLVM type is an integer which
116 // gets converted into a pointer, or our type is an [i8 x #] with a pointer
117 // potentially contained here.
118
119 if (GV->hasPrivateLinkage())
120 return false;
121
122 SmallVector<Type *, 4> Types;
123 Types.push_back(GV->getValueType());
124
125 unsigned Limit = 20;
126 do {
127 Type *Ty = Types.pop_back_val();
128 switch (Ty->getTypeID()) {
129 default: break;
130 case Type::PointerTyID: return true;
131 case Type::ArrayTyID:
132 case Type::VectorTyID: {
133 SequentialType *STy = cast<SequentialType>(Ty);
134 Types.push_back(STy->getElementType());
135 break;
136 }
137 case Type::StructTyID: {
138 StructType *STy = cast<StructType>(Ty);
139 if (STy->isOpaque()) return true;
140 for (StructType::element_iterator I = STy->element_begin(),
141 E = STy->element_end(); I != E; ++I) {
142 Type *InnerTy = *I;
143 if (isa<PointerType>(InnerTy)) return true;
144 if (isa<CompositeType>(InnerTy))
145 Types.push_back(InnerTy);
146 }
147 break;
148 }
149 }
150 if (--Limit == 0) return true;
151 } while (!Types.empty());
152 return false;
153}
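
As a hedged illustration of what this predicate accepts (hypothetical C++ globals and their lowered LLVM types; names are invented):

    struct Node { int Key; Node *Next; }; // struct with inner pointer -> root
    Node *ListHead;                       // pointer type              -> root
    int Counters[8];                      // array of plain ints       -> not a root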
154
155/// Given a value that is stored to a global but never read, determine whether
156/// it's safe to remove the store and the chain of computation that feeds the
157/// store.
158static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
159 do {
160 if (isa<Constant>(V))
161 return true;
162 if (!V->hasOneUse())
163 return false;
164 if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
165 isa<GlobalValue>(V))
166 return false;
167 if (isAllocationFn(V, TLI))
168 return true;
169
170 Instruction *I = cast<Instruction>(V);
171 if (I->mayHaveSideEffects())
172 return false;
173 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
174 if (!GEP->hasAllConstantIndices())
175 return false;
176 } else if (I->getNumOperands() != 1) {
177 return false;
178 }
179
180 V = I->getOperand(0);
181 } while (true);
182}
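
A hypothetical C-level example of a chain this accepts: every intermediate value has a single use, nothing has side effects, and the chain bottoms out in an allocation call, so the store and everything feeding it can be deleted.

    #include <stdlib.h>

    int *GV; // stored to below, never read

    static void init(void) {
      int *p = (int *)malloc(64); // allocation function: removable
      GV = p + 2;                 // pointer arithmetic with a constant offset
    }                             // (lowers to a GEP with all-constant indices)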
183
184/// This GV is a pointer root. Loop over all users of the global and clean up
185/// any that obviously store a value that is not dynamically allocated, since
186/// only dynamically allocated values matter to a leak checker.
187static bool CleanupPointerRootUsers(Value *V,
188 const TargetLibraryInfo *TLI) {
189 // A brief explanation of leak checkers. The goal is to find bugs where
190 // pointers are forgotten, causing an accumulating growth in memory
191 // usage over time. The common strategy for leak checkers is to whitelist the
192 // memory pointed to by globals at exit. This is popular because it also
193 // solves another problem where the main thread of a C++ program may shut down
194 // before other threads that are still expecting to use those globals. To
195 // handle that case, we expect the program may create a singleton and never
196 // destroy it.
197
198 bool Changed = false;
199
200 // If Dead[n].first is the only use of a malloc result, we can delete its
201 // chain of computation and the store to the global in Dead[n].second.
202 SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
203
204 // Constants can't be pointers to dynamically allocated memory.
205 for (Value::user_iterator UI = V->user_begin(), E = V->user_end();
206 UI != E;) {
207 User *U = *UI++;
208 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
209 Value *V = SI->getValueOperand();
210 if (isa<Constant>(V)) {
211 Changed = true;
212 SI->eraseFromParent();
213 } else if (Instruction *I = dyn_cast<Instruction>(V)) {
214 if (I->hasOneUse())
215 Dead.push_back(std::make_pair(I, SI));
216 }
217 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
218 if (isa<Constant>(MSI->getValue())) {
219 Changed = true;
220 MSI->eraseFromParent();
221 } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
222 if (I->hasOneUse())
223 Dead.push_back(std::make_pair(I, MSI));
224 }
225 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
226 GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
227 if (MemSrc && MemSrc->isConstant()) {
228 Changed = true;
229 MTI->eraseFromParent();
230 } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
231 if (I->hasOneUse())
232 Dead.push_back(std::make_pair(I, MTI));
233 }
234 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
235 if (CE->getOpcode() == Instruction::GetElementPtr) {
236 Changed |= CleanupPointerRootUsers(CE, TLI);
237 }
238 if (CE->use_empty()) {
239 CE->destroyConstant();
240 Changed = true;
241 }
242 } else if (Constant *C = dyn_cast<Constant>(U)) {
243 if (isSafeToDestroyConstant(C)) {
244 C->destroyConstant();
245 // This could have invalidated UI, start over from scratch.
246 Dead.clear();
247 CleanupPointerRootUsers(V, TLI);
248 return true;
249 }
250 }
251 }
252
253 for (int i = 0, e = Dead.size(); i != e; ++i) {
254 if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
255 Dead[i].second->eraseFromParent();
256 Instruction *I = Dead[i].first;
257 do {
258 if (isAllocationFn(I, TLI))
259 break;
260 Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
261 if (!J)
262 break;
263 I->eraseFromParent();
264 I = J;
265 } while (true);
266 I->eraseFromParent();
267 }
268 }
269
270 return Changed;
271}
272
273/// We just marked GV constant. Loop over all users of the global, cleaning up
274/// the obvious ones. This is largely just a quick scan over the use list to
275/// clean up the easy and obvious cruft. This returns true if it made a change.
276static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
277 const DataLayout &DL,
278 TargetLibraryInfo *TLI) {
279 bool Changed = false;
280 // Note that we need to use a weak value handle for the worklist items. When
281// we delete a constant array, we may also be holding a pointer to one of its
282 // elements (or an element of one of its elements if we're dealing with an
283 // array of arrays) in the worklist.
284 SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
285 while (!WorkList.empty()) {
286 Value *UV = WorkList.pop_back_val();
287 if (!UV)
288 continue;
289
290 User *U = cast<User>(UV);
291
292 if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
293 if (Init) {
294 // Replace the load with the initializer.
295 LI->replaceAllUsesWith(Init);
296 LI->eraseFromParent();
297 Changed = true;
298 }
299 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
300 // Store must be unreachable or storing Init into the global.
301 SI->eraseFromParent();
302 Changed = true;
303 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
304 if (CE->getOpcode() == Instruction::GetElementPtr) {
305 Constant *SubInit = nullptr;
306 if (Init)
307 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
308 Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
309 } else if ((CE->getOpcode() == Instruction::BitCast &&
310 CE->getType()->isPointerTy()) ||
311 CE->getOpcode() == Instruction::AddrSpaceCast) {
312 // Pointer cast, delete any stores and memsets to the global.
313 Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
314 }
315
316 if (CE->use_empty()) {
317 CE->destroyConstant();
318 Changed = true;
319 }
320 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
321 // Do not transform "gepinst (gep constexpr (GV))" here, because forming
322 // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
323 // and will invalidate our notion of what Init is.
324 Constant *SubInit = nullptr;
325 if (!isa<ConstantExpr>(GEP->getOperand(0))) {
326 ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
327 ConstantFoldInstruction(GEP, DL, TLI));
328 if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
329 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
330
331 // If the initializer is an all-null value and we have an inbounds GEP,
332 // we already know what the result of any load from that GEP is.
333 // TODO: Handle splats.
334 if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
335 SubInit = Constant::getNullValue(GEP->getResultElementType());
336 }
337 Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
338
339 if (GEP->use_empty()) {
340 GEP->eraseFromParent();
341 Changed = true;
342 }
343 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
344 if (MI->getRawDest() == V) {
345 MI->eraseFromParent();
346 Changed = true;
347 }
348
349 } else if (Constant *C = dyn_cast<Constant>(U)) {
350 // If we have a chain of dead constantexprs or other things dangling from
351 // us, and if they are all dead, nuke them without remorse.
352 if (isSafeToDestroyConstant(C)) {
353 C->destroyConstant();
354 CleanupConstantGlobalUsers(V, Init, DL, TLI);
355 return true;
356 }
357 }
358 }
359 return Changed;
360}
361
362static bool isSafeSROAElementUse(Value *V);
363
364/// Return true if the specified GEP is a safe user of a derived
365/// expression from a global that we want to SROA.
366static bool isSafeSROAGEP(User *U) {
367 // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
368 // don't like < 3 operand CE's, and we don't like non-constant integer
369 // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
370 // value of C.
371 if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
372 !cast<Constant>(U->getOperand(1))->isNullValue())
373 return false;
374
375 gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
376 ++GEPI; // Skip over the pointer index.
377
378// For all other levels we require that the indices are constant and in range.
379 // In particular, consider: A[0][i]. We cannot know that the user isn't doing
380 // invalid things like allowing i to index an out-of-range subscript that
381 // accesses A[1]. This can also happen between different members of a struct
382 // in llvm IR.
383 for (; GEPI != E; ++GEPI) {
384 if (GEPI.isStruct())
385 continue;
386
387 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
388 if (!IdxVal || (GEPI.isBoundedSequential() &&
389 IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
390 return false;
391 }
392
393 return llvm::all_of(U->users(),
394 [](User *UU) { return isSafeSROAElementUse(UU); });
395}
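
A hypothetical C++-level illustration of the 'gep GV, 0, C, ...' rule (constant, in-range indices only; names invented):

    struct S { int A; float B[4]; };
    S G;

    int   ok1()      { return G.A; }    // gep @G, 0, 0    -> safe to SRA
    float ok2()      { return G.B[3]; } // gep @G, 0, 1, 3 -> safe to SRA
    float bad(int i) { return G.B[i]; } // non-constant index: blocks SRA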
396
397/// Return true if the specified GEP is a safe user of a derived
398/// expression from a global that we want to SROA.
399static bool isSafeSubSROAGEP(User *U) {
400
401 // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
402 // don't like < 3 operand CE's, and we don't like non-constant integer
403 // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
404 // value of C.
405 if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
406 !cast<Constant>(U->getOperand(1))->isNullValue())
407 return false;
408
409 return llvm::all_of(U->users(),
410 [](User *UU) { return isSafeSROAElementUse(UU); });
411}
412
413/// Return true if the specified instruction is a safe user of a derived
414/// expression from a global that we want to SROA.
415static bool isSafeSROAElementUse(Value *V) {
416 // We might have a dead and dangling constant hanging off of here.
417 if (Constant *C = dyn_cast<Constant>(V))
418 return isSafeToDestroyConstant(C);
419
420 Instruction *I = dyn_cast<Instruction>(V);
421 if (!I) return false;
422
423 // Loads are ok.
424 if (isa<LoadInst>(I)) return true;
425
426 // Stores *to* the pointer are ok.
427 if (StoreInst *SI = dyn_cast<StoreInst>(I))
428 return SI->getOperand(0) != V;
429
430 // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
431 return isa<GetElementPtrInst>(I) && isSafeSubSROAGEP(I);
432}
433
434/// Look at all uses of the global and decide whether it is safe for us to
435/// perform this transformation.
436static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
437 for (User *U : GV->users()) {
438 // The user of the global must be a GEP Inst or a ConstantExpr GEP.
439 if (!isa<GetElementPtrInst>(U) &&
440 (!isa<ConstantExpr>(U) ||
441 cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
442 return false;
443
444 // Check the gep and its users are safe to SRA
445 if (!isSafeSROAGEP(U))
446 return false;
447 }
448
449 return true;
450}
451
452/// Copy over the debug info for a variable to its SRA replacements.
453static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
454 uint64_t FragmentOffsetInBits,
455 uint64_t FragmentSizeInBits,
456 unsigned NumElements) {
457 SmallVector<DIGlobalVariableExpression *, 1> GVs;
458 GV->getDebugInfo(GVs);
459 for (auto *GVE : GVs) {
460 DIVariable *Var = GVE->getVariable();
461 DIExpression *Expr = GVE->getExpression();
462 if (NumElements > 1) {
463 if (auto E = DIExpression::createFragmentExpression(
464 Expr, FragmentOffsetInBits, FragmentSizeInBits))
465 Expr = *E;
466 else
467 return;
468 }
469 auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
470 NGV->addDebugInfo(NGVE);
471 }
472}
473
474/// Perform scalar replacement of aggregates on the specified global variable.
475/// This opens the door for other optimizations by exposing the behavior of the
476/// program in a more fine-grained way. We have determined that this
477/// transformation is safe already. We return the first global variable we
478/// insert so that the caller can reprocess it.
479static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
480 // Make sure this global only has simple uses that we can SRA.
481 if (!GlobalUsersSafeToSRA(GV))
482 return nullptr;
483
484 assert(GV->hasLocalLinkage())((GV->hasLocalLinkage()) ? static_cast<void> (0) : __assert_fail
("GV->hasLocalLinkage()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 484, __PRETTY_FUNCTION__))
;
485 Constant *Init = GV->getInitializer();
486 Type *Ty = Init->getType();
487
488 std::vector<GlobalVariable *> NewGlobals;
489 Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
490
491 // Get the alignment of the global, either explicit or target-specific.
492 unsigned StartAlignment = GV->getAlignment();
493 if (StartAlignment == 0)
494 StartAlignment = DL.getABITypeAlignment(GV->getType());
495
496 if (StructType *STy = dyn_cast<StructType>(Ty)) {
497 unsigned NumElements = STy->getNumElements();
498 NewGlobals.reserve(NumElements);
499 const StructLayout &Layout = *DL.getStructLayout(STy);
500 for (unsigned i = 0, e = NumElements; i != e; ++i) {
501 Constant *In = Init->getAggregateElement(i);
502 assert(In && "Couldn't get element of initializer?")((In && "Couldn't get element of initializer?") ? static_cast
<void> (0) : __assert_fail ("In && \"Couldn't get element of initializer?\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 502, __PRETTY_FUNCTION__))
;
503 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
504 GlobalVariable::InternalLinkage,
505 In, GV->getName()+"."+Twine(i),
506 GV->getThreadLocalMode(),
507 GV->getType()->getAddressSpace());
508 NGV->setExternallyInitialized(GV->isExternallyInitialized());
509 NGV->copyAttributesFrom(GV);
510 Globals.push_back(NGV);
511 NewGlobals.push_back(NGV);
512
513 // Calculate the known alignment of the field. If the original aggregate
514 // had 256 byte alignment for example, something might depend on that:
515 // propagate info to each field.
516 uint64_t FieldOffset = Layout.getElementOffset(i);
517 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
518 if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
519 NGV->setAlignment(NewAlign);
520
521 // Copy over the debug info for the variable.
522 uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
523 uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i);
524 transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements);
525 }
526 } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
527 unsigned NumElements = STy->getNumElements();
528 if (NumElements > 16 && GV->hasNUsesOrMore(16))
529 return nullptr; // It's not worth it.
530 NewGlobals.reserve(NumElements);
531 auto ElTy = STy->getElementType();
532 uint64_t EltSize = DL.getTypeAllocSize(ElTy);
533 unsigned EltAlign = DL.getABITypeAlignment(ElTy);
534 uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
535 for (unsigned i = 0, e = NumElements; i != e; ++i) {
536 Constant *In = Init->getAggregateElement(i);
537 assert(In && "Couldn't get element of initializer?")((In && "Couldn't get element of initializer?") ? static_cast
<void> (0) : __assert_fail ("In && \"Couldn't get element of initializer?\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 537, __PRETTY_FUNCTION__))
;
538
539 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
540 GlobalVariable::InternalLinkage,
541 In, GV->getName()+"."+Twine(i),
542 GV->getThreadLocalMode(),
543 GV->getType()->getAddressSpace());
544 NGV->setExternallyInitialized(GV->isExternallyInitialized());
545 NGV->copyAttributesFrom(GV);
546 Globals.push_back(NGV);
547 NewGlobals.push_back(NGV);
548
549 // Calculate the known alignment of the field. If the original aggregate
550 // had 256 byte alignment for example, something might depend on that:
551 // propagate info to each field.
552 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
553 if (NewAlign > EltAlign)
554 NGV->setAlignment(NewAlign);
555 transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
556 NumElements);
557 }
558 }
559
560 if (NewGlobals.empty())
561 return nullptr;
562
563 LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
564
565 Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
566
567 // Loop over all of the uses of the global, replacing the constantexpr geps,
568 // with smaller constantexpr geps or direct references.
569 while (!GV->use_empty()) {
570 User *GEP = GV->user_back();
571 assert(((isa<ConstantExpr>(GEP) &&((((isa<ConstantExpr>(GEP) && cast<ConstantExpr
>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<
GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"
) ? static_cast<void> (0) : __assert_fail ("((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<GetElementPtrInst>(GEP)) && \"NonGEP CE's are not SRAable!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 573, __PRETTY_FUNCTION__))
572 cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||((((isa<ConstantExpr>(GEP) && cast<ConstantExpr
>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<
GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"
) ? static_cast<void> (0) : __assert_fail ("((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<GetElementPtrInst>(GEP)) && \"NonGEP CE's are not SRAable!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 573, __PRETTY_FUNCTION__))
573 isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!")((((isa<ConstantExpr>(GEP) && cast<ConstantExpr
>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<
GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"
) ? static_cast<void> (0) : __assert_fail ("((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<GetElementPtrInst>(GEP)) && \"NonGEP CE's are not SRAable!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 573, __PRETTY_FUNCTION__))
;
574
575 // Ignore operand 1, which has to be zero or else the program is quite
576 // broken (undefined). Get the 2nd operand, which is the structure or array
577 // index.
578 unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
579 if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
580
581 Value *NewPtr = NewGlobals[Val];
582 Type *NewTy = NewGlobals[Val]->getValueType();
583
584 // Form a shorter GEP if needed.
585 if (GEP->getNumOperands() > 3) {
586 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
587 SmallVector<Constant*, 8> Idxs;
588 Idxs.push_back(NullInt);
589 for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
590 Idxs.push_back(CE->getOperand(i));
591 NewPtr =
592 ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
593 } else {
594 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
595 SmallVector<Value*, 8> Idxs;
596 Idxs.push_back(NullInt);
597 for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
598 Idxs.push_back(GEPI->getOperand(i));
599 NewPtr = GetElementPtrInst::Create(
600 NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
601 }
602 }
603 GEP->replaceAllUsesWith(NewPtr);
604
605 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
606 GEPI->eraseFromParent();
607 else
608 cast<ConstantExpr>(GEP)->destroyConstant();
609 }
610
611 // Delete the old global, now that it is dead.
612 Globals.erase(GV);
613 ++NumSRA;
614
615 // Loop over the new globals array deleting any globals that are obviously
616 // dead. This can arise due to scalarization of a structure or an array that
617 // has elements that are dead.
618 unsigned FirstGlobal = 0;
619 for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
620 if (NewGlobals[i]->use_empty()) {
621 Globals.erase(NewGlobals[i]);
622 if (FirstGlobal == i) ++FirstGlobal;
623 }
624
625 return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
626}
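
A hypothetical sketch of the overall effect, at the C level (names invented):

    static struct { int A; double B; } G; // before: one internal global @G
    // after SRAGlobal (conceptually):
    //   static int    G_0;  // @G.0 - carries A's initializer, alignment,
    //                       //        and a debug-info fragment
    //   static double G_1;  // @G.1
    // every 'gep @G, 0, 0, ...' now addresses @G.0, 'gep @G, 0, 1, ...'
    // addresses @G.1, and the dead @G is erased.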
627
628/// Return true if all users of the specified value will trap if the value is
629/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
630/// reprocessing them.
631static bool AllUsesOfValueWillTrapIfNull(const Value *V,
632 SmallPtrSetImpl<const PHINode*> &PHIs) {
633 for (const User *U : V->users()) {
634 if (const Instruction *I = dyn_cast<Instruction>(U)) {
635 // If null pointer is considered valid, then all uses are non-trapping.
636 // Non address-space 0 globals have already been pruned by the caller.
637 if (NullPointerIsDefined(I->getFunction()))
638 return false;
639 }
640 if (isa<LoadInst>(U)) {
641 // Will trap.
642 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
643 if (SI->getOperand(0) == V) {
644 //cerr << "NONTRAPPING USE: " << *U;
645 return false; // Storing the value.
646 }
647 } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
648 if (CI->getCalledValue() != V) {
649 //cerr << "NONTRAPPING USE: " << *U;
650 return false; // Not calling the ptr
651 }
652 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
653 if (II->getCalledValue() != V) {
654 //cerr << "NONTRAPPING USE: " << *U;
655 return false; // Not calling the ptr
656 }
657 } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
658 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
659 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
660 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
661 } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
662 // If we've already seen this phi node, ignore it, it has already been
663 // checked.
664 if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
665 return false;
666 } else if (isa<ICmpInst>(U) &&
667 isa<ConstantPointerNull>(U->getOperand(1))) {
668 // Ignore icmp X, null
669 } else {
670 //cerr << "NONTRAPPING USE: " << *U;
671 return false;
672 }
673 }
674 return true;
675}
676
677/// Return true if all uses of any loads from GV will trap if the loaded value
678/// is null. Note that this also permits comparisons of the loaded value
679/// against null, as a special case.
680static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
681 for (const User *U : GV->users())
682 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
683 SmallPtrSet<const PHINode*, 8> PHIs;
684 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
685 return false;
686 } else if (isa<StoreInst>(U)) {
687 // Ignore stores to the global.
688 } else {
689 // We don't know or understand this user, bail out.
690 //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
691 return false;
692 }
693 return true;
694}
695
696static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
697 bool Changed = false;
698 for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
699 Instruction *I = cast<Instruction>(*UI++);
700 // Uses are non-trapping if null pointer is considered valid.
701 // Non address-space 0 globals are already pruned by the caller.
702 if (NullPointerIsDefined(I->getFunction()))
703 return false;
704 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
705 LI->setOperand(0, NewV);
706 Changed = true;
707 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
708 if (SI->getOperand(1) == V) {
709 SI->setOperand(1, NewV);
710 Changed = true;
711 }
712 } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
713 CallSite CS(I);
714 if (CS.getCalledValue() == V) {
715 // Calling through the pointer! Turn into a direct call, but be careful
716 // that the pointer is not also being passed as an argument.
717 CS.setCalledFunction(NewV);
718 Changed = true;
719 bool PassedAsArg = false;
720 for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
721 if (CS.getArgument(i) == V) {
722 PassedAsArg = true;
723 CS.setArgument(i, NewV);
724 }
725
726 if (PassedAsArg) {
727 // Being passed as an argument also. Be careful to not invalidate UI!
728 UI = V->user_begin();
729 }
730 }
731 } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
732 Changed |= OptimizeAwayTrappingUsesOfValue(CI,
733 ConstantExpr::getCast(CI->getOpcode(),
734 NewV, CI->getType()));
735 if (CI->use_empty()) {
736 Changed = true;
737 CI->eraseFromParent();
738 }
739 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
740 // Should handle GEP here.
741 SmallVector<Constant*, 8> Idxs;
742 Idxs.reserve(GEPI->getNumOperands()-1);
743 for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
744 i != e; ++i)
745 if (Constant *C = dyn_cast<Constant>(*i))
746 Idxs.push_back(C);
747 else
748 break;
749 if (Idxs.size() == GEPI->getNumOperands()-1)
750 Changed |= OptimizeAwayTrappingUsesOfValue(
751 GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
752 NewV, Idxs));
753 if (GEPI->use_empty()) {
754 Changed = true;
755 GEPI->eraseFromParent();
756 }
757 }
758 }
759
760 return Changed;
761}
762
763/// The specified global has only one non-null value stored into it. If there
764/// are uses of the loaded value that would trap if the loaded value is
765/// dynamically null, then we know that they cannot be reachable with a null
766/// value, so we can optimize away the load.
767static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
768 const DataLayout &DL,
769 TargetLibraryInfo *TLI) {
770 bool Changed = false;
771
772 // Keep track of whether we are able to remove all the uses of the global
773 // other than the store that defines it.
774 bool AllNonStoreUsesGone = true;
775
776 // Replace all uses of loads with uses of the stored value.
777 for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
778 User *GlobalUser = *GUI++;
779 if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
780 Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
781 // If we were able to delete all uses of the loads
782 if (LI->use_empty()) {
783 LI->eraseFromParent();
784 Changed = true;
785 } else {
786 AllNonStoreUsesGone = false;
787 }
788 } else if (isa<StoreInst>(GlobalUser)) {
789 // Ignore the store that stores "LV" to the global.
790 assert(GlobalUser->getOperand(1) == GV &&
791 "Must be storing *to* the global");
792 } else {
793 AllNonStoreUsesGone = false;
794
795 // If we get here we could have other crazy uses that are transitively
796 // loaded.
797 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||(((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser
) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>
(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<
GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"
) ? static_cast<void> (0) : __assert_fail ("(isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<GetElementPtrInst>(GlobalUser)) && \"Only expect load and stores!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 801, __PRETTY_FUNCTION__))
798 isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||(((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser
) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>
(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<
GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"
) ? static_cast<void> (0) : __assert_fail ("(isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<GetElementPtrInst>(GlobalUser)) && \"Only expect load and stores!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 801, __PRETTY_FUNCTION__))
799 isa<BitCastInst>(GlobalUser) ||(((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser
) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>
(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<
GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"
) ? static_cast<void> (0) : __assert_fail ("(isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<GetElementPtrInst>(GlobalUser)) && \"Only expect load and stores!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 801, __PRETTY_FUNCTION__))
800 isa<GetElementPtrInst>(GlobalUser)) &&(((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser
) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>
(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<
GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"
) ? static_cast<void> (0) : __assert_fail ("(isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<GetElementPtrInst>(GlobalUser)) && \"Only expect load and stores!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 801, __PRETTY_FUNCTION__))
801 "Only expect load and stores!")(((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser
) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>
(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<
GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"
) ? static_cast<void> (0) : __assert_fail ("(isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || isa<BitCastInst>(GlobalUser) || isa<GetElementPtrInst>(GlobalUser)) && \"Only expect load and stores!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Transforms/IPO/GlobalOpt.cpp"
, 801, __PRETTY_FUNCTION__))
;
802 }
803 }
804
805 if (Changed) {
806 LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
807 << "\n");
808 ++NumGlobUses;
809 }
810
811 // If we nuked all of the loads, then none of the stores are needed either,
812 // nor is the global.
813 if (AllNonStoreUsesGone) {
814 if (isLeakCheckerRoot(GV)) {
815 Changed |= CleanupPointerRootUsers(GV, TLI);
816 } else {
817 Changed = true;
818 CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
819 }
820 if (GV->use_empty()) {
821 LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
822 Changed = true;
823 GV->eraseFromParent();
824 ++NumDeleted;
825 }
826 }
827 return Changed;
828}
829
830/// Walk the use list of V, constant folding all of the instructions that are
831/// foldable.
832static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
833 TargetLibraryInfo *TLI) {
834 for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
835 if (Instruction *I = dyn_cast<Instruction>(*UI++))
836 if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
837 I->replaceAllUsesWith(NewC);
838
839 // Advance UI to the next non-I use to avoid invalidating it!
840 // Instructions could multiply use V.
841 while (UI != E && *UI == I)
842 ++UI;
843 if (isInstructionTriviallyDead(I, TLI))
844 I->eraseFromParent();
845 }
846}
847
848/// This function takes the specified global variable, and transforms the
849/// program as if it always contained the result of the specified malloc.
850/// Because it is always the result of the specified malloc, there is no reason
851/// to actually DO the malloc. Instead, turn the malloc into a global, and
852/// rewrite any loads of GV as uses of the new global.
853static GlobalVariable *
854OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
855 ConstantInt *NElements, const DataLayout &DL,
856 TargetLibraryInfo *TLI) {
857 LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
858 << '\n');
859
860 Type *GlobalType;
861 if (NElements->getZExtValue() == 1)
862 GlobalType = AllocTy;
863 else
864 // If we have an array allocation, the global variable is of an array.
865 GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
866
867 // Create the new global variable. The contents of the malloc'd memory are
868 // undefined, so initialize with an undef value.
869 GlobalVariable *NewGV = new GlobalVariable(
870 *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
871 UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
872 GV->getThreadLocalMode());
873
874 // If there are bitcast users of the malloc (which is typical, usually we have
875 // a malloc + bitcast) then replace them with uses of the new global. Update
876 // other users to use the global as well.
877 BitCastInst *TheBC = nullptr;
878 while (!CI->use_empty()) {
879 Instruction *User = cast<Instruction>(CI->user_back());
880 if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
881 if (BCI->getType() == NewGV->getType()) {
882 BCI->replaceAllUsesWith(NewGV);
883 BCI->eraseFromParent();
884 } else {
885 BCI->setOperand(0, NewGV);
886 }
887 } else {
888 if (!TheBC)
889 TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
890 User->replaceUsesOfWith(CI, TheBC);
891 }
892 }
893
894 Constant *RepValue = NewGV;
895 if (NewGV->getType() != GV->getValueType())
896 RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
897
898 // If there is a comparison against null, we will insert a global bool to
899 // keep track of whether the global was initialized yet or not.
900 GlobalVariable *InitBool =
901 new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
902 GlobalValue::InternalLinkage,
903 ConstantInt::getFalse(GV->getContext()),
904 GV->getName()+".init", GV->getThreadLocalMode());
905 bool InitBoolUsed = false;
906
907 // Loop over all uses of GV, processing them in turn.
908 while (!GV->use_empty()) {
909 if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
910 // The global is initialized when the store to it occurs.
911 new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
912 SI->getOrdering(), SI->getSyncScopeID(), SI);
913 SI->eraseFromParent();
914 continue;
915 }
916
917 LoadInst *LI = cast<LoadInst>(GV->user_back());
918 while (!LI->use_empty()) {
919 Use &LoadUse = *LI->use_begin();
920 ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
921 if (!ICI) {
922 LoadUse = RepValue;
923 continue;
924 }
925
926 // Replace the cmp X, 0 with a use of the bool value.
927 // Sink the load to where the compare was, if atomic rules allow us to.
928 Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
929 InitBool->getName() + ".val", false, 0,
930 LI->getOrdering(), LI->getSyncScopeID(),
931 LI->isUnordered() ? (Instruction *)ICI : LI);
932 InitBoolUsed = true;
933 switch (ICI->getPredicate()) {
934 default: llvm_unreachable("Unknown ICmp Predicate!");
935 case ICmpInst::ICMP_ULT:
936 case ICmpInst::ICMP_SLT: // X < null -> always false
937 LV = ConstantInt::getFalse(GV->getContext());
938 break;
939 case ICmpInst::ICMP_ULE:
940 case ICmpInst::ICMP_SLE:
941 case ICmpInst::ICMP_EQ:
942 LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
943 break;
944 case ICmpInst::ICMP_NE:
945 case ICmpInst::ICMP_UGE:
946 case ICmpInst::ICMP_SGE:
947 case ICmpInst::ICMP_UGT:
948 case ICmpInst::ICMP_SGT:
949 break; // no change.
950 }
951 ICI->replaceAllUsesWith(LV);
952 ICI->eraseFromParent();
953 }
954 LI->eraseFromParent();
955 }
956
957 // If the initialization boolean was used, insert it, otherwise delete it.
958 if (!InitBoolUsed) {
959 while (!InitBool->use_empty()) // Delete initializations
960 cast<StoreInst>(InitBool->user_back())->eraseFromParent();
961 delete InitBool;
962 } else
963 GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
964
965 // Now the GV is dead, nuke it and the malloc.
966 GV->eraseFromParent();
967 CI->eraseFromParent();
968
969 // To further other optimizations, loop over all users of NewGV and try to
970 // constant prop them. This will promote GEP instructions with constant
971 // indices into GEP constant-exprs, which will allow global-opt to hack on it.
972 ConstantPropUsersOf(NewGV, DL, TLI);
973 if (RepValue != NewGV)
974 ConstantPropUsersOf(RepValue, DL, TLI);
975
976 return NewGV;
977}
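
A hypothetical C-level rendering of the rewrite (names invented). The original program

    #include <stdlib.h>

    static int *GV;
    void init(void) { GV = (int *)malloc(4 * sizeof(int)); }
    int  use(void)  { return GV ? GV[0] : -1; }

conceptually becomes:

    #include <stdbool.h>

    static int  GV_body[4];      // plays the role of @GV.body (undef contents)
    static bool GV_init = false; // plays the role of @GV.init

    void init(void) { GV_init = true; } // the store now just marks "initialized"
    int  use(void)  { return GV_init ? GV_body[0] : -1; } // null test -> bool test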
978
979/// Scan the use-list of V checking to make sure that there are no complex uses
980/// of V. We permit simple things like dereferencing the pointer, but not
981/// storing through the address, unless it is to the specified global.
982static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
983 const GlobalVariable *GV,
984 SmallPtrSetImpl<const PHINode*> &PHIs) {
985 for (const User *U : V->users()) {
986 const Instruction *Inst = cast<Instruction>(U);
987
988 if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
989 continue; // Fine, ignore.
990 }
991
992 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
993 if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
994 return false; // Storing the pointer itself... bad.
995 continue; // Otherwise, storing through it, or storing into GV... fine.
996 }
997
998 // Must index into the array and into the struct.
999 if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
1000 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
1001 return false;
1002 continue;
1003 }
1004
1005 if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
1006 // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
1007 // cycles.
1008 if (PHIs.insert(PN).second)
1009 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
1010 return false;
1011 continue;
1012 }
1013
1014 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
1015 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
1016 return false;
1017 continue;
1018 }
1019
1020 return false;
1021 }
1022 return true;
1023}
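
A hypothetical illustration of the uses this predicate allows for a pointer P relative to the one global G:

    static int *G, *OtherGlobal;
    void ok(int *P)  { *P = 42; G = P; }  // dereference + store into G: allowed
    void bad(int *P) { OtherGlobal = P; } // P itself escapes elsewhere: rejected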
1024
1025/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
1026/// allocation into loads from the global and uses of the resultant pointer.
1027/// Further, delete the store into GV. This assumes that these values pass the
1028/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
1029static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
1030 GlobalVariable *GV) {
1031 while (!Alloc->use_empty()) {
1032 Instruction *U = cast<Instruction>(*Alloc->user_begin());
1033 Instruction *InsertPt = U;
1034 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1035 // If this is the store of the allocation into the global, remove it.
1036 if (SI->getOperand(1) == GV) {
1037 SI->eraseFromParent();
1038 continue;
1039 }
1040 } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
1041 // Insert the load in the corresponding predecessor, not right before the
1042 // PHI.
1043 InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
1044 } else if (isa<BitCastInst>(U)) {
1045 // Must be bitcast between the malloc and store to initialize the global.
1046 ReplaceUsesOfMallocWithGlobal(U, GV);
1047 U->eraseFromParent();
1048 continue;
1049 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
1050 // If this is a "GEP bitcast" and the user is a store to the global, then
1051 // just process it as a bitcast.
1052 if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
1053 if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
1054 if (SI->getOperand(1) == GV) {
1055 // Must be bitcast GEP between the malloc and store to initialize
1056 // the global.
1057 ReplaceUsesOfMallocWithGlobal(GEPI, GV);
1058 GEPI->eraseFromParent();
1059 continue;
1060 }
1061 }
1062
1063 // Insert a load from the global, and use it instead of the malloc.
1064 Value *NL =
1065 new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
1066 U->replaceUsesOfWith(Alloc, NL);
1067 }
1068}
1069
1070/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
1071/// perform heap SRA on. This permits GEP's that index through the array and
1072/// struct field, icmps of null, and PHIs.
1073static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
1074 SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
1075 SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
1076 // We permit two users of the load: setcc comparing against the null
1077 // pointer, and a getelementptr of a specific form.
1078 for (const User *U : V->users()) {
1079 const Instruction *UI = cast<Instruction>(U);
1080
1081 // Comparison against null is ok.
1082 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
1083 if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
1084 return false;
1085 continue;
1086 }
1087
1088 // getelementptr is also ok, but only a simple form.
1089 if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
1090 // Must index into the array and into the struct.
1091 if (GEPI->getNumOperands() < 3)
1092 return false;
1093
1094 // Otherwise the GEP is ok.
1095 continue;
1096 }
1097
1098 if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1099 if (!LoadUsingPHIsPerLoad.insert(PN).second)
1100 // This means some phi nodes are dependent on each other.
1101 // Avoid infinite looping!
1102 return false;
1103 if (!LoadUsingPHIs.insert(PN).second)
1104 // If we have already analyzed this PHI, then it is safe.
1105 continue;
1106
1107 // Make sure all uses of the PHI are simple enough to transform.
1108 if (!LoadUsesSimpleEnoughForHeapSRA(PN,
1109 LoadUsingPHIs, LoadUsingPHIsPerLoad))
1110 return false;
1111
1112 continue;
1113 }
1114
1115 // Otherwise we don't know what this is, not ok.
1116 return false;
1117 }
1118
1119 return true;
1120}
1121
1122/// If all users of values loaded from GV are simple enough to perform HeapSRA,
1123/// return true.
1124static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
1125 Instruction *StoredVal) {
1126 SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
1127 SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
1128 for (const User *U : GV->users())
1129 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
1130 if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
1131 LoadUsingPHIsPerLoad))
1132 return false;
1133 LoadUsingPHIsPerLoad.clear();
1134 }
1135
1136 // If we reach here, we know that all uses of the loads and transitive uses
1137 // (through PHI nodes) are simple enough to transform. However, we don't know
1138// that all inputs to the PHI nodes are in the same equivalence sets.
1139 // Check to verify that all operands of the PHIs are either PHIS that can be
1140 // transformed, loads from GV, or MI itself.
1141 for (const PHINode *PN : LoadUsingPHIs) {
1142 for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
1143 Value *InVal = PN->getIncomingValue(op);
1144
1145 // PHI of the stored value itself is ok.
1146 if (InVal == StoredVal) continue;
1147
1148 if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
1149 // One of the PHIs in our set is (optimistically) ok.
1150 if (LoadUsingPHIs.count(InPN))
1151 continue;
1152 return false;
1153 }
1154
1155 // Load from GV is ok.
1156 if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
1157 if (LI->getOperand(0) == GV)
1158 continue;
1159
1160 // UNDEF? NULL?
1161
1162 // Anything else is rejected.
1163 return false;
1164 }
1165 }
1166
1167 return true;
1168}
1169
1170static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
1171 DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1172 std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1173 std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];
1174
1175 if (FieldNo >= FieldVals.size())
1176 FieldVals.resize(FieldNo+1);
1177
1178 // If we already have this value, just reuse the previously scalarized
1179 // version.
1180 if (Value *FieldVal = FieldVals[FieldNo])
1181 return FieldVal;
1182
1183 // Depending on what instruction this is, we have several cases.
1184 Value *Result;
1185 if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
1186 // This is a scalarized version of the load from the global. Just create
1187 // a new Load of the scalarized global.
1188 Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
1189 InsertedScalarizedValues, PHIsToRewrite);
1190 Result = new LoadInst(V->getType()->getPointerElementType(), V,
1191 LI->getName() + ".f" + Twine(FieldNo), LI);
1192 } else {
1193 PHINode *PN = cast<PHINode>(V);
1194 // PN's type is pointer to struct. Make a new PHI of pointer to struct
1195 // field.
1196
1197 PointerType *PTy = cast<PointerType>(PN->getType());
1198 StructType *ST = cast<StructType>(PTy->getElementType());
1199
1200 unsigned AS = PTy->getAddressSpace();
1201 PHINode *NewPN =
1202 PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
1203 PN->getNumIncomingValues(),
1204 PN->getName()+".f"+Twine(FieldNo), PN);
1205 Result = NewPN;
1206 PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
1207 }
1208
1209 return FieldVals[FieldNo] = Result;
1210}
1211
1212/// Given a load instruction and a value derived from the load, rewrite the
1213/// derived value to use the HeapSRoA'd load.
1214static void RewriteHeapSROALoadUser(Instruction *LoadUser,
1215 DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1216 std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1217 // If this is a comparison against null, handle it.
1218 if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
1219 assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
1220 // If we have a setcc of the loaded pointer, we can use a setcc of any
1221 // field.
1222 Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
1223 InsertedScalarizedValues, PHIsToRewrite);
1224
1225 Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
1226 Constant::getNullValue(NPtr->getType()),
1227 SCI->getName());
1228 SCI->replaceAllUsesWith(New);
1229 SCI->eraseFromParent();
1230 return;
1231 }
1232
1233 // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
1234 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
1235 assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
1236 && "Unexpected GEPI!");
1237
1238 // Load the pointer for this field.
1239 unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
1240 Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
1241 InsertedScalarizedValues, PHIsToRewrite);
1242
1243 // Create the new GEP idx vector.
1244 SmallVector<Value*, 8> GEPIdx;
1245 GEPIdx.push_back(GEPI->getOperand(1));
1246 GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
1247
1248 Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
1249 GEPI->getName(), GEPI);
1250 GEPI->replaceAllUsesWith(NGEPI);
1251 GEPI->eraseFromParent();
1252 return;
1253 }
1254
1255 // Recursively transform the users of PHI nodes. This will lazily create the
1256 // PHIs that are needed for individual elements. Keep track of what PHIs we
1257 // see in InsertedScalarizedValues so that we don't get infinite loops (very
1258 // antisocial). If the PHI is already in InsertedScalarizedValues, it has
1259 // already been seen first by another load, so its uses have already been
1260 // processed.
1261 PHINode *PN = cast<PHINode>(LoadUser);
1262 if (!InsertedScalarizedValues.insert(std::make_pair(PN,
1263 std::vector<Value *>())).second)
1264 return;
1265
1266 // If this is the first time we've seen this PHI, recursively process all
1267 // users.
1268 for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
1269 Instruction *User = cast<Instruction>(*UI++);
1270 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1271 }
1272}
1273
1274/// We are performing Heap SRoA on a global. Load is a value loaded from the
1275/// global. Eliminate all uses of Load, making them use FieldGlobals instead.
1276/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
1277static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
1278 DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1279 std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) {
1280 for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
1281 Instruction *User = cast<Instruction>(*UI++);
1282 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1283 }
1284
1285 if (Load->use_empty()) {
1286 Load->eraseFromParent();
1287 InsertedScalarizedValues.erase(Load);
1288 }
1289}
1290
1291/// CI is an allocation of an array of structures. Break it up into multiple
1292/// allocations of arrays of the fields.
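/// A minimal sketch of the effect, with hypothetical types and names (not
/// taken from a real test case): for
///   %struct.foo = type { i32, i32* }
///   @g = internal global %struct.foo* null
/// where the only store to @g is a malloc of N %struct.foo elements, the
/// allocation is split into one malloc and one global per field:
///   @g.f0 = internal global i32* null   ; holds the array of field 0
///   @g.f1 = internal global i32** null  ; holds the array of field 1
/// and every field access through @g is rewritten against @g.f0/@g.f1.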
1293static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1294 Value *NElems, const DataLayout &DL,
1295 const TargetLibraryInfo *TLI) {
1296 LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI
1297 << '\n');
1298 Type *MAT = getMallocAllocatedType(CI, TLI);
1299 StructType *STy = cast<StructType>(MAT);
1300
1301 // There is guaranteed to be at least one use of the malloc (storing
1302 // it into GV). If there are other uses, change them to be uses of
1303 // the global to simplify later code. This also deletes the store
1304 // into GV.
1305 ReplaceUsesOfMallocWithGlobal(CI, GV);
1306
1307 // Okay, at this point, there are no users of the malloc. Insert N
1308 // new mallocs at the same place as CI, and N globals.
1309 std::vector<Value *> FieldGlobals;
1310 std::vector<Value *> FieldMallocs;
1311
1312 SmallVector<OperandBundleDef, 1> OpBundles;
1313 CI->getOperandBundlesAsDefs(OpBundles);
1314
1315 unsigned AS = GV->getType()->getPointerAddressSpace();
1316 for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
1317 Type *FieldTy = STy->getElementType(FieldNo);
1318 PointerType *PFieldTy = PointerType::get(FieldTy, AS);
1319
1320 GlobalVariable *NGV = new GlobalVariable(
1321 *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
1322 Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
1323 nullptr, GV->getThreadLocalMode());
1324 NGV->copyAttributesFrom(GV);
1325 FieldGlobals.push_back(NGV);
1326
1327 unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
1328 if (StructType *ST = dyn_cast<StructType>(FieldTy))
1329 TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
1330 Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1331 Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1332 ConstantInt::get(IntPtrTy, TypeSize),
1333 NElems, OpBundles, nullptr,
1334 CI->getName() + ".f" + Twine(FieldNo));
1335 FieldMallocs.push_back(NMI);
1336 new StoreInst(NMI, NGV, CI);
1337 }
1338
1339 // The tricky aspect of this transformation is handling the case when malloc
1340 // fails. In the original code, malloc failing would set the result pointer
1341 // of malloc to null. In this case, some mallocs could succeed and others
1342 // could fail. As such, we emit code that looks like this:
1343 // F0 = malloc(field0)
1344 // F1 = malloc(field1)
1345 // F2 = malloc(field2)
1346 // if (F0 == 0 || F1 == 0 || F2 == 0) {
1347 // if (F0) { free(F0); F0 = 0; }
1348 // if (F1) { free(F1); F1 = 0; }
1349 // if (F2) { free(F2); F2 = 0; }
1350 // }
1351 // The malloc can also fail if its argument is too large.
1352 Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
1353 Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
1354 ConstantZero, "isneg");
1355 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1356 Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1357 Constant::getNullValue(FieldMallocs[i]->getType()),
1358 "isnull");
1359 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
1360 }
1361
1362 // Split the basic block at the old malloc.
1363 BasicBlock *OrigBB = CI->getParent();
1364 BasicBlock *ContBB =
1365 OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
1366
1367 // Create the block to check the first condition. Put all these blocks at the
1368 // end of the function as they are unlikely to be executed.
1369 BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
1370 "malloc_ret_null",
1371 OrigBB->getParent());
1372
1373 // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
1374 // branch on RunningOr.
1375 OrigBB->getTerminator()->eraseFromParent();
1376 BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
1377
1378 // Within the NullPtrBlock, we need to emit a comparison and branch for each
1379 // pointer, because some may be null while others are not.
1380 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1381 Value *GVVal =
1382 new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
1383 FieldGlobals[i], "tmp", NullPtrBlock);
1384 Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
1385 Constant::getNullValue(GVVal->getType()));
1386 BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
1387 OrigBB->getParent());
1388 BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
1389 OrigBB->getParent());
1390 Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
1391 Cmp, NullPtrBlock);
1392
1393 // Fill in FreeBlock.
1394 CallInst::CreateFree(GVVal, OpBundles, BI);
1395 new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
1396 FreeBlock);
1397 BranchInst::Create(NextBlock, FreeBlock);
1398
1399 NullPtrBlock = NextBlock;
1400 }
1401
1402 BranchInst::Create(ContBB, NullPtrBlock);
1403
1404 // CI is no longer needed, remove it.
1405 CI->eraseFromParent();
1406
1407 /// As we process loads, if we can't immediately update all uses of the load,
1408 /// keep track of what scalarized loads are inserted for a given load.
1409 DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
1410 InsertedScalarizedValues[GV] = FieldGlobals;
1411
1412 std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;
1413
1414 // Okay, the malloc site is completely handled. All of the uses of GV are now
1415 // loads, and all uses of those loads are simple. Rewrite them to use loads
1416 // of the per-field globals instead.
1417 for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
1418 Instruction *User = cast<Instruction>(*UI++);
1419
1420 if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1421 RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
1422 continue;
1423 }
1424
1425 // Must be a store of null.
1426 StoreInst *SI = cast<StoreInst>(User);
1427 assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
1428 "Unexpected heap-sra user!");
1429
1430 // Insert a store of null into each global.
1431 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1432 Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
1433 Constant *Null = Constant::getNullValue(ValTy);
1434 new StoreInst(Null, FieldGlobals[i], SI);
1435 }
1436 // Erase the original store.
1437 SI->eraseFromParent();
1438 }
1439
1440 // While we have PHIs that are interesting to rewrite, do it.
1441 while (!PHIsToRewrite.empty()) {
1442 PHINode *PN = PHIsToRewrite.back().first;
1443 unsigned FieldNo = PHIsToRewrite.back().second;
1444 PHIsToRewrite.pop_back();
1445 PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
1446 assert(FieldPN->getNumIncomingValues() == 0 && "Already processed this phi");
1447
1448 // Add all the incoming values. This can materialize more phis.
1449 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1450 Value *InVal = PN->getIncomingValue(i);
1451 InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
1452 PHIsToRewrite);
1453 FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
1454 }
1455 }
1456
1457 // Drop all inter-phi links and any loads that made it this far.
1458 for (DenseMap<Value *, std::vector<Value *>>::iterator
1459 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1460 I != E; ++I) {
1461 if (PHINode *PN = dyn_cast<PHINode>(I->first))
1462 PN->dropAllReferences();
1463 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1464 LI->dropAllReferences();
1465 }
1466
1467 // Delete all the phis and loads now that inter-references are dead.
1468 for (DenseMap<Value *, std::vector<Value *>>::iterator
1469 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1470 I != E; ++I) {
1471 if (PHINode *PN = dyn_cast<PHINode>(I->first))
1472 PN->eraseFromParent();
1473 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1474 LI->eraseFromParent();
1475 }
1476
1477 // The old global is now dead, remove it.
1478 GV->eraseFromParent();
1479
1480 ++NumHeapSRA;
1481 return cast<GlobalVariable>(FieldGlobals[0]);
1482}
1483
1484/// This function is called when we see a pointer global variable with a single
1485/// value stored into it that is a malloc or a cast of a malloc.
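/// A source-level sketch of the pattern (hypothetical names, for illustration
/// only):
///   static struct S *g;                 // internal pointer global, null init
///   ...
///   g = malloc(n * sizeof(struct S));   // the single stored value
/// provided every use of the loaded pointer would trap if it were null, so no
/// use can observe the pre-malloc null value.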
1486static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
1487 Type *AllocTy,
1488 AtomicOrdering Ordering,
1489 const DataLayout &DL,
1490 TargetLibraryInfo *TLI) {
1491 // If this is a malloc of an abstract type, don't touch it.
1492 if (!AllocTy->isSized())
1493 return false;
1494
1495 // We can't optimize this global unless all uses of it are *known* to be
1496 // of the malloc value, not of the null initializer value (consider a use
1497 // that compares the global's value against zero to see if the malloc has
1498 // been reached). To do this, we check to see if all uses of the global
1499 // would trap if the global were null: this proves that they must all
1500 // happen after the malloc.
1501 if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
1502 return false;
1503
1504 // We can't optimize this if the malloc itself is used in a complex way,
1505 // for example, being stored into multiple globals. This allows the
1506 // malloc to be stored into the specified global, and to be loaded,
1507 // icmp'd, and GEP'd. These are all uses we know how to rewrite in
1508 // terms of the global.
1509 SmallPtrSet<const PHINode*, 8> PHIs;
1510 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
1511 return false;
1512
1513 // If we have a global that is only initialized with a fixed size malloc,
1514 // transform the program to use global memory instead of malloc'd memory.
1515 // This eliminates dynamic allocation, avoids an indirection accessing the
1516 // data, and exposes the resultant global to further GlobalOpt.
1517 // We cannot optimize the malloc if we cannot determine malloc array size.
1518 Value *NElems = getMallocArraySize(CI, DL, TLI, true);
1519 if (!NElems)
1520 return false;
1521
1522 if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
1523 // Restrict this transformation to only working on small allocations
1524 // (2048 bytes currently), as we don't want to introduce a 16M global or
1525 // something.
1526 if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
1527 OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
1528 return true;
1529 }
1530
1531 // If the allocation is an array of structures, consider transforming this
1532 // into multiple malloc'd arrays, one for each field. This is basically
1533 // SRoA for malloc'd memory.
1534
1535 if (Ordering != AtomicOrdering::NotAtomic)
1536 return false;
1537
1538 // If this is an allocation of a fixed size array of structs, analyze as a
1539 // variable size array. malloc [100 x struct],1 -> malloc struct, 100
1540 if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1541 if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1542 AllocTy = AT->getElementType();
1543
1544 StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1545 if (!AllocSTy)
1546 return false;
1547
1548 // If the structure has an unreasonable number of fields, leave it
1549 // alone.
1550 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1551 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1552
1553 // If this is a fixed size array, transform the Malloc to be an alloc of
1554 // structs. malloc [100 x struct],1 -> malloc struct, 100
1555 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1556 Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1557 unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
1558 Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1559 Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1560 SmallVector<OperandBundleDef, 1> OpBundles;
1561 CI->getOperandBundlesAsDefs(OpBundles);
1562 Instruction *Malloc =
1563 CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
1564 OpBundles, nullptr, CI->getName());
1565 Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1566 CI->replaceAllUsesWith(Cast);
1567 CI->eraseFromParent();
1568 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1569 CI = cast<CallInst>(BCI->getOperand(0));
1570 else
1571 CI = cast<CallInst>(Malloc);
1572 }
1573
1574 PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
1575 TLI);
1576 return true;
1577 }
1578
1579 return false;
1580}
1581
1582// Try to optimize globals based on the knowledge that only one value (besides
1583// its initializer) is ever stored to the global.
1584static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1585 AtomicOrdering Ordering,
1586 const DataLayout &DL,
1587 TargetLibraryInfo *TLI) {
1588 // Ignore no-op GEPs and bitcasts.
1589 StoredOnceVal = StoredOnceVal->stripPointerCasts();
1590
1591 // If we are dealing with a pointer global that is initialized to null and
1592 // only has one (non-null) value stored into it, then we can optimize any
1593 // users of the loaded value (often calls and loads) that would trap if the
1594 // value was null.
1595 if (GV->getInitializer()->getType()->isPointerTy() &&
1596 GV->getInitializer()->isNullValue() &&
1597 !NullPointerIsDefined(
1598 nullptr /* F */,
1599 GV->getInitializer()->getType()->getPointerAddressSpace())) {
1600 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1601 if (GV->getInitializer()->getType() != SOVC->getType())
1602 SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1603
1604 // Optimize away any trapping uses of the loaded value.
1605 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
1606 return true;
1607 } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
1608 Type *MallocType = getMallocAllocatedType(CI, TLI);
1609 if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
1610 Ordering, DL, TLI))
1611 return true;
1612 }
1613 }
1614
1615 return false;
1616}
1617
1618/// At this point, we have learned that the only two values ever stored into GV
1619/// are its initializer and OtherVal. See if we can shrink the global into a
1620/// boolean and select between the two values whenever it is used. This exposes
1621/// the values to other scalar optimizations.
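/// A sketch with made-up values: if an internal global '@g = global i32 0' is
/// only ever stored 0 or 42, it becomes '@g.b = internal global i1 false';
/// stores become 'store i1 true/false, i1* @g.b' and each load becomes
///   %b = load i1, i1* @g.b
///   %v = select i1 %b, i32 42, i32 0
/// (a zext replaces the select when the two values are exactly 0 and 1).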
1622static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1623 Type *GVElType = GV->getValueType();
1624
1625 // If GVElType is already i1, it is already shrunk. If the type of the GV is
1626 // an FP value, pointer or vector, don't do this optimization because a select
1627 // between them is very expensive and unlikely to lead to later
1628 // simplification. In these cases, we typically end up with "cond ? v1 : v2"
1629 // where v1 and v2 both require constant pool loads, a big loss.
1630 if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1631 GVElType->isFloatingPointTy() ||
1632 GVElType->isPointerTy() || GVElType->isVectorTy())
1633 return false;
1634
1635 // Walk the use list of the global seeing if all the uses are load or store.
1636 // If there is anything else, bail out.
1637 for (User *U : GV->users())
1638 if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1639 return false;
1640
1641 LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");
1642
1643 // Create the new global, initializing it to false.
1644 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1645 false,
1646 GlobalValue::InternalLinkage,
1647 ConstantInt::getFalse(GV->getContext()),
1648 GV->getName()+".b",
1649 GV->getThreadLocalMode(),
1650 GV->getType()->getAddressSpace());
1651 NewGV->copyAttributesFrom(GV);
1652 GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
1653
1654 Constant *InitVal = GV->getInitializer();
1655 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1656 "No reason to shrink to bool!");
1657
1658 SmallVector<DIGlobalVariableExpression *, 1> GVs;
1659 GV->getDebugInfo(GVs);
1660
1661 // If initialized to zero and storing one into the global, we can use a cast
1662 // instead of a select to synthesize the desired value.
1663 bool IsOneZero = false;
1664 bool EmitOneOrZero = true;
1665 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)){
1666 IsOneZero = InitVal->isNullValue() && CI->isOne();
1667
1668 if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())){
1669 uint64_t ValInit = CIInit->getZExtValue();
1670 uint64_t ValOther = CI->getZExtValue();
1671 uint64_t ValMinus = ValOther - ValInit;
1672
1673 for(auto *GVe : GVs){
1674 DIGlobalVariable *DGV = GVe->getVariable();
1675 DIExpression *E = GVe->getExpression();
1676 const DataLayout &DL = GV->getParent()->getDataLayout();
1677 unsigned SizeInOctets =
1678 DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;
1679
1680 // It is expected that the address of the optimized global variable is
1681 // on top of the DWARF expression stack. After the optimization, the
1682 // value of that variable will be either 0 (the initial value) or 1
1683 // (the other value). The following expression should return a constant
1684 // integer value depending on the value at the global object address:
1685 // val * (ValOther - ValInit) + ValInit:
1686 // DW_OP_deref_size <SizeInOctets> DW_OP_constu <ValMinus>
1687 // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
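// Worked example with made-up numbers: for ValInit = 5 and ValOther = 7,
// ValMinus = 2, so the expression computes val * 2 + 5, yielding 5 when
// the i1 global holds 0 and 7 when it holds 1.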
1688 SmallVector<uint64_t, 12> Ops = {
1689 dwarf::DW_OP_deref_size, SizeInOctets,
1690 dwarf::DW_OP_constu, ValMinus,
1691 dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
1692 dwarf::DW_OP_plus};
1693 bool WithStackValue = true;
1694 E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
1695 DIGlobalVariableExpression *DGVE =
1696 DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
1697 NewGV->addDebugInfo(DGVE);
1698 }
1699 EmitOneOrZero = false;
1700 }
1701 }
1702
1703 if (EmitOneOrZero) {
1704 // FIXME: This only emits the variable's address for the debugger; the
1705 // value written at that address will only ever be 0 or 1.
1706 for(auto *GV : GVs)
1707 NewGV->addDebugInfo(GV);
1708 }
1709
1710 while (!GV->use_empty()) {
1711 Instruction *UI = cast<Instruction>(GV->user_back());
1712 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1713 // Change the store into a boolean store.
1714 bool StoringOther = SI->getOperand(0) == OtherVal;
1715 // Only do this if we weren't storing a loaded value.
1716 Value *StoreVal;
1717 if (StoringOther || SI->getOperand(0) == InitVal) {
1718 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1719 StoringOther);
1720 } else {
1721 // Otherwise, we are storing a previously loaded copy. To do this,
1722 // change the copy from copying the original value to just copying the
1723 // bool.
1724 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1725
1726 // If we've already replaced the input, StoredVal will be a cast or
1727 // select instruction. If not, it will be a load of the original
1728 // global.
1729 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1730 assert(LI->getOperand(0) == GV && "Not a copy!");
1731 // Insert a new load, to preserve the saved value.
1732 StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
1733 LI->getName() + ".b", false, 0,
1734 LI->getOrdering(), LI->getSyncScopeID(), LI);
1735 } else {
1736 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1737 "This is not a form that we understand!");
1738 StoreVal = StoredVal->getOperand(0);
1739 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1740 }
1741 }
1742 StoreInst *NSI =
1743 new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(),
1744 SI->getSyncScopeID(), SI);
1745 NSI->setDebugLoc(SI->getDebugLoc());
1746 } else {
1747 // Change the load into a load of bool then a select.
1748 LoadInst *LI = cast<LoadInst>(UI);
1749 LoadInst *NLI =
1750 new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
1751 false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
1752 Instruction *NSI;
1753 if (IsOneZero)
1754 NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1755 else
1756 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1757 NSI->takeName(LI);
1758 // Since LI is split into two instructions, NLI and NSI both inherit the
1759 // same DebugLoc.
1760 NLI->setDebugLoc(LI->getDebugLoc());
1761 NSI->setDebugLoc(LI->getDebugLoc());
1762 LI->replaceAllUsesWith(NSI);
1763 }
1764 UI->eraseFromParent();
1765 }
1766
1767 // Retain the name of the old global variable. People who are debugging their
1768 // programs may expect these variables to be named the same.
1769 NewGV->takeName(GV);
1770 GV->eraseFromParent();
1771 return true;
1772}
1773
1774static bool deleteIfDead(
1775 GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
1776 GV.removeDeadConstantUsers();
1777
1778 if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
1779 return false;
1780
1781 if (const Comdat *C = GV.getComdat())
1782 if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
1783 return false;
1784
1785 bool Dead;
1786 if (auto *F = dyn_cast<Function>(&GV))
1787 Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
1788 else
1789 Dead = GV.use_empty();
1790 if (!Dead)
1791 return false;
1792
1793 LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
1794 GV.eraseFromParent();
1795 ++NumDeleted;
1796 return true;
1797}
1798
1799static bool isPointerValueDeadOnEntryToFunction(
1800 const Function *F, GlobalValue *GV,
1801 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1802 // Find all uses of GV. We expect them all to be in F, and if we can't
1803 // identify any of the uses we bail out.
1804 //
1805 // On each of these uses, identify if the memory that GV points to is
1806 // used/required/live at the start of the function. If it is not, for example
1807 // if the first thing the function does is store to the GV, the GV can
1808 // possibly be demoted.
1809 //
1810 // We don't do an exhaustive search for memory operations - simply look
1811 // through bitcasts as they're quite common and benign.
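  // An illustrative candidate (hypothetical code): a global used only as
  // scratch space inside its single accessing function,
  //   static int Scratch;
  //   void f() { Scratch = compute(); ...; use(Scratch); }
  // is dead on entry to f because the store dominates every load.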
1812 const DataLayout &DL = GV->getParent()->getDataLayout();
1813 SmallVector<LoadInst *, 4> Loads;
1814 SmallVector<StoreInst *, 4> Stores;
1815 for (auto *U : GV->users()) {
1816 if (Operator::getOpcode(U) == Instruction::BitCast) {
1817 for (auto *UU : U->users()) {
1818 if (auto *LI = dyn_cast<LoadInst>(UU))
1819 Loads.push_back(LI);
1820 else if (auto *SI = dyn_cast<StoreInst>(UU))
1821 Stores.push_back(SI);
1822 else
1823 return false;
1824 }
1825 continue;
1826 }
1827
1828 Instruction *I = dyn_cast<Instruction>(U);
1829 if (!I)
1830 return false;
1831 assert(I->getParent()->getParent() == F);
1832
1833 if (auto *LI = dyn_cast<LoadInst>(I))
1834 Loads.push_back(LI);
1835 else if (auto *SI = dyn_cast<StoreInst>(I))
1836 Stores.push_back(SI);
1837 else
1838 return false;
1839 }
1840
1841 // We have identified all uses of GV into loads and stores. Now check if all
1842 // of them are known not to depend on the value of the global at the function
1843 // entry point. We do this by ensuring that every load is dominated by at
1844 // least one store.
1845 auto &DT = LookupDomTree(*const_cast<Function *>(F));
1846
1847 // The below check is quadratic. Check we're not going to do too many tests.
1848 // FIXME: Even though this will always have worst-case quadratic time, we
1849 // could put effort into minimizing the average time by putting stores that
1850 // have been shown to dominate at least one load at the beginning of the
1851 // Stores array, making subsequent dominance checks more likely to succeed
1852 // early.
1853 //
1854 // The threshold here is fairly large because global->local demotion is a
1855 // very powerful optimization should it fire.
1856 const unsigned Threshold = 100;
1857 if (Loads.size() * Stores.size() > Threshold)
1858 return false;
1859
1860 for (auto *L : Loads) {
1861 auto *LTy = L->getType();
1862 if (none_of(Stores, [&](const StoreInst *S) {
1863 auto *STy = S->getValueOperand()->getType();
1864 // The load is only dominated by the store if DomTree says so
1865 // and the number of bits loaded in L is less than or equal to
1866 // the number of bits stored in S.
1867 return DT.dominates(S, L) &&
1868 DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
1869 }))
1870 return false;
1871 }
1872 // All loads have known dependences inside F, so the global can be localized.
1873 return true;
1874}
1875
1876/// C may have non-instruction users. Can all of those users be turned into
1877/// instructions?
1878static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
1879 // We don't do this exhaustively. The most common pattern that we really need
1880 // to care about is a constant GEP or constant bitcast - so just looking
1881 // through one single ConstantExpr.
1882 //
1883 // The set of constants that this function returns true for must all be
1884 // handled by makeAllConstantUsesInstructions.
1885 for (auto *U : C->users()) {
1886 if (isa<Instruction>(U))
1887 continue;
1888 if (!isa<ConstantExpr>(U))
1889 // Non instruction, non-constantexpr user; cannot convert this.
1890 return false;
1891 for (auto *UU : U->users())
1892 if (!isa<Instruction>(UU))
1893 // A constantexpr used by another constant. We don't try to recurse any
1894 // further; we just bail out at this point.
1895 return false;
1896 }
1897
1898 return true;
1899}
1900
1901/// C may have non-instruction users, and
1902/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1903/// non-instruction users to instructions.
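/// A sketch of the rewrite (hypothetical IR): a constant-expression user such
/// as
///   %v = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @g, i64 0, i64 1)
/// becomes an explicit instruction pair
///   %gep = getelementptr [4 x i32], [4 x i32]* @g, i64 0, i64 1
///   %v = load i32, i32* %gep
/// so that the localized global is only reachable through instructions.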
1904static void makeAllConstantUsesInstructions(Constant *C) {
1905 SmallVector<ConstantExpr*,4> Users;
1906 for (auto *U : C->users()) {
1907 if (isa<ConstantExpr>(U))
1908 Users.push_back(cast<ConstantExpr>(U));
1909 else
1910 // We should never get here; allNonInstructionUsersCanBeMadeInstructions
1911 // should not have returned true for C.
1912 assert(
1913 isa<Instruction>(U) &&
1914 "Can't transform non-constantexpr non-instruction to instruction!");
1915 }
1916
1917 SmallVector<Value*,4> UUsers;
1918 for (auto *U : Users) {
1919 UUsers.clear();
1920 for (auto *UU : U->users())
1921 UUsers.push_back(UU);
1922 for (auto *UU : UUsers) {
1923 Instruction *UI = cast<Instruction>(UU);
1924 Instruction *NewU = U->getAsInstruction();
1925 NewU->insertBefore(UI);
1926 UI->replaceUsesOfWith(U, NewU);
1927 }
1928 // We've replaced all the uses, so destroy the constant. (destroyConstant
1929 // will update value handles and metadata.)
1930 U->destroyConstant();
1931 }
1932}
1933
1934/// Analyze the specified global variable and optimize
1935/// it if possible. If we make a change, return true.
1936static bool processInternalGlobal(
1937 GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI,
1938 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1939 auto &DL = GV->getParent()->getDataLayout();
1940 // If this is a first class global and has only one accessing function and
1941 // this function is non-recursive, we replace the global with a local alloca
1942 // in this function.
1943 //
1944 // NOTE: It doesn't make sense to promote non-single-value types since we
1945 // are just replacing static memory to stack memory.
1946 //
1947 // If the global is in different address space, don't bring it to stack.
1948 if (!GS.HasMultipleAccessingFunctions &&
1949 GS.AccessingFunction &&
1950 GV->getValueType()->isSingleValueType() &&
1951 GV->getType()->getAddressSpace() == 0 &&
1952 !GV->isExternallyInitialized() &&
1953 allNonInstructionUsersCanBeMadeInstructions(GV) &&
1954 GS.AccessingFunction->doesNotRecurse() &&
1955 isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1956 LookupDomTree)) {
1957 const DataLayout &DL = GV->getParent()->getDataLayout();
1958
1959 LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1960 Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1961 ->getEntryBlock().begin());
1962 Type *ElemTy = GV->getValueType();
1963 // FIXME: Pass Global's alignment when globals have alignment
1964 AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1965 GV->getName(), &FirstI);
1966 if (!isa<UndefValue>(GV->getInitializer()))
1967 new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1968
1969 makeAllConstantUsesInstructions(GV);
1970
1971 GV->replaceAllUsesWith(Alloca);
1972 GV->eraseFromParent();
1973 ++NumLocalized;
1974 return true;
1975 }
1976
1977 // If the global is never loaded (but may be stored to), it is dead.
1978 // Delete it now.
1979 if (!GS.IsLoaded) {
1980 LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1981
1982 bool Changed;
1983 if (isLeakCheckerRoot(GV)) {
1984 // Delete any constant stores to the global.
1985 Changed = CleanupPointerRootUsers(GV, TLI);
1986 } else {
1987 // Delete any stores we can find to the global. We may not be able to
1988 // make it completely dead though.
1989 Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1990 }
1991
1992 // If the global is dead now, delete it.
1993 if (GV->use_empty()) {
1994 GV->eraseFromParent();
1995 ++NumDeleted;
1996 Changed = true;
1997 }
1998 return Changed;
1999
2000 }
2001 if (GS.StoredType <= GlobalStatus::InitializerStored) {
2002 LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
2003
2004 // Don't actually mark a global constant if it's atomic because atomic loads
2005 // are implemented by a trivial cmpxchg in some edge-cases and that usually
2006 // requires write access to the variable even if it's not actually changed.
2007 if (GS.Ordering == AtomicOrdering::NotAtomic)
2008 GV->setConstant(true);
2009
2010 // Clean up any obviously simplifiable users now.
2011 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
2012
2013 // If the global is dead now, just nuke it.
2014 if (GV->use_empty()) {
2015 LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
2016 << "all users and delete global!\n");
2017 GV->eraseFromParent();
2018 ++NumDeleted;
2019 return true;
2020 }
2021
2022 // Fall through to the next check; see if we can optimize further.
2023 ++NumMarked;
2024 }
2025 if (!GV->getInitializer()->getType()->isSingleValueType()) {
2026 const DataLayout &DL = GV->getParent()->getDataLayout();
2027 if (SRAGlobal(GV, DL))
2028 return true;
2029 }
2030 if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
2031 // If the initial value for the global was an undef value, and if only
2032 // one other value was stored into it, we can just change the
2033 // initializer to be the stored value, then delete all stores to the
2034 // global. This allows us to mark it constant.
2035 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
2036 if (isa<UndefValue>(GV->getInitializer())) {
2037 // Change the initial value here.
2038 GV->setInitializer(SOVConstant);
2039
2040 // Clean up any obviously simplifiable users now.
2041 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
2042
2043 if (GV->use_empty()) {
2044 LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
2045 << "simplify all users and delete global!\n");
2046 GV->eraseFromParent();
2047 ++NumDeleted;
2048 }
2049 ++NumSubstitute;
2050 return true;
2051 }
2052
2053 // Try to optimize globals based on the knowledge that only one value
2054 // (besides its initializer) is ever stored to the global.
2055 if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
2056 return true;
2057
2058 // Otherwise, if the global was not a boolean, we can shrink it to be a
2059 // boolean.
2060 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
2061 if (GS.Ordering == AtomicOrdering::NotAtomic) {
2062 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
2063 ++NumShrunkToBool;
2064 return true;
2065 }
2066 }
2067 }
2068 }
2069
2070 return false;
2071}
2072
2073/// Analyze the specified global variable and optimize it if possible. If we
2074/// make a change, return true.
2075static bool
2076processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
2077 function_ref<DominatorTree &(Function &)> LookupDomTree) {
2078 if (GV.getName().startswith("llvm."))
2079 return false;
2080
2081 GlobalStatus GS;
2082
2083 if (GlobalStatus::analyzeGlobal(&GV, GS))
2084 return false;
2085
2086 bool Changed = false;
2087 if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
2088 auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
2089 : GlobalValue::UnnamedAddr::Local;
2090 if (NewUnnamedAddr != GV.getUnnamedAddr()) {
2091 GV.setUnnamedAddr(NewUnnamedAddr);
2092 NumUnnamed++;
2093 Changed = true;
2094 }
2095 }
2096
2097 // Do more involved optimizations if the global is internal.
2098 if (!GV.hasLocalLinkage())
2099 return Changed;
2100
2101 auto *GVar = dyn_cast<GlobalVariable>(&GV);
2102 if (!GVar)
2103 return Changed;
2104
2105 if (GVar->isConstant() || !GVar->hasInitializer())
2106 return Changed;
2107
2108 return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed;
2109}
2110
2111/// Walk all of the direct calls of the specified function, changing them to
2112/// FastCC.
2113static void ChangeCalleesToFastCall(Function *F) {
2114 for (User *U : F->users()) {
2115 if (isa<BlockAddress>(U))
2116 continue;
2117 CallSite CS(cast<Instruction>(U));
2118 CS.setCallingConv(CallingConv::Fast);
2119 }
2120}
2121
2122static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
2123 Attribute::AttrKind A) {
2124 unsigned AttrIndex;
2125 if (Attrs.hasAttrSomewhere(A, &AttrIndex))
2126 return Attrs.removeAttribute(C, AttrIndex, A);
2127 return Attrs;
2128}
2129
2130static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
2131 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
2132 for (User *U : F->users()) {
2133 if (isa<BlockAddress>(U))
2134 continue;
2135 CallSite CS(cast<Instruction>(U));
2136 CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
2137 }
2138}
2139
2140/// Return true if this is a calling convention that we'd like to change. The
2141/// idea here is that we don't want to mess with the convention if the user
2142/// explicitly requested something with performance implications like coldcc,
2143/// GHC, or anyregcc.
2144static bool hasChangeableCC(Function *F) {
2145 CallingConv::ID CC = F->getCallingConv();
2146
2147 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
2148 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2149 return false;
2150
2151 // FIXME: Change CC for the whole chain of musttail calls when possible.
2152 //
2153 // Can't change CC of the function that either has musttail calls, or is a
2154 // musttail callee itself
2155 for (User *U : F->users()) {
2156 if (isa<BlockAddress>(U))
2157 continue;
2158 CallInst* CI = dyn_cast<CallInst>(U);
2159 if (!CI)
2160 continue;
2161
2162 if (CI->isMustTailCall())
2163 return false;
2164 }
2165
2166 for (BasicBlock &BB : *F)
2167 if (BB.getTerminatingMustTailCall())
2168 return false;
2169
2170 return true;
2171}
2172
2173/// Return true if the block containing the call site has a BlockFrequency of
2174/// less than ColdCCRelFreq% of the entry block.
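/// For example, with hypothetical frequencies and ColdCCRelFreq == 2: a call
/// site whose block frequency is 1 against a caller entry frequency of 100 is
/// cold (1 < 100 * 2%), while one with frequency 5 is not.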
2175static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
2176 const BranchProbability ColdProb(ColdCCRelFreq, 100);
2177 auto CallSiteBB = CS.getInstruction()->getParent();
2178 auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
2179 auto CallerEntryFreq =
2180 CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
2181 return CallSiteFreq < CallerEntryFreq * ColdProb;
2182}
2183
2184 // This function checks if the input function F is cold at all call sites. It
2185 // also looks at each call site's containing function, returning false if the
2186 // caller function contains other non-cold calls. The input vector AllCallsCold
2187 // contains a list of functions that only have call sites in cold blocks.
2188static bool
2189isValidCandidateForColdCC(Function &F,
2190 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2191 const std::vector<Function *> &AllCallsCold) {
2192
2193 if (F.user_empty())
2194 return false;
2195
2196 for (User *U : F.users()) {
2197 if (isa<BlockAddress>(U))
2198 continue;
2199
2200 CallSite CS(cast<Instruction>(U));
2201 Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
2202 BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
2203 if (!isColdCallSite(CS, CallerBFI))
2204 return false;
2205 auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
2206 if (It == AllCallsCold.end())
2207 return false;
2208 }
2209 return true;
2210}
2211
2212static void changeCallSitesToColdCC(Function *F) {
2213 for (User *U : F->users()) {
2214 if (isa<BlockAddress>(U))
2215 continue;
2216 CallSite CS(cast<Instruction>(U));
2217 CS.setCallingConv(CallingConv::Cold);
2218 }
2219}
2220
2221// This function iterates over all the call instructions in the input Function
2222// and checks that all call sites are in cold blocks and are allowed to use the
2223// coldcc calling convention.
2224static bool
2225hasOnlyColdCalls(Function &F,
2226 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2227 for (BasicBlock &BB : F) {
2228 for (Instruction &I : BB) {
2229 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2230 CallSite CS(cast<Instruction>(CI));
2231 // Skip over inline asm instructions since they aren't function calls.
2232 if (CI->isInlineAsm())
2233 continue;
2234 Function *CalledFn = CI->getCalledFunction();
2235 if (!CalledFn)
2236 return false;
2237 if (!CalledFn->hasLocalLinkage())
2238 return false;
2239 // Skip over intrinsics since they won't remain as function calls.
2240 if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
2241 continue;
2242 // Check if it's valid to use coldcc calling convention.
2243 if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
2244 CalledFn->hasAddressTaken())
2245 return false;
2246 BlockFrequencyInfo &CallerBFI = GetBFI(F);
2247 if (!isColdCallSite(CS, CallerBFI))
2248 return false;
2249 }
2250 }
2251 }
2252 return true;
2253}
2254
2255static bool
2256OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
2257 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2258 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2259 function_ref<DominatorTree &(Function &)> LookupDomTree,
2260 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2261
2262 bool Changed = false;
2263
2264 std::vector<Function *> AllCallsCold;
2265 for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
2266 Function *F = &*FI++;
2267 if (hasOnlyColdCalls(*F, GetBFI))
2268 AllCallsCold.push_back(F);
2269 }
2270
2271 // Optimize functions.
2272 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
2273 Function *F = &*FI++;
2274
2275 // Don't perform global opt pass on naked functions; we don't want fast
2276 // calling conventions for naked functions.
2277 if (F->hasFnAttribute(Attribute::Naked))
2278 continue;
2279
2280 // Functions without names cannot be referenced outside this module.
2281 if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
2282 F->setLinkage(GlobalValue::InternalLinkage);
2283
2284 if (deleteIfDead(*F, NotDiscardableComdats)) {
2285 Changed = true;
2286 continue;
2287 }
2288
2289 // LLVM's definition of dominance allows instructions that are cyclic
2290 // in unreachable blocks, e.g.:
2291 // %pat = select i1 %condition, @global, i16* %pat
2292 // because any instruction dominates an instruction in a block that's
2293 // not reachable from entry.
2294 // So, remove unreachable blocks from the function, because a) there's
2295 // no point in analyzing them and b) GlobalOpt should otherwise grow
2296 // some more complicated logic to break these cycles.
2297 // Removing unreachable blocks might invalidate the dominator so we
2298 // recalculate it.
2299 if (!F->isDeclaration()) {
2300 if (removeUnreachableBlocks(*F)) {
2301 auto &DT = LookupDomTree(*F);
2302 DT.recalculate(*F);
2303 Changed = true;
2304 }
2305 }
2306
2307 Changed |= processGlobal(*F, TLI, LookupDomTree);
2308
2309 if (!F->hasLocalLinkage())
2310 continue;
2311
2312 // If we have an inalloca parameter that we can safely remove the
2313 // inalloca attribute from, do so. This unlocks optimizations that
2314 // wouldn't be safe in the presence of inalloca.
2315 // FIXME: We should also hoist alloca affected by this to the entry
2316 // block if possible.
2317 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
2318 !F->hasAddressTaken()) {
2319 RemoveAttribute(F, Attribute::InAlloca);
2320 Changed = true;
2321 }
2322
2323 if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
2324 NumInternalFunc++;
2325 TargetTransformInfo &TTI = GetTTI(*F);
2326 // Change the calling convention to coldcc if either stress testing is
2327 // enabled or the target would like to use coldcc on functions which are
2328 // cold at all call sites and the callers contain no other non coldcc
2329 // calls.
2330 if (EnableColdCCStressTest ||
2331 (TTI.useColdCCForColdCall(*F) &&
2332 isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
2333 F->setCallingConv(CallingConv::Cold);
2334 changeCallSitesToColdCC(F);
2335 Changed = true;
2336 NumColdCC++;
2337 }
2338 }
2339
2340 if (hasChangeableCC(F) && !F->isVarArg() &&
2341 !F->hasAddressTaken()) {
2342 // If this function has a calling convention worth changing, is not a
2343 // varargs function, and is only called directly, promote it to use the
2344 // Fast calling convention.
2345 F->setCallingConv(CallingConv::Fast);
2346 ChangeCalleesToFastCall(F);
2347 ++NumFastCallFns;
2348 Changed = true;
2349 }
2350
2351 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2352 !F->hasAddressTaken()) {
2353 // The function is not used by a trampoline intrinsic, so it is safe
2354 // to remove the 'nest' attribute.
2355 RemoveAttribute(F, Attribute::Nest);
2356 ++NumNestRemoved;
2357 Changed = true;
2358 }
2359 }
2360 return Changed;
2361}
2362
2363static bool
2364OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
2365 function_ref<DominatorTree &(Function &)> LookupDomTree,
2366 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2367 bool Changed = false;
2368
2369 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
2370 GVI != E; ) {
2371 GlobalVariable *GV = &*GVI++;
2372 // Global variables without names cannot be referenced outside this module.
2373 if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
2374 GV->setLinkage(GlobalValue::InternalLinkage);
2375 // Simplify the initializer.
2376 if (GV->hasInitializer())
2377 if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
2378 auto &DL = M.getDataLayout();
2379 Constant *New = ConstantFoldConstant(C, DL, TLI);
2380 if (New && New != C)
2381 GV->setInitializer(New);
2382 }
2383
2384 if (deleteIfDead(*GV, NotDiscardableComdats)) {
2385 Changed = true;
2386 continue;
2387 }
2388
2389 Changed |= processGlobal(*GV, TLI, LookupDomTree);
2390 }
2391 return Changed;
2392}
2393
2394/// Evaluate a piece of a constantexpr store into a global initializer. This
2395/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
2396/// GEP operands of Addr [0, OpNo) have been stepped into.
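/// A sketch with a hypothetical global: committing 'i32 7' through
///   getelementptr ({ i32, [2 x i32] }* @g, i64 0, i32 1, i64 0)
/// starts at OpNo == 2 (the 'i32 1' struct field index), recurses into the
/// array at 'i64 0', and rebuilds the nested constant aggregate with that one
/// leaf element replaced.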
2397static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2398 ConstantExpr *Addr, unsigned OpNo) {
2399 // Base case of the recursion.
2400 if (OpNo == Addr->getNumOperands()) {
2401 assert(Val->getType() == Init->getType() && "Type mismatch!");
2402 return Val;
2403 }
2404
2405 SmallVector<Constant*, 32> Elts;
2406 if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2407 // Break up the constant into its elements.
2408 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2409 Elts.push_back(Init->getAggregateElement(i));
2410
2411 // Replace the element that we are supposed to.
2412 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2413 unsigned Idx = CU->getZExtValue();
2414 assert(Idx < STy->getNumElements() && "Struct index out of range!");
2415 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2416
2417 // Return the modified struct.
2418 return ConstantStruct::get(STy, Elts);
2419 }
2420
2421 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2422 SequentialType *InitTy = cast<SequentialType>(Init->getType());
2423 uint64_t NumElts = InitTy->getNumElements();
2424
2425 // Break up the array into elements.
2426 for (uint64_t i = 0, e = NumElts; i != e; ++i)
2427 Elts.push_back(Init->getAggregateElement(i));
2428
2429 assert(CI->getZExtValue() < NumElts);
2430 Elts[CI->getZExtValue()] =
2431 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2432
2433 if (Init->getType()->isArrayTy())
2434 return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2435 return ConstantVector::get(Elts);
2436}
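
To make the recursion above concrete, here is a standalone sketch of the same idea on a plain nested aggregate: walk an index path (playing the role of the GEP operands [OpNo, NumOperands)) and rebuild each level with one element replaced. All names and types are illustrative, not LLVM's:

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// 'Agg' stands in for a constant aggregate ([2 x [2 x i32]], say).
typedef std::vector<std::vector<int> > Agg;

static std::vector<int> storeIntoLeaf(std::vector<int> Row, size_t Idx, int Val) {
  assert(Idx < Row.size() && "Index out of range!");
  Row[Idx] = Val; // Base case of the recursion: the value replaces one element.
  return Row;
}

static Agg storeInto(Agg Init, size_t Outer, size_t Inner, int Val) {
  assert(Outer < Init.size() && "Index out of range!");
  // Recursive step: rebuild the selected sub-aggregate with the store applied.
  Init[Outer] = storeIntoLeaf(Init[Outer], Inner, Val);
  return Init;
}

int main() {
  Agg G(2, std::vector<int>(2, 0));  // @g = global [2 x [2 x i32]] zeroinitializer
  Agg NewG = storeInto(G, 1, 0, 42); // Analogue of a store through GEP 0, 1, 0.
  std::cout << NewG[1][0] << '\n';   // Prints 42; 'G' itself is untouched.
}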
2437
2438/// We have decided that Addr (which satisfies the predicate
2439/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
2440static void CommitValueTo(Constant *Val, Constant *Addr) {
2441 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2442 assert(GV->hasInitializer());
2443 GV->setInitializer(Val);
2444 return;
2445 }
2446
2447 ConstantExpr *CE = cast<ConstantExpr>(Addr);
2448 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2449 GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2450}
2451
2452/// Given a map of address -> value, where the addresses are expected to be
2453/// some form of either a global or a constant GEP, set the initializer for
2454/// the address to be the value. This performs mostly the same function as
2455/// CommitValueTo() and EvaluateStoreInto(), but is optimized for the common
2456/// case where the addresses are GEPs sharing the same underlying global,
2457/// processing the GEPs in batches rather than individually.
2458///
2459/// To give an example, consider the following C++ code adapted from the clang
2460/// regression tests:
2461/// struct S {
2462/// int n = 10;
2463/// int m = 2 * n;
2464/// S(int a) : n(a) {}
2465/// };
2466///
2467/// template<typename T>
2468/// struct U {
2469/// T *r = &q;
2470/// T q = 42;
2471/// U *p = this;
2472/// };
2473///
2474/// U<S> e;
2475///
2476/// The global static constructor for 'e' will need to initialize 'r' and 'p'
2477/// of the outer struct, while also initializing the inner 'q' struct's 'n'
2478/// and 'm' members. This batch algorithm will simply use the general
2479/// CommitValueTo() method to handle the complex nested S struct
2480/// initialization of 'q', before processing the outermost members in a
2481/// single batch. Using CommitValueTo() to handle each member of the outer
2482/// struct is inefficient when the struct/array is very large, as we end up
2483/// creating and destroying constant arrays for each initialization.
2484/// For the above case, we expect the following IR to be generated:
2485///
2486/// %struct.U = type { %struct.S*, %struct.S, %struct.U* }
2487/// %struct.S = type { i32, i32 }
2488/// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e,
2489/// i64 0, i32 1),
2490/// %struct.S { i32 42, i32 84 }, %struct.U* @e }
2491/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex
2492/// constant expression, while the other two elements of @e are "simple".
2493static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
2494 SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs;
2495 SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs;
2496 SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs;
2497 SimpleCEs.reserve(Mem.size());
2498
2499 for (const auto &I : Mem) {
2500 if (auto *GV = dyn_cast<GlobalVariable>(I.first)) {
2501 GVs.push_back(std::make_pair(GV, I.second));
2502 } else {
2503 ConstantExpr *GEP = cast<ConstantExpr>(I.first);
2504 // We don't handle the deeply recursive case using the batch method.
2505 if (GEP->getNumOperands() > 3)
2506 ComplexCEs.push_back(std::make_pair(GEP, I.second));
2507 else
2508 SimpleCEs.push_back(std::make_pair(GEP, I.second));
2509 }
2510 }
2511
2512 // The algorithm below doesn't handle cases like nested structs, so use the
2513 // slower fully general method if we have to.
2514 for (auto ComplexCE : ComplexCEs)
6. Assuming '__begin1' is equal to '__end1'
2515 CommitValueTo(ComplexCE.second, ComplexCE.first);
2516
2517 for (auto GVPair : GVs) {
7. Assuming '__begin1' is equal to '__end1'
2518 assert(GVPair.first->hasInitializer());
2519 GVPair.first->setInitializer(GVPair.second);
2520 }
2521
2522 if (SimpleCEs.empty())
8. Taking false branch
2523 return;
2524
2525 // We cache a single global's initializer elements in case subsequent
2526 // address/value pairs use the same global. This avoids throwing away and
2527 // rebuilding the constant struct/vector/array just because one element is
2528 // modified at a time.
2529 SmallVector<Constant *, 32> Elts;
2530 Elts.reserve(SimpleCEs.size());
2531 GlobalVariable *CurrentGV = nullptr;
9. 'CurrentGV' initialized to a null pointer value
2532
2533 auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) {
2534 Constant *Init = GV->getInitializer();
13. Called C++ object pointer is null
2535 Type *Ty = Init->getType();
2536 if (Update) {
2537 if (CurrentGV) {
2538 assert(CurrentGV && "Expected a GV to commit to!");
2539 Type *CurrentInitTy = CurrentGV->getInitializer()->getType();
2540 // We have a valid cache that needs to be committed.
2541 if (StructType *STy = dyn_cast<StructType>(CurrentInitTy))
2542 CurrentGV->setInitializer(ConstantStruct::get(STy, Elts));
2543 else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy))
2544 CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts));
2545 else
2546 CurrentGV->setInitializer(ConstantVector::get(Elts));
2547 }
2548 if (CurrentGV == GV)
2549 return;
2550 // Need to clear and set up cache for new initializer.
2551 CurrentGV = GV;
2552 Elts.clear();
2553 unsigned NumElts;
2554 if (auto *STy = dyn_cast<StructType>(Ty))
2555 NumElts = STy->getNumElements();
2556 else
2557 NumElts = cast<SequentialType>(Ty)->getNumElements();
2558 for (unsigned i = 0, e = NumElts; i != e; ++i)
2559 Elts.push_back(Init->getAggregateElement(i));
2560 }
2561 };
2562
2563 for (auto CEPair : SimpleCEs) {
10. Assuming '__begin1' is equal to '__end1'
2564 ConstantExpr *GEP = CEPair.first;
2565 Constant *Val = CEPair.second;
2566
2567 GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0));
2568 commitAndSetupCache(GV, GV != CurrentGV);
2569 ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2));
2570 Elts[CI->getZExtValue()] = Val;
2571 }
2572 // The last initializer in the list still needs to be committed; the others
2573 // were committed as each new initializer was encountered.
2574 commitAndSetupCache(CurrentGV, true);
11. Passing null pointer value via 1st parameter 'GV'
12. Calling 'operator()'
2575}
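
The caching scheme, and the final flush that the analyzer's path runs through, can be sketched in isolation. This is plain C++ with std containers standing in for globals and constants (an assumption made purely for illustration); note how the sketch guards the 'no current global' sentinel before touching the cache, which is precisely the check whose absence the analyzer reports at line 2534 when the batch is empty:

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::map<std::string, std::vector<int> > Globals;
  Globals["a"] = std::vector<int>(3, 0);
  Globals["b"] = std::vector<int>(2, 0);

  // (global, (index, value)) updates; stores to one global are adjacent.
  typedef std::pair<std::string, std::pair<size_t, int> > Update;
  std::vector<Update> SimpleCEs;
  SimpleCEs.push_back(Update("a", std::make_pair(0, 1)));
  SimpleCEs.push_back(Update("a", std::make_pair(2, 3)));
  SimpleCEs.push_back(Update("b", std::make_pair(1, 7)));

  std::string CurrentGV; // The empty string plays the role of the null GV.
  std::vector<int> Elts; // Cached elements of CurrentGV's initializer.

  for (size_t i = 0; i != SimpleCEs.size(); ++i) {
    const std::string &GV = SimpleCEs[i].first;
    if (GV != CurrentGV) {    // Switching globals: commit the cache, reload it.
      if (!CurrentGV.empty()) // Guard corresponding to the missing null check.
        Globals[CurrentGV] = Elts;
      CurrentGV = GV;
      Elts = Globals[GV];
    }
    Elts[SimpleCEs[i].second.first] = SimpleCEs[i].second.second;
  }
  if (!CurrentGV.empty())     // Final flush; skipped if there were no updates.
    Globals[CurrentGV] = Elts;

  std::cout << Globals["a"][2] << ' ' << Globals["b"][1] << '\n'; // Prints: 3 7
}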
2576
2577/// Evaluate static constructors in the function, if we can. Return true if we
2578/// can, false otherwise.
2579static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2580 TargetLibraryInfo *TLI) {
2581 // Call the function.
2582 Evaluator Eval(DL, TLI);
2583 Constant *RetValDummy;
2584 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2585 SmallVector<Constant*, 0>());
2586
2587 if (EvalSuccess) {
1. Assuming 'EvalSuccess' is not equal to 0
2. Taking true branch
2588 ++NumCtorsEvaluated;
2589
2590 // We succeeded at evaluation: commit the result.
2591 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
3. Assuming 'DebugFlag' is 0
4. Loop condition is false. Exiting loop
2592 << F->getName() << "' to "
2593 << Eval.getMutatedMemory().size() << " stores.\n");
2594 BatchCommitValueTo(Eval.getMutatedMemory());
5. Calling 'BatchCommitValueTo'
2595 for (GlobalVariable *GV : Eval.getInvariants())
2596 GV->setConstant(true);
2597 }
2598
2599 return EvalSuccess;
2600}
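
As a hedged illustration of the kind of dynamic initializer the Evaluator can typically fold (the exact set of evaluable constructs is the Evaluator's business and is not guaranteed here): pure arithmetic, no I/O, no calls into unknown code.

struct Config {
  int Threads;
  int QueueDepth;
  Config() : Threads(4), QueueDepth(4 * 8) {} // Pure arithmetic, no side effects.
};
Config TheConfig; // Its constructor is listed in llvm.global_ctors; on success,
                  // globalopt rewrites @TheConfig's initializer to the computed
                  // { i32 4, i32 32 } and the ctor entry can be dropped.

int main() { return TheConfig.Threads; }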
2601
2602static int compareNames(Constant *const *A, Constant *const *B) {
2603 Value *AStripped = (*A)->stripPointerCastsNoFollowAliases();
2604 Value *BStripped = (*B)->stripPointerCastsNoFollowAliases();
2605 return AStripped->getName().compare(BStripped->getName());
2606}
2607
2608static void setUsedInitializer(GlobalVariable &V,
2609 const SmallPtrSetImpl<GlobalValue *> &Init) {
2610 if (Init.empty()) {
2611 V.eraseFromParent();
2612 return;
2613 }
2614
2615 // Common element type for the array: i8* pointers to the used globals.
2616 PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
2617
2618 SmallVector<Constant *, 8> UsedArray;
2619 for (GlobalValue *GV : Init) {
2620 Constant *Cast
2621 = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
2622 UsedArray.push_back(Cast);
2623 }
2624 // Sort to get deterministic order.
2625 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2626 ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2627
2628 Module *M = V.getParent();
2629 V.removeFromParent();
2630 GlobalVariable *NV =
2631 new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2632 ConstantArray::get(ATy, UsedArray), "");
2633 NV->takeName(&V);
2634 NV->setSection("llvm.metadata");
2635 delete &V;
2636}
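
For context, a source-level illustration (the attribute spelling assumes Clang/GCC): the entries handed to this function originate from constructs like __attribute__((used)), which clang lowers into the @llvm.used array rebuilt above.

__attribute__((used)) static int KeepMe = 1; // Placed in @llvm.used, so it
                                             // survives even with no IR uses.
static int DropMe = 2;                       // Unreferenced and unlisted: deletable.

int main() { return 0; }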
2637
2638namespace {
2639
2640/// An easy-to-access representation of llvm.used and llvm.compiler.used.
2641class LLVMUsed {
2642 SmallPtrSet<GlobalValue *, 8> Used;
2643 SmallPtrSet<GlobalValue *, 8> CompilerUsed;
2644 GlobalVariable *UsedV;
2645 GlobalVariable *CompilerUsedV;
2646
2647public:
2648 LLVMUsed(Module &M) {
2649 UsedV = collectUsedGlobalVariables(M, Used, false);
2650 CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
2651 }
2652
2653 using iterator = SmallPtrSet<GlobalValue *, 8>::iterator;
2654 using used_iterator_range = iterator_range<iterator>;
2655
2656 iterator usedBegin() { return Used.begin(); }
2657 iterator usedEnd() { return Used.end(); }
2658
2659 used_iterator_range used() {
2660 return used_iterator_range(usedBegin(), usedEnd());
2661 }
2662
2663 iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2664 iterator compilerUsedEnd() { return CompilerUsed.end(); }
2665
2666 used_iterator_range compilerUsed() {
2667 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2668 }
2669
2670 bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2671
2672 bool compilerUsedCount(GlobalValue *GV) const {
2673 return CompilerUsed.count(GV);
2674 }
2675
2676 bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2677 bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2678 bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2679
2680 bool compilerUsedInsert(GlobalValue *GV) {
2681 return CompilerUsed.insert(GV).second;
2682 }
2683
2684 void syncVariablesAndSets() {
2685 if (UsedV)
2686 setUsedInitializer(*UsedV, Used);
2687 if (CompilerUsedV)
2688 setUsedInitializer(*CompilerUsedV, CompilerUsed);
2689 }
2690};
2691
2692} // end anonymous namespace
2693
2694static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2695 if (GA.use_empty()) // No use at all.
2696 return false;
2697
2698 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2699 "We should have removed the duplicated "
2700 "element from llvm.compiler.used");
2701 if (!GA.hasOneUse())
2702 // Strictly more than one use, so at least one use is in neither llvm.used
2703 // nor llvm.compiler.used.
2704 return true;
2705
2706 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2707 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2708}
2709
2710static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
2711 const LLVMUsed &U) {
2712 unsigned N = 2;
2713 assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2714 "We should have removed the duplicated "
2715 "element from llvm.compiler.used");
2716 if (U.usedCount(&V) || U.compilerUsedCount(&V))
2717 ++N;
2718 return V.hasNUsesOrMore(N);
2719}
2720
2721static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2722 if (!GA.hasLocalLinkage())
2723 return true;
2724
2725 return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2726}
2727
2728static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2729 bool &RenameTarget) {
2730 RenameTarget = false;
2731 bool Ret = false;
2732 if (hasUseOtherThanLLVMUsed(GA, U))
2733 Ret = true;
2734
2735 // If the alias is externally visible, we may still be able to simplify it.
2736 if (!mayHaveOtherReferences(GA, U))
2737 return Ret;
2738
2739 // If the aliasee has internal linkage, give it the name and linkage
2740 // of the alias, and delete the alias. This turns:
2741 // define internal ... @f(...)
2742 // @a = alias ... @f
2743 // into:
2744 // define ... @a(...)
2745 Constant *Aliasee = GA.getAliasee();
2746 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2747 if (!Target->hasLocalLinkage())
2748 return Ret;
2749
2750 // Do not perform the transform if multiple aliases potentially target the
2751 // aliasee. This check also ensures that it is safe to replace the section
2752 // and other attributes of the aliasee with those of the alias.
2753 if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2754 return Ret;
2755
2756 RenameTarget = true;
2757 return true;
2758}
2759
2760static bool
2761OptimizeGlobalAliases(Module &M,
2762 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2763 bool Changed = false;
2764 LLVMUsed Used(M);
2765
2766 for (GlobalValue *GV : Used.used())
2767 Used.compilerUsedErase(GV);
2768
2769 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2770 I != E;) {
2771 GlobalAlias *J = &*I++;
2772
2773 // Aliases without names cannot be referenced outside this module.
2774 if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
2775 J->setLinkage(GlobalValue::InternalLinkage);
2776
2777 if (deleteIfDead(*J, NotDiscardableComdats)) {
2778 Changed = true;
2779 continue;
2780 }
2781
2782 // If the alias can change at link time, nothing can be done - bail out.
2783 if (J->isInterposable())
2784 continue;
2785
2786 Constant *Aliasee = J->getAliasee();
2787 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2788 // We can't trivially replace the alias with the aliasee if the aliasee
2789 // strips down to anything other than a plain global value.
2790 // TODO: Try to handle non-zero GEPs of local aliasees.
2791 if (!Target)
2792 continue;
2793 Target->removeDeadConstantUsers();
2794
2795 // Make all users of the alias use the aliasee instead.
2796 bool RenameTarget;
2797 if (!hasUsesToReplace(*J, Used, RenameTarget))
2798 continue;
2799
2800 J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
2801 ++NumAliasesResolved;
2802 Changed = true;
2803
2804 if (RenameTarget) {
2805 // Give the aliasee the name, linkage and other attributes of the alias.
2806 Target->takeName(&*J);
2807 Target->setLinkage(J->getLinkage());
2808 Target->setDSOLocal(J->isDSOLocal());
2809 Target->setVisibility(J->getVisibility());
2810 Target->setDLLStorageClass(J->getDLLStorageClass());
2811
2812 if (Used.usedErase(&*J))
2813 Used.usedInsert(Target);
2814
2815 if (Used.compilerUsedErase(&*J))
2816 Used.compilerUsedInsert(Target);
2817 } else if (mayHaveOtherReferences(*J, Used))
2818 continue;
2819
2820 // Delete the alias.
2821 M.getAliasList().erase(J);
2822 ++NumAliasesRemoved;
2823 Changed = true;
2824 }
2825
2826 Used.syncVariablesAndSets();
2827
2828 return Changed;
2829}
2830
2831static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
2832 LibFunc F = LibFunc_cxa_atexit;
2833 if (!TLI->has(F))
2834 return nullptr;
2835
2836 Function *Fn = M.getFunction(TLI->getName(F));
2837 if (!Fn)
2838 return nullptr;
2839
2840 // Make sure that the function has the correct prototype.
2841 if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2842 return nullptr;
2843
2844 return Fn;
2845}
2846
2847/// Returns whether the given function is an empty C++ destructor and can
2848/// therefore be eliminated.
2849/// Note that we assume that other optimization passes have already simplified
2850/// the code, so we simply check for a 'ret' in the entry block.
2851static bool cxxDtorIsEmpty(const Function &Fn) {
2852 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2853 // nounwind, but that doesn't seem worth doing.
2854 if (Fn.isDeclaration())
2855 return false;
2856
2857 for (auto &I : Fn.getEntryBlock()) {
2858 if (isa<DbgInfoIntrinsic>(I))
2859 continue;
2860 if (isa<ReturnInst>(I))
2861 return true;
2862 break;
2863 }
2864 return false;
2865}
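
A hedged source-level example of a destructor this predicate accepts once earlier passes have reduced it to a bare return:

struct Logger {
  ~Logger() {} // Entry block simplifies to 'ret void': cxxDtorIsEmpty() is true.
};
Logger L;      // Static-storage object: the synthesized constructor registers
               // ~Logger through __cxa_atexit, the call the next function removes.

int main() { return 0; }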
2866
2867static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2868 /// Itanium C++ ABI p3.3.5:
2869 ///
2870 /// After constructing a global (or local static) object, that will require
2871 /// destruction on exit, a termination function is registered as follows:
2872 ///
2873 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2874 ///
2875 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2876 /// call f(p) when DSO d is unloaded, before all such termination calls
2877 /// registered before this one. It returns zero if registration is
2878 /// successful, nonzero on failure.
2879
2880 // This pass will look for calls to __cxa_atexit where the registered
2881 // function is trivially empty, and remove them.
2882 bool Changed = false;
2883
2884 for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
2885 I != E;) {
2886 // We're only interested in calls. Theoretically, we could handle invoke
2887 // instructions as well, but neither llvm-gcc nor clang generates invokes
2888 // to __cxa_atexit.
2889 CallInst *CI = dyn_cast<CallInst>(*I++);
2890 if (!CI)
2891 continue;
2892
2893 Function *DtorFn =
2894 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2895 if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
2896 continue;
2897
2898 // Just remove the call.
2899 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2900 CI->eraseFromParent();
2901
2902 ++NumCXXDtorsRemoved;
2903
2904 Changed |= true;
2905 }
2906
2907 return Changed;
2908}
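
What such a registration looks like at the source/ABI boundary, sketched directly against the Itanium prototype quoted above (the thunk and object names are invented for illustration; links on Itanium C++ platforms where the runtime provides __cxa_atexit and __dso_handle):

extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
extern "C" void *__dso_handle; // Provided by the C++ runtime / CRT.

static char Obj;                  // Stand-in for a static-storage object.
static void dtor_thunk(void *) {} // Empty: cxxDtorIsEmpty() would accept it.

int main() {
  // If dtor_thunk is provably empty, the pass deletes this call and replaces
  // its result with the null value of its type (0, i.e. "success").
  return __cxa_atexit(&dtor_thunk, &Obj, &__dso_handle);
}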
2909
2910static bool optimizeGlobalsInModule(
2911 Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
2912 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2913 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2914 function_ref<DominatorTree &(Function &)> LookupDomTree) {
2915 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2916 bool Changed = false;
2917 bool LocalChange = true;
2918 while (LocalChange) {
2919 LocalChange = false;
2920
2921 NotDiscardableComdats.clear();
2922 for (const GlobalVariable &GV : M.globals())
2923 if (const Comdat *C = GV.getComdat())
2924 if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2925 NotDiscardableComdats.insert(C);
2926 for (Function &F : M)
2927 if (const Comdat *C = F.getComdat())
2928 if (!F.isDefTriviallyDead())
2929 NotDiscardableComdats.insert(C);
2930 for (GlobalAlias &GA : M.aliases())
2931 if (const Comdat *C = GA.getComdat())
2932 if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2933 NotDiscardableComdats.insert(C);
2934
2935 // Delete functions that are trivially dead, and convert eligible internal functions from ccc to fastcc.
2936 LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree,
2937 NotDiscardableComdats);
2938
2939 // Optimize global_ctors list.
2940 LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
2941 return EvaluateStaticConstructor(F, DL, TLI);
2942 });
2943
2944 // Optimize non-address-taken globals.
2945 LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree,
2946 NotDiscardableComdats);
2947
2948 // Resolve aliases, when possible.
2949 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2950
2951 // Try to remove trivial global destructors if they are not removed
2952 // already.
2953 Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
2954 if (CXAAtExitFn)
2955 LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2956
2957 Changed |= LocalChange;
2958 }
2959
2960 // TODO: Move all global ctors functions to the end of the module for code
2961 // layout.
2962
2963 return Changed;
2964}
2965
2966PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2967 auto &DL = M.getDataLayout();
2968 auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
2969 auto &FAM =
2970 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2971 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2972 return FAM.getResult<DominatorTreeAnalysis>(F);
2973 };
2974 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2975 return FAM.getResult<TargetIRAnalysis>(F);
2976 };
2977
2978 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2979 return FAM.getResult<BlockFrequencyAnalysis>(F);
2980 };
2981
2982 if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree))
2983 return PreservedAnalyses::all();
2984 return PreservedAnalyses::none();
2985}
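
A hedged driver sketch for the new pass manager entry point above; the analysis-manager boilerplate follows the usual PassBuilder registration dance and is an assumption of this example, not something this file provides:

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/GlobalOpt.h"

using namespace llvm;

void runGlobalOptNewPM(Module &M) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  // Wire up the analysis managers so the cross-IR-unit proxies resolve.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  MPM.addPass(GlobalOptPass()); // The pass defined in this file.
  MPM.run(M, MAM);
}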
2986
2987namespace {
2988
2989struct GlobalOptLegacyPass : public ModulePass {
2990 static char ID; // Pass identification, replacement for typeid
2991
2992 GlobalOptLegacyPass() : ModulePass(ID) {
2993 initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
2994 }
2995
2996 bool runOnModule(Module &M) override {
2997 if (skipModule(M))
2998 return false;
2999
3000 auto &DL = M.getDataLayout();
3001 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
3002 auto LookupDomTree = [this](Function &F) -> DominatorTree & {
3003 return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
3004 };
3005 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
3006 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3007 };
3008
3009 auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
3010 return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
3011 };
3012
3013 return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree);
3014 }
3015
3016 void getAnalysisUsage(AnalysisUsage &AU) const override {
3017 AU.addRequired<TargetLibraryInfoWrapperPass>();
3018 AU.addRequired<TargetTransformInfoWrapperPass>();
3019 AU.addRequired<DominatorTreeWrapperPass>();
3020 AU.addRequired<BlockFrequencyInfoWrapperPass>();
3021 }
3022};
3023
3024} // end anonymous namespace
3025
3026char GlobalOptLegacyPass::ID = 0;
3027
3028INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
3029 "Global Variable Optimizer", false, false)
3030INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3031INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
3032INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
3033INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3034INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
3035 "Global Variable Optimizer", false, false)
3036
3037ModulePass *llvm::createGlobalOptimizerPass() {
3038 return new GlobalOptLegacyPass();
3039}
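
And the corresponding hedged sketch for the legacy entry point (again, the driver shape is an assumption of this example, not part of this file):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h" // Declares createGlobalOptimizerPass().

void runGlobalOptLegacy(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createGlobalOptimizerPass()); // PM takes ownership of the pass.
  PM.run(M);                                 // Returns true if M was modified.
}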