LLVM  14.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1 //===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges globals with internal linkage into one. This way all the
10 // globals which were merged into a biggest one can be addressed using offsets
11 // from the same base pointer (no need for separate base pointer for each of the
12 // global). Such a transformation can significantly reduce the register pressure
13 // when many globals are involved.
14 //
15 // For example, consider the code which touches several global variables at
16 // once:
17 //
18 // static int foo[N], bar[N], baz[N];
19 //
20 // for (i = 0; i < N; ++i) {
21 // foo[i] = bar[i] * baz[i];
22 // }
23 //
24 // On ARM the addresses of 3 arrays should be kept in the registers, thus
25 // this code has quite large register pressure (loop body):
26 //
27 // ldr r1, [r5], #4
28 // ldr r2, [r6], #4
29 // mul r1, r2, r1
30 // str r1, [r0], #4
31 //
32 // Pass converts the code to something like:
33 //
34 // static struct {
35 // int foo[N];
36 // int bar[N];
37 // int baz[N];
38 // } merged;
39 //
40 // for (i = 0; i < N; ++i) {
41 // merged.foo[i] = merged.bar[i] * merged.baz[i];
42 // }
43 //
44 // and in ARM code this becomes:
45 //
46 // ldr r0, [r5, #40]
47 // ldr r1, [r5, #80]
48 // mul r0, r1, r0
49 // str r0, [r5], #4
50 //
51 // note that we saved 2 registers here almostly "for free".
52 //
53 // However, merging globals can have tradeoffs:
54 // - it confuses debuggers, tools, and users
55 // - it makes linker optimizations less useful (order files, LOHs, ...)
56 // - it forces usage of indexed addressing (which isn't necessarily "free")
57 // - it can increase register pressure when the uses are disparate enough.
58 //
59 // We use heuristics to discover the best global grouping we can (cf cl::opts).
60 //
61 // ===---------------------------------------------------------------------===//
62 
63 #include "llvm/ADT/BitVector.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/SmallPtrSet.h"
66 #include "llvm/ADT/SmallVector.h"
67 #include "llvm/ADT/Statistic.h"
68 #include "llvm/ADT/StringRef.h"
69 #include "llvm/ADT/Triple.h"
70 #include "llvm/ADT/Twine.h"
71 #include "llvm/CodeGen/Passes.h"
72 #include "llvm/IR/BasicBlock.h"
73 #include "llvm/IR/Constants.h"
74 #include "llvm/IR/DataLayout.h"
75 #include "llvm/IR/DerivedTypes.h"
76 #include "llvm/IR/Function.h"
77 #include "llvm/IR/GlobalAlias.h"
78 #include "llvm/IR/GlobalValue.h"
79 #include "llvm/IR/GlobalVariable.h"
80 #include "llvm/IR/Instruction.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/Type.h"
83 #include "llvm/IR/Use.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/InitializePasses.h"
86 #include "llvm/MC/SectionKind.h"
87 #include "llvm/Pass.h"
88 #include "llvm/Support/Casting.h"
90 #include "llvm/Support/Debug.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstddef>
97 #include <cstdint>
98 #include <string>
99 #include <vector>
100 
101 using namespace llvm;
102 
103 #define DEBUG_TYPE "global-merge"
104 
105 // FIXME: This is only useful as a last-resort way to disable the pass.
106 static cl::opt<bool>
107 EnableGlobalMerge("enable-global-merge", cl::Hidden,
108  cl::desc("Enable the global merge pass"),
109  cl::init(true));
110 
111 static cl::opt<unsigned>
112 GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
113  cl::desc("Set maximum offset for global merge pass"),
114  cl::init(0));
115 
117  "global-merge-group-by-use", cl::Hidden,
118  cl::desc("Improve global merge pass to look at uses"), cl::init(true));
119 
121  "global-merge-ignore-single-use", cl::Hidden,
122  cl::desc("Improve global merge pass to ignore globals only used alone"),
123  cl::init(true));
124 
125 static cl::opt<bool>
126 EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
127  cl::desc("Enable global merge pass on constants"),
128  cl::init(false));
129 
130 // FIXME: this could be a transitional option, and we probably need to remove
131 // it if only we are sure this optimization could always benefit all targets.
133 EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
134  cl::desc("Enable global merge pass on external linkage"));
135 
136 STATISTIC(NumMerged, "Number of globals merged");
137 
138 namespace {
139 
140  class GlobalMerge : public FunctionPass {
141  const TargetMachine *TM = nullptr;
142 
143  // FIXME: Infer the maximum possible offset depending on the actual users
144  // (these max offsets are different for the users inside Thumb or ARM
145  // functions), see the code that passes in the offset in the ARM backend
146  // for more information.
147  unsigned MaxOffset;
148 
149  /// Whether we should try to optimize for size only.
150  /// Currently, this applies a dead simple heuristic: only consider globals
151  /// used in minsize functions for merging.
152  /// FIXME: This could learn about optsize, and be used in the cost model.
153  bool OnlyOptimizeForSize = false;
154 
155  /// Whether we should merge global variables that have external linkage.
156  bool MergeExternalGlobals = false;
157 
158  bool IsMachO;
159 
160  bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
161  Module &M, bool isConst, unsigned AddrSpace) const;
162 
163  /// Merge everything in \p Globals for which the corresponding bit
164  /// in \p GlobalSet is set.
165  bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
166  const BitVector &GlobalSet, Module &M, bool isConst,
167  unsigned AddrSpace) const;
168 
169  /// Check if the given variable has been identified as must keep
170  /// \pre setMustKeepGlobalVariables must have been called on the Module that
171  /// contains GV
172  bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
173  return MustKeepGlobalVariables.count(GV);
174  }
175 
176  /// Collect every variables marked as "used" or used in a landing pad
177  /// instruction for this Module.
178  void setMustKeepGlobalVariables(Module &M);
179 
180  /// Collect every variables marked as "used"
182 
183  /// Keep track of the GlobalVariable that must not be merged away
184  SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
185 
186  public:
187  static char ID; // Pass identification, replacement for typeid.
188 
189  explicit GlobalMerge()
190  : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) {
192  }
193 
194  explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
195  bool OnlyOptimizeForSize, bool MergeExternalGlobals)
196  : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
197  OnlyOptimizeForSize(OnlyOptimizeForSize),
198  MergeExternalGlobals(MergeExternalGlobals) {
200  }
201 
202  bool doInitialization(Module &M) override;
203  bool runOnFunction(Function &F) override;
204  bool doFinalization(Module &M) override;
205 
206  StringRef getPassName() const override { return "Merge internal globals"; }
207 
208  void getAnalysisUsage(AnalysisUsage &AU) const override {
209  AU.setPreservesCFG();
211  }
212  };
213 
214 } // end anonymous namespace
215 
216 char GlobalMerge::ID = 0;
217 
218 INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
219 
220 bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
221  Module &M, bool isConst, unsigned AddrSpace) const {
222  auto &DL = M.getDataLayout();
223  // FIXME: Find better heuristics
225  Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
226  // We don't support scalable global variables.
227  return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() <
228  DL.getTypeAllocSize(GV2->getValueType()).getFixedSize();
229  });
230 
231  // If we want to just blindly group all globals together, do so.
232  if (!GlobalMergeGroupByUse) {
233  BitVector AllGlobals(Globals.size());
234  AllGlobals.set();
235  return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
236  }
237 
238  // If we want to be smarter, look at all uses of each global, to try to
239  // discover all sets of globals used together, and how many times each of
240  // these sets occurred.
241  //
242  // Keep this reasonably efficient, by having an append-only list of all sets
243  // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
244  // code (currently, a Function) to the set of globals seen so far that are
245  // used together in that unit (GlobalUsesByFunction).
246  //
247  // When we look at the Nth global, we know that any new set is either:
248  // - the singleton set {N}, containing this global only, or
249  // - the union of {N} and a previously-discovered set, containing some
250  // combination of the previous N-1 globals.
251  // Using that knowledge, when looking at the Nth global, we can keep:
252  // - a reference to the singleton set {N} (CurGVOnlySetIdx)
253  // - a list mapping each previous set to its union with {N} (EncounteredUGS),
254  // if it actually occurs.
255 
256  // We keep track of the sets of globals used together "close enough".
257  struct UsedGlobalSet {
258  BitVector Globals;
259  unsigned UsageCount = 1;
260 
261  UsedGlobalSet(size_t Size) : Globals(Size) {}
262  };
263 
264  // Each set is unique in UsedGlobalSets.
265  std::vector<UsedGlobalSet> UsedGlobalSets;
266 
267  // Avoid repeating the create-global-set pattern.
268  auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
269  UsedGlobalSets.emplace_back(Globals.size());
270  return UsedGlobalSets.back();
271  };
272 
273  // The first set is the empty set.
274  CreateGlobalSet().UsageCount = 0;
275 
276  // We define "close enough" to be "in the same function".
277  // FIXME: Grouping uses by function is way too aggressive, so we should have
278  // a better metric for distance between uses.
279  // The obvious alternative would be to group by BasicBlock, but that's in
280  // turn too conservative..
281  // Anything in between wouldn't be trivial to compute, so just stick with
282  // per-function grouping.
283 
284  // The value type is an index into UsedGlobalSets.
285  // The default (0) conveniently points to the empty set.
286  DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
287 
288  // Now, look at each merge-eligible global in turn.
289 
290  // Keep track of the sets we already encountered to which we added the
291  // current global.
292  // Each element matches the same-index element in UsedGlobalSets.
293  // This lets us efficiently tell whether a set has already been expanded to
294  // include the current global.
295  std::vector<size_t> EncounteredUGS;
296 
297  for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
298  GlobalVariable *GV = Globals[GI];
299 
300  // Reset the encountered sets for this global...
301  std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
302  // ...and grow it in case we created new sets for the previous global.
303  EncounteredUGS.resize(UsedGlobalSets.size());
304 
305  // We might need to create a set that only consists of the current global.
306  // Keep track of its index into UsedGlobalSets.
307  size_t CurGVOnlySetIdx = 0;
308 
309  // For each global, look at all its Uses.
310  for (auto &U : GV->uses()) {
311  // This Use might be a ConstantExpr. We're interested in Instruction
312  // users, so look through ConstantExpr...
313  Use *UI, *UE;
314  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
315  if (CE->use_empty())
316  continue;
317  UI = &*CE->use_begin();
318  UE = nullptr;
319  } else if (isa<Instruction>(U.getUser())) {
320  UI = &U;
321  UE = UI->getNext();
322  } else {
323  continue;
324  }
325 
326  // ...to iterate on all the instruction users of the global.
327  // Note that we iterate on Uses and not on Users to be able to getNext().
328  for (; UI != UE; UI = UI->getNext()) {
329  Instruction *I = dyn_cast<Instruction>(UI->getUser());
330  if (!I)
331  continue;
332 
333  Function *ParentFn = I->getParent()->getParent();
334 
335  // If we're only optimizing for size, ignore non-minsize functions.
336  if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
337  continue;
338 
339  size_t UGSIdx = GlobalUsesByFunction[ParentFn];
340 
341  // If this is the first global the basic block uses, map it to the set
342  // consisting of this global only.
343  if (!UGSIdx) {
344  // If that set doesn't exist yet, create it.
345  if (!CurGVOnlySetIdx) {
346  CurGVOnlySetIdx = UsedGlobalSets.size();
347  CreateGlobalSet().Globals.set(GI);
348  } else {
349  ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
350  }
351 
352  GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
353  continue;
354  }
355 
356  // If we already encountered this BB, just increment the counter.
357  if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
358  ++UsedGlobalSets[UGSIdx].UsageCount;
359  continue;
360  }
361 
362  // If not, the previous set wasn't actually used in this function.
363  --UsedGlobalSets[UGSIdx].UsageCount;
364 
365  // If we already expanded the previous set to include this global, just
366  // reuse that expanded set.
367  if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
368  ++UsedGlobalSets[ExpandedIdx].UsageCount;
369  GlobalUsesByFunction[ParentFn] = ExpandedIdx;
370  continue;
371  }
372 
373  // If not, create a new set consisting of the union of the previous set
374  // and this global. Mark it as encountered, so we can reuse it later.
375  GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
376  UsedGlobalSets.size();
377 
378  UsedGlobalSet &NewUGS = CreateGlobalSet();
379  NewUGS.Globals.set(GI);
380  NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
381  }
382  }
383  }
384 
385  // Now we found a bunch of sets of globals used together. We accumulated
386  // the number of times we encountered the sets (i.e., the number of blocks
387  // that use that exact set of globals).
388  //
389  // Multiply that by the size of the set to give us a crude profitability
390  // metric.
391  llvm::stable_sort(UsedGlobalSets,
392  [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
393  return UGS1.Globals.count() * UGS1.UsageCount <
394  UGS2.Globals.count() * UGS2.UsageCount;
395  });
396 
397  // We can choose to merge all globals together, but ignore globals never used
398  // with another global. This catches the obviously non-profitable cases of
399  // having a single global, but is aggressive enough for any other case.
401  BitVector AllGlobals(Globals.size());
402  for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
403  const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
404  if (UGS.UsageCount == 0)
405  continue;
406  if (UGS.Globals.count() > 1)
407  AllGlobals |= UGS.Globals;
408  }
409  return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
410  }
411 
412  // Starting from the sets with the best (=biggest) profitability, find a
413  // good combination.
414  // The ideal (and expensive) solution can only be found by trying all
415  // combinations, looking for the one with the best profitability.
416  // Don't be smart about it, and just pick the first compatible combination,
417  // starting with the sets with the best profitability.
418  BitVector PickedGlobals(Globals.size());
419  bool Changed = false;
420 
421  for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
422  const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
423  if (UGS.UsageCount == 0)
424  continue;
425  if (PickedGlobals.anyCommon(UGS.Globals))
426  continue;
427  PickedGlobals |= UGS.Globals;
428  // If the set only contains one global, there's no point in merging.
429  // Ignore the global for inclusion in other sets though, so keep it in
430  // PickedGlobals.
431  if (UGS.Globals.count() < 2)
432  continue;
433  Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
434  }
435 
436  return Changed;
437 }
438 
439 bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
440  const BitVector &GlobalSet, Module &M, bool isConst,
441  unsigned AddrSpace) const {
442  assert(Globals.size() > 1);
443 
444  Type *Int32Ty = Type::getInt32Ty(M.getContext());
445  Type *Int8Ty = Type::getInt8Ty(M.getContext());
446  auto &DL = M.getDataLayout();
447 
448  LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
449  << GlobalSet.find_first() << "\n");
450 
451  bool Changed = false;
452  ssize_t i = GlobalSet.find_first();
453  while (i != -1) {
454  ssize_t j = 0;
455  uint64_t MergedSize = 0;
456  std::vector<Type*> Tys;
457  std::vector<Constant*> Inits;
458  std::vector<unsigned> StructIdxs;
459 
460  bool HasExternal = false;
461  StringRef FirstExternalName;
462  Align MaxAlign;
463  unsigned CurIdx = 0;
464  for (j = i; j != -1; j = GlobalSet.find_next(j)) {
465  Type *Ty = Globals[j]->getValueType();
466 
467  // Make sure we use the same alignment AsmPrinter would use.
468  Align Alignment = DL.getPreferredAlign(Globals[j]);
469  unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
470  MergedSize += Padding;
471  MergedSize += DL.getTypeAllocSize(Ty);
472  if (MergedSize > MaxOffset) {
473  break;
474  }
475  if (Padding) {
476  Tys.push_back(ArrayType::get(Int8Ty, Padding));
477  Inits.push_back(ConstantAggregateZero::get(Tys.back()));
478  ++CurIdx;
479  }
480  Tys.push_back(Ty);
481  Inits.push_back(Globals[j]->getInitializer());
482  StructIdxs.push_back(CurIdx++);
483 
484  MaxAlign = std::max(MaxAlign, Alignment);
485 
486  if (Globals[j]->hasExternalLinkage() && !HasExternal) {
487  HasExternal = true;
488  FirstExternalName = Globals[j]->getName();
489  }
490  }
491 
492  // Exit early if there is only one global to merge.
493  if (Tys.size() < 2) {
494  i = j;
495  continue;
496  }
497 
498  // If merged variables doesn't have external linkage, we needn't to expose
499  // the symbol after merging.
500  GlobalValue::LinkageTypes Linkage = HasExternal
503  // Use a packed struct so we can control alignment.
504  StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
505  Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
506 
507  // On Darwin external linkage needs to be preserved, otherwise
508  // dsymutil cannot preserve the debug info for the merged
509  // variables. If they have external linkage, use the symbol name
510  // of the first variable merged as the suffix of global symbol
511  // name. This avoids a link-time naming conflict for the
512  // _MergedGlobals symbols.
513  Twine MergedName =
514  (IsMachO && HasExternal)
515  ? "_MergedGlobals_" + FirstExternalName
516  : "_MergedGlobals";
517  auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
518  auto *MergedGV = new GlobalVariable(
519  M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
520  GlobalVariable::NotThreadLocal, AddrSpace);
521 
522  MergedGV->setAlignment(MaxAlign);
523  MergedGV->setSection(Globals[i]->getSection());
524 
525  const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
526  for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
527  GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
528  std::string Name(Globals[k]->getName());
529  GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
531  Globals[k]->getDLLStorageClass();
532 
533  // Copy metadata while adjusting any debug info metadata by the original
534  // global's offset within the merged global.
535  MergedGV->copyMetadata(Globals[k],
536  MergedLayout->getElementOffset(StructIdxs[idx]));
537 
538  Constant *Idx[2] = {
540  ConstantInt::get(Int32Ty, StructIdxs[idx]),
541  };
542  Constant *GEP =
543  ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
544  Globals[k]->replaceAllUsesWith(GEP);
545  Globals[k]->eraseFromParent();
546 
547  // When the linkage is not internal we must emit an alias for the original
548  // variable name as it may be accessed from another object. On non-Mach-O
549  // we can also emit an alias for internal linkage as it's safe to do so.
550  // It's not safe on Mach-O as the alias (and thus the portion of the
551  // MergedGlobals variable) may be dead stripped at link time.
552  if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
553  GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
554  Linkage, Name, GEP, &M);
555  GA->setVisibility(Visibility);
556  GA->setDLLStorageClass(DLLStorage);
557  }
558 
559  NumMerged++;
560  }
561  Changed = true;
562  i = j;
563  }
564 
565  return Changed;
566 }
567 
569  // Extract global variables from llvm.used array
570  const GlobalVariable *GV = M.getGlobalVariable(Name);
571  if (!GV || !GV->hasInitializer()) return;
572 
573  // Should be an array of 'i8*'.
574  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
575 
576  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
577  if (const GlobalVariable *G =
578  dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
579  MustKeepGlobalVariables.insert(G);
580 }
581 
582 void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
583  collectUsedGlobalVariables(M, "llvm.used");
584  collectUsedGlobalVariables(M, "llvm.compiler.used");
585 
586  for (Function &F : M) {
587  for (BasicBlock &BB : F) {
588  Instruction *Pad = BB.getFirstNonPHI();
589  if (!Pad->isEHPad())
590  continue;
591 
592  // Keep globals used by landingpads and catchpads.
593  for (const Use &U : Pad->operands()) {
594  if (const GlobalVariable *GV =
595  dyn_cast<GlobalVariable>(U->stripPointerCasts()))
596  MustKeepGlobalVariables.insert(GV);
597  }
598  }
599  }
600 }
601 
602 bool GlobalMerge::doInitialization(Module &M) {
603  if (!EnableGlobalMerge)
604  return false;
605 
606  IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
607 
608  auto &DL = M.getDataLayout();
610  Globals, ConstGlobals, BSSGlobals;
611  bool Changed = false;
612  setMustKeepGlobalVariables(M);
613 
614  // Grab all non-const globals.
615  for (auto &GV : M.globals()) {
616  // Merge is safe for "normal" internal or external globals only
617  if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
618  continue;
619 
620  // It's not safe to merge globals that may be preempted
621  if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
622  continue;
623 
624  if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
625  !GV.hasInternalLinkage())
626  continue;
627 
628  PointerType *PT = dyn_cast<PointerType>(GV.getType());
629  assert(PT && "Global variable is not a pointer!");
630 
631  unsigned AddressSpace = PT->getAddressSpace();
633 
634  // Ignore all 'special' globals.
635  if (GV.getName().startswith("llvm.") ||
636  GV.getName().startswith(".llvm."))
637  continue;
638 
639  // Ignore all "required" globals:
640  if (isMustKeepGlobalVariable(&GV))
641  continue;
642 
643  Type *Ty = GV.getValueType();
644  if (DL.getTypeAllocSize(Ty) < MaxOffset) {
645  if (TM &&
647  BSSGlobals[{AddressSpace, Section}].push_back(&GV);
648  else if (GV.isConstant())
649  ConstGlobals[{AddressSpace, Section}].push_back(&GV);
650  else
651  Globals[{AddressSpace, Section}].push_back(&GV);
652  }
653  }
654 
655  for (auto &P : Globals)
656  if (P.second.size() > 1)
657  Changed |= doMerge(P.second, M, false, P.first.first);
658 
659  for (auto &P : BSSGlobals)
660  if (P.second.size() > 1)
661  Changed |= doMerge(P.second, M, false, P.first.first);
662 
664  for (auto &P : ConstGlobals)
665  if (P.second.size() > 1)
666  Changed |= doMerge(P.second, M, true, P.first.first);
667 
668  return Changed;
669 }
670 
672  return false;
673 }
674 
675 bool GlobalMerge::doFinalization(Module &M) {
676  MustKeepGlobalVariables.clear();
677  return false;
678 }
679 
681  bool OnlyOptimizeForSize,
682  bool MergeExternalByDefault) {
683  bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
684  MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
685  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
686 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:286
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::initializeGlobalMergePass
void initializeGlobalMergePass(PassRegistry &)
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::StructType::get
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:408
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:434
llvm::Function
Definition: Function.h:62
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1352
Pass.h
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:112
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:687
EnableGlobalMergeOnConst
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
llvm::GlobalAlias
Definition: GlobalAlias.h:27
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::object::getSection
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:402
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
Module.h
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ConstantExpr::getInBoundsGetElementPtr
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1237
llvm::GlobalValue::LinkageTypes
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:47
GlobalMergeGroupByUse
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
Use.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:778
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:78
Instruction.h
CommandLine.h
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:843
GlobalValue.h
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::GlobalValue::VisibilityTypes
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:62
Constants.h
Twine.h
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:409
llvm::Triple::isOSBinFormatMachO
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:648
llvm::Instruction
Definition: Instruction.h:45
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
BitVector.h
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
SmallPtrSet.h
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:449
llvm::BitVector
Definition: BitVector.h:74
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetLoweringObjectFile::getKindForGlobal
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Definition: TargetLoweringObjectFile.cpp:202
Type.h
Passes.h
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
BasicBlock.h
llvm::cl::opt< bool >
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::cl::BOU_UNSET
@ BOU_UNSET
Definition: CommandLine.h:623
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:613
uint64_t
GlobalMergeIgnoreSingleUse
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
llvm::GlobalVariable::hasImplicitSection
bool hasImplicitSection() const
Check if section name is present.
Definition: GlobalVariable.h:243
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
EnableGlobalMerge
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
EnableGlobalMergeOnExternal
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:244
getInitializer
static Constant * getInitializer(Constant *C)
Definition: Evaluator.cpp:204
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:640
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Triple.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
TargetLoweringObjectFile.h
llvm::GlobalValue::setDLLStorageClass
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:265
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
GlobalMergeMaxOffset
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:687
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1686
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:480
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:643
GlobalVariable.h
Casting.h
Function.h
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:661
llvm::BitVector::find_next
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:300
SectionKind.h
GlobalAlias.h
llvm::createGlobalMergePass
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
Definition: GlobalMerge.cpp:680
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::GlobalValue::DLLStorageClassTypes
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:69
llvm::cl::BOU_TRUE
@ BOU_TRUE
Definition: CommandLine.h:623
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition: GlobalVariable.h:153
llvm::BitVector::find_first
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:292
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:669
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:93
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1675
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GlobalMerge.cpp:103
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
InitializePasses.h
Debug.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37