LLVM  14.0.0git
GlobalSplit.cpp
Go to the documentation of this file.
1 //===- GlobalSplit.cpp - global variable splitter -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass uses inrange annotations on GEP indices to split globals where
10 // beneficial. Clang currently attaches these annotations to references to
11 // virtual table globals under the Itanium ABI for the benefit of the
12 // whole-program virtual call optimization and control flow integrity passes.
13 //
14 //===----------------------------------------------------------------------===//
15 
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
#include <cstdint>
#include <vector>
38 
39 using namespace llvm;
40 
41 static bool splitGlobal(GlobalVariable &GV) {
42  // If the address of the global is taken outside of the module, we cannot
43  // apply this transformation.
44  if (!GV.hasLocalLinkage())
45  return false;
46 
47  // We currently only know how to split ConstantStructs.
48  auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer());
49  if (!Init)
50  return false;
51 
52  // Verify that each user of the global is an inrange getelementptr constant.
53  // From this it follows that any loads from or stores to that global must use
54  // a pointer derived from an inrange getelementptr constant, which is
55  // sufficient to allow us to apply the splitting transform.
56  for (User *U : GV.users()) {
57  if (!isa<Constant>(U))
58  return false;
59 
60  auto *GEP = dyn_cast<GEPOperator>(U);
61  if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 ||
62  !isa<ConstantInt>(GEP->getOperand(1)) ||
63  !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
64  !isa<ConstantInt>(GEP->getOperand(2)))
65  return false;
66  }
67 
69  GV.getMetadata(LLVMContext::MD_type, Types);
70 
71  const DataLayout &DL = GV.getParent()->getDataLayout();
72  const StructLayout *SL = DL.getStructLayout(Init->getType());
73 
75 
76  std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands());
77  for (unsigned I = 0; I != Init->getNumOperands(); ++I) {
78  // Build a global representing this split piece.
79  auto *SplitGV =
80  new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(),
82  Init->getOperand(I), GV.getName() + "." + utostr(I));
83  SplitGlobals[I] = SplitGV;
84 
85  unsigned SplitBegin = SL->getElementOffset(I);
86  unsigned SplitEnd = (I == Init->getNumOperands() - 1)
87  ? SL->getSizeInBytes()
88  : SL->getElementOffset(I + 1);
89 
90  // Rebuild type metadata, adjusting by the split offset.
91  // FIXME: See if we can use DW_OP_piece to preserve debug metadata here.
92  for (MDNode *Type : Types) {
93  uint64_t ByteOffset = cast<ConstantInt>(
94  cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
95  ->getZExtValue();
96  // Type metadata may be attached one byte after the end of the vtable, for
97  // classes without virtual methods in Itanium ABI. AFAIK, it is never
98  // attached to the first byte of a vtable. Subtract one to get the right
99  // slice.
100  // This is making an assumption that vtable groups are the only kinds of
101  // global variables that !type metadata can be attached to, and that they
102  // are either Itanium ABI vtable groups or contain a single vtable (i.e.
103  // Microsoft ABI vtables).
104  uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
105  if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
106  continue;
107  SplitGV->addMetadata(
108  LLVMContext::MD_type,
109  *MDNode::get(GV.getContext(),
110  {ConstantAsMetadata::get(
111  ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
112  Type->getOperand(1)}));
113  }
114 
115  if (GV.hasMetadata(LLVMContext::MD_vcall_visibility))
116  SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility());
117  }
118 
119  for (User *U : GV.users()) {
120  auto *GEP = cast<GEPOperator>(U);
121  unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
122  if (I >= SplitGlobals.size())
123  continue;
124 
126  Ops.push_back(ConstantInt::get(Int32Ty, 0));
127  for (unsigned I = 3; I != GEP->getNumOperands(); ++I)
128  Ops.push_back(GEP->getOperand(I));
129 
130  auto *NewGEP = ConstantExpr::getGetElementPtr(
131  SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops,
132  GEP->isInBounds());
133  GEP->replaceAllUsesWith(NewGEP);
134  }
135 
136  // Finally, remove the original global. Any remaining uses refer to invalid
137  // elements of the global, so replace with undef.
138  if (!GV.use_empty())
140  GV.eraseFromParent();
141  return true;
142 }
143 
144 static bool splitGlobals(Module &M) {
145  // First, see if the module uses either of the llvm.type.test or
146  // llvm.type.checked.load intrinsics, which indicates that splitting globals
147  // may be beneficial.
148  Function *TypeTestFunc =
149  M.getFunction(Intrinsic::getName(Intrinsic::type_test));
150  Function *TypeCheckedLoadFunc =
151  M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
152  if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
153  (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
154  return false;
155 
156  bool Changed = false;
157  for (auto I = M.global_begin(); I != M.global_end();) {
158  GlobalVariable &GV = *I;
159  ++I;
160  Changed |= splitGlobal(GV);
161  }
162  return Changed;
163 }
164 
165 namespace {
166 
167 struct GlobalSplit : public ModulePass {
168  static char ID;
169 
170  GlobalSplit() : ModulePass(ID) {
172  }
173 
174  bool runOnModule(Module &M) override {
175  if (skipModule(M))
176  return false;
177 
178  return splitGlobals(M);
179  }
180 };
181 
182 } // end anonymous namespace
183 
184 char GlobalSplit::ID = 0;
185 
186 INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
187 
189  return new GlobalSplit;
190 }
191 
193  if (!splitGlobals(M))
194  return PreservedAnalyses::all();
195  return PreservedAnalyses::none();
196 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::GlobalVariable::eraseFromParent
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:385
llvm::GlobalSplitPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: GlobalSplit.cpp:192
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:112
Metadata.h
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
GlobalSplit.h
llvm::Function
Definition: Function.h:61
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::GlobalObject::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Metadata.cpp:1197
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
Operator.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1203
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1335
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::User
Definition: User.h:44
Intrinsics.h
llvm::createGlobalSplitPass
ModulePass * createGlobalSplitPass()
This pass splits globals into pieces for the benefit of whole-program devirtualization and control-fl...
Definition: GlobalSplit.cpp:188
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:345
Type.h
splitGlobals
static bool splitGlobals(Module &M)
Definition: GlobalSplit.cpp:144
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
uint64_t
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:604
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
IPO.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
StringExtras.h
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
getInitializer
static Constant * getInitializer(Constant *C)
Definition: Evaluator.cpp:204
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
DataLayout.h
llvm::StructLayout::getSizeInBytes
uint64_t getSizeInBytes() const
Definition: DataLayout.h:611
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:978
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:582
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::initializeGlobalSplitPass
void initializeGlobalSplitPass(PassRegistry &)
llvm::Init
Definition: Record.h:271
Constant.h
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:634
GlobalVariable.h
Casting.h
Function.h
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1210
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
SmallVector.h
User.h
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition: GlobalVariable.h:153
llvm::GlobalObject::getVCallVisibility
VCallVisibility getVCallVisibility() const
Definition: Metadata.cpp:1517
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
LLVMContext.h
InitializePasses.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
splitGlobal
static bool splitGlobal(GlobalVariable &GV)
Definition: GlobalSplit.cpp:41
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37