File: | lib/Transforms/IPO/WholeProgramDevirt.cpp |
Warning: | line 966, column 20 Access to field 'TheKind' results in a dereference of a null pointer (loaded from variable 'Res') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This pass implements whole program optimization of virtual calls in cases | |||
11 | // where we know (via !type metadata) that the list of callees is fixed. This | |||
12 | // includes the following: | |||
13 | // - Single implementation devirtualization: if a virtual call has a single | |||
14 | // possible callee, replace all calls with a direct call to that callee. | |||
15 | // - Virtual constant propagation: if the virtual function's return type is an | |||
16 | // integer <=64 bits and all possible callees are readnone, for each class and | |||
17 | // each list of constant arguments: evaluate the function, store the return | |||
18 | // value alongside the virtual table, and rewrite each virtual call as a load | |||
19 | // from the virtual table. | |||
20 | // - Uniform return value optimization: if the conditions for virtual constant | |||
21 | // propagation hold and each function returns the same constant value, replace | |||
22 | // each virtual call with that constant. | |||
23 | // - Unique return value optimization for i1 return values: if the conditions | |||
24 | // for virtual constant propagation hold and a single vtable's function | |||
25 | // returns 0, or a single vtable's function returns 1, replace each virtual | |||
26 | // call with a comparison of the vptr against that vtable's address. | |||
27 | // | |||
28 | // This pass is intended to be used during the regular and thin LTO pipelines. | |||
29 | // During regular LTO, the pass determines the best optimization for each | |||
30 | // virtual call and applies the resolutions directly to virtual calls that are | |||
31 | // eligible for virtual call optimization (i.e. calls that use either of the | |||
32 | // llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During | |||
33 | // ThinLTO, the pass operates in two phases: | |||
34 | // - Export phase: this is run during the thin link over a single merged module | |||
35 | // that contains all vtables with !type metadata that participate in the link. | |||
36 | // The pass computes a resolution for each virtual call and stores it in the | |||
37 | // type identifier summary. | |||
38 | // - Import phase: this is run during the thin backends over the individual | |||
39 | // modules. The pass applies the resolutions previously computed during the | |||
40 | // import phase to each eligible virtual call. | |||
41 | // | |||
42 | //===----------------------------------------------------------------------===// | |||
43 | ||||
44 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" | |||
45 | #include "llvm/ADT/ArrayRef.h" | |||
46 | #include "llvm/ADT/DenseMap.h" | |||
47 | #include "llvm/ADT/DenseMapInfo.h" | |||
48 | #include "llvm/ADT/DenseSet.h" | |||
49 | #include "llvm/ADT/MapVector.h" | |||
50 | #include "llvm/ADT/SmallVector.h" | |||
51 | #include "llvm/ADT/iterator_range.h" | |||
52 | #include "llvm/Analysis/AliasAnalysis.h" | |||
53 | #include "llvm/Analysis/BasicAliasAnalysis.h" | |||
54 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
55 | #include "llvm/Analysis/TypeMetadataUtils.h" | |||
56 | #include "llvm/IR/CallSite.h" | |||
57 | #include "llvm/IR/Constants.h" | |||
58 | #include "llvm/IR/DataLayout.h" | |||
59 | #include "llvm/IR/DebugLoc.h" | |||
60 | #include "llvm/IR/DerivedTypes.h" | |||
61 | #include "llvm/IR/Function.h" | |||
62 | #include "llvm/IR/GlobalAlias.h" | |||
63 | #include "llvm/IR/GlobalVariable.h" | |||
64 | #include "llvm/IR/IRBuilder.h" | |||
65 | #include "llvm/IR/InstrTypes.h" | |||
66 | #include "llvm/IR/Instruction.h" | |||
67 | #include "llvm/IR/Instructions.h" | |||
68 | #include "llvm/IR/Intrinsics.h" | |||
69 | #include "llvm/IR/LLVMContext.h" | |||
70 | #include "llvm/IR/Metadata.h" | |||
71 | #include "llvm/IR/Module.h" | |||
72 | #include "llvm/IR/ModuleSummaryIndexYAML.h" | |||
73 | #include "llvm/Pass.h" | |||
74 | #include "llvm/PassRegistry.h" | |||
75 | #include "llvm/PassSupport.h" | |||
76 | #include "llvm/Support/Casting.h" | |||
77 | #include "llvm/Support/Error.h" | |||
78 | #include "llvm/Support/FileSystem.h" | |||
79 | #include "llvm/Support/MathExtras.h" | |||
80 | #include "llvm/Transforms/IPO.h" | |||
81 | #include "llvm/Transforms/IPO/FunctionAttrs.h" | |||
82 | #include "llvm/Transforms/Utils/Evaluator.h" | |||
83 | #include <algorithm> | |||
84 | #include <cstddef> | |||
85 | #include <map> | |||
86 | #include <set> | |||
87 | #include <string> | |||
88 | ||||
89 | using namespace llvm; | |||
90 | using namespace wholeprogramdevirt; | |||
91 | ||||
92 | #define DEBUG_TYPE"wholeprogramdevirt" "wholeprogramdevirt" | |||
93 | ||||
94 | static cl::opt<PassSummaryAction> ClSummaryAction( | |||
95 | "wholeprogramdevirt-summary-action", | |||
96 | cl::desc("What to do with the summary when running this pass"), | |||
97 | cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing")llvm::cl::OptionEnumValue { "none", int(PassSummaryAction::None ), "Do nothing" }, | |||
98 | clEnumValN(PassSummaryAction::Import, "import",llvm::cl::OptionEnumValue { "import", int(PassSummaryAction:: Import), "Import typeid resolutions from summary and globals" } | |||
99 | "Import typeid resolutions from summary and globals")llvm::cl::OptionEnumValue { "import", int(PassSummaryAction:: Import), "Import typeid resolutions from summary and globals" }, | |||
100 | clEnumValN(PassSummaryAction::Export, "export",llvm::cl::OptionEnumValue { "export", int(PassSummaryAction:: Export), "Export typeid resolutions to summary and globals" } | |||
101 | "Export typeid resolutions to summary and globals")llvm::cl::OptionEnumValue { "export", int(PassSummaryAction:: Export), "Export typeid resolutions to summary and globals" }), | |||
102 | cl::Hidden); | |||
103 | ||||
104 | static cl::opt<std::string> ClReadSummary( | |||
105 | "wholeprogramdevirt-read-summary", | |||
106 | cl::desc("Read summary from given YAML file before running pass"), | |||
107 | cl::Hidden); | |||
108 | ||||
109 | static cl::opt<std::string> ClWriteSummary( | |||
110 | "wholeprogramdevirt-write-summary", | |||
111 | cl::desc("Write summary to given YAML file after running pass"), | |||
112 | cl::Hidden); | |||
113 | ||||
114 | // Find the minimum offset that we may store a value of size Size bits at. If | |||
115 | // IsAfter is set, look for an offset before the object, otherwise look for an | |||
116 | // offset after the object. | |||
117 | uint64_t | |||
118 | wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets, | |||
119 | bool IsAfter, uint64_t Size) { | |||
120 | // Find a minimum offset taking into account only vtable sizes. | |||
121 | uint64_t MinByte = 0; | |||
122 | for (const VirtualCallTarget &Target : Targets) { | |||
123 | if (IsAfter) | |||
124 | MinByte = std::max(MinByte, Target.minAfterBytes()); | |||
125 | else | |||
126 | MinByte = std::max(MinByte, Target.minBeforeBytes()); | |||
127 | } | |||
128 | ||||
129 | // Build a vector of arrays of bytes covering, for each target, a slice of the | |||
130 | // used region (see AccumBitVector::BytesUsed in | |||
131 | // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively, | |||
132 | // this aligns the used regions to start at MinByte. | |||
133 | // | |||
134 | // In this example, A, B and C are vtables, # is a byte already allocated for | |||
135 | // a virtual function pointer, AAAA... (etc.) are the used regions for the | |||
136 | // vtables and Offset(X) is the value computed for the Offset variable below | |||
137 | // for X. | |||
138 | // | |||
139 | // Offset(A) | |||
140 | // | | | |||
141 | // |MinByte | |||
142 | // A: ################AAAAAAAA|AAAAAAAA | |||
143 | // B: ########BBBBBBBBBBBBBBBB|BBBB | |||
144 | // C: ########################|CCCCCCCCCCCCCCCC | |||
145 | // | Offset(B) | | |||
146 | // | |||
147 | // This code produces the slices of A, B and C that appear after the divider | |||
148 | // at MinByte. | |||
149 | std::vector<ArrayRef<uint8_t>> Used; | |||
150 | for (const VirtualCallTarget &Target : Targets) { | |||
151 | ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed | |||
152 | : Target.TM->Bits->Before.BytesUsed; | |||
153 | uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes() | |||
154 | : MinByte - Target.minBeforeBytes(); | |||
155 | ||||
156 | // Disregard used regions that are smaller than Offset. These are | |||
157 | // effectively all-free regions that do not need to be checked. | |||
158 | if (VTUsed.size() > Offset) | |||
159 | Used.push_back(VTUsed.slice(Offset)); | |||
160 | } | |||
161 | ||||
162 | if (Size == 1) { | |||
163 | // Find a free bit in each member of Used. | |||
164 | for (unsigned I = 0;; ++I) { | |||
165 | uint8_t BitsUsed = 0; | |||
166 | for (auto &&B : Used) | |||
167 | if (I < B.size()) | |||
168 | BitsUsed |= B[I]; | |||
169 | if (BitsUsed != 0xff) | |||
170 | return (MinByte + I) * 8 + | |||
171 | countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined); | |||
172 | } | |||
173 | } else { | |||
174 | // Find a free (Size/8) byte region in each member of Used. | |||
175 | // FIXME: see if alignment helps. | |||
176 | for (unsigned I = 0;; ++I) { | |||
177 | for (auto &&B : Used) { | |||
178 | unsigned Byte = 0; | |||
179 | while ((I + Byte) < B.size() && Byte < (Size / 8)) { | |||
180 | if (B[I + Byte]) | |||
181 | goto NextI; | |||
182 | ++Byte; | |||
183 | } | |||
184 | } | |||
185 | return (MinByte + I) * 8; | |||
186 | NextI:; | |||
187 | } | |||
188 | } | |||
189 | } | |||
190 | ||||
191 | void wholeprogramdevirt::setBeforeReturnValues( | |||
192 | MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore, | |||
193 | unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) { | |||
194 | if (BitWidth == 1) | |||
195 | OffsetByte = -(AllocBefore / 8 + 1); | |||
196 | else | |||
197 | OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8); | |||
198 | OffsetBit = AllocBefore % 8; | |||
199 | ||||
200 | for (VirtualCallTarget &Target : Targets) { | |||
201 | if (BitWidth == 1) | |||
202 | Target.setBeforeBit(AllocBefore); | |||
203 | else | |||
204 | Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8); | |||
205 | } | |||
206 | } | |||
207 | ||||
208 | void wholeprogramdevirt::setAfterReturnValues( | |||
209 | MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter, | |||
210 | unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) { | |||
211 | if (BitWidth == 1) | |||
212 | OffsetByte = AllocAfter / 8; | |||
213 | else | |||
214 | OffsetByte = (AllocAfter + 7) / 8; | |||
215 | OffsetBit = AllocAfter % 8; | |||
216 | ||||
217 | for (VirtualCallTarget &Target : Targets) { | |||
218 | if (BitWidth == 1) | |||
219 | Target.setAfterBit(AllocAfter); | |||
220 | else | |||
221 | Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8); | |||
222 | } | |||
223 | } | |||
224 | ||||
225 | VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM) | |||
226 | : Fn(Fn), TM(TM), | |||
227 | IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()), WasDevirt(false) {} | |||
228 | ||||
229 | namespace { | |||
230 | ||||
231 | // A slot in a set of virtual tables. The TypeID identifies the set of virtual | |||
232 | // tables, and the ByteOffset is the offset in bytes from the address point to | |||
233 | // the virtual function pointer. | |||
234 | struct VTableSlot { | |||
235 | Metadata *TypeID; | |||
236 | uint64_t ByteOffset; | |||
237 | }; | |||
238 | ||||
239 | } // end anonymous namespace | |||
240 | ||||
241 | namespace llvm { | |||
242 | ||||
243 | template <> struct DenseMapInfo<VTableSlot> { | |||
244 | static VTableSlot getEmptyKey() { | |||
245 | return {DenseMapInfo<Metadata *>::getEmptyKey(), | |||
246 | DenseMapInfo<uint64_t>::getEmptyKey()}; | |||
247 | } | |||
248 | static VTableSlot getTombstoneKey() { | |||
249 | return {DenseMapInfo<Metadata *>::getTombstoneKey(), | |||
250 | DenseMapInfo<uint64_t>::getTombstoneKey()}; | |||
251 | } | |||
252 | static unsigned getHashValue(const VTableSlot &I) { | |||
253 | return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^ | |||
254 | DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset); | |||
255 | } | |||
256 | static bool isEqual(const VTableSlot &LHS, | |||
257 | const VTableSlot &RHS) { | |||
258 | return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset; | |||
259 | } | |||
260 | }; | |||
261 | ||||
262 | } // end namespace llvm | |||
263 | ||||
264 | namespace { | |||
265 | ||||
266 | // A virtual call site. VTable is the loaded virtual table pointer, and CS is | |||
267 | // the indirect virtual call. | |||
268 | struct VirtualCallSite { | |||
269 | Value *VTable; | |||
270 | CallSite CS; | |||
271 | ||||
272 | // If non-null, this field points to the associated unsafe use count stored in | |||
273 | // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description | |||
274 | // of that field for details. | |||
275 | unsigned *NumUnsafeUses; | |||
276 | ||||
277 | void | |||
278 | emitRemark(const StringRef OptName, const StringRef TargetName, | |||
279 | function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { | |||
280 | Function *F = CS.getCaller(); | |||
281 | DebugLoc DLoc = CS->getDebugLoc(); | |||
282 | BasicBlock *Block = CS.getParent(); | |||
283 | ||||
284 | using namespace ore; | |||
285 | OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE"wholeprogramdevirt", OptName, DLoc, Block) | |||
286 | << NV("Optimization", OptName) | |||
287 | << ": devirtualized a call to " | |||
288 | << NV("FunctionName", TargetName)); | |||
289 | } | |||
290 | ||||
291 | void replaceAndErase( | |||
292 | const StringRef OptName, const StringRef TargetName, bool RemarksEnabled, | |||
293 | function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter, | |||
294 | Value *New) { | |||
295 | if (RemarksEnabled) | |||
296 | emitRemark(OptName, TargetName, OREGetter); | |||
297 | CS->replaceAllUsesWith(New); | |||
298 | if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) { | |||
299 | BranchInst::Create(II->getNormalDest(), CS.getInstruction()); | |||
300 | II->getUnwindDest()->removePredecessor(II->getParent()); | |||
301 | } | |||
302 | CS->eraseFromParent(); | |||
303 | // This use is no longer unsafe. | |||
304 | if (NumUnsafeUses) | |||
305 | --*NumUnsafeUses; | |||
306 | } | |||
307 | }; | |||
308 | ||||
309 | // Call site information collected for a specific VTableSlot and possibly a list | |||
310 | // of constant integer arguments. The grouping by arguments is handled by the | |||
311 | // VTableSlotInfo class. | |||
312 | struct CallSiteInfo { | |||
313 | /// The set of call sites for this slot. Used during regular LTO and the | |||
314 | /// import phase of ThinLTO (as well as the export phase of ThinLTO for any | |||
315 | /// call sites that appear in the merged module itself); in each of these | |||
316 | /// cases we are directly operating on the call sites at the IR level. | |||
317 | std::vector<VirtualCallSite> CallSites; | |||
318 | ||||
319 | // These fields are used during the export phase of ThinLTO and reflect | |||
320 | // information collected from function summaries. | |||
321 | ||||
322 | /// Whether any function summary contains an llvm.assume(llvm.type.test) for | |||
323 | /// this slot. | |||
324 | bool SummaryHasTypeTestAssumeUsers; | |||
325 | ||||
326 | /// CFI-specific: a vector containing the list of function summaries that use | |||
327 | /// the llvm.type.checked.load intrinsic and therefore will require | |||
328 | /// resolutions for llvm.type.test in order to implement CFI checks if | |||
329 | /// devirtualization was unsuccessful. If devirtualization was successful, the | |||
330 | /// pass will clear this vector by calling markDevirt(). If at the end of the | |||
331 | /// pass the vector is non-empty, we will need to add a use of llvm.type.test | |||
332 | /// to each of the function summaries in the vector. | |||
333 | std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers; | |||
334 | ||||
335 | bool isExported() const { | |||
336 | return SummaryHasTypeTestAssumeUsers || | |||
337 | !SummaryTypeCheckedLoadUsers.empty(); | |||
338 | } | |||
339 | ||||
340 | /// As explained in the comment for SummaryTypeCheckedLoadUsers. | |||
341 | void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); } | |||
342 | }; | |||
343 | ||||
344 | // Call site information collected for a specific VTableSlot. | |||
345 | struct VTableSlotInfo { | |||
346 | // The set of call sites which do not have all constant integer arguments | |||
347 | // (excluding "this"). | |||
348 | CallSiteInfo CSInfo; | |||
349 | ||||
350 | // The set of call sites with all constant integer arguments (excluding | |||
351 | // "this"), grouped by argument list. | |||
352 | std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo; | |||
353 | ||||
354 | void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses); | |||
355 | ||||
356 | private: | |||
357 | CallSiteInfo &findCallSiteInfo(CallSite CS); | |||
358 | }; | |||
359 | ||||
360 | CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) { | |||
361 | std::vector<uint64_t> Args; | |||
362 | auto *CI = dyn_cast<IntegerType>(CS.getType()); | |||
363 | if (!CI || CI->getBitWidth() > 64 || CS.arg_empty()) | |||
364 | return CSInfo; | |||
365 | for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) { | |||
366 | auto *CI = dyn_cast<ConstantInt>(Arg); | |||
367 | if (!CI || CI->getBitWidth() > 64) | |||
368 | return CSInfo; | |||
369 | Args.push_back(CI->getZExtValue()); | |||
370 | } | |||
371 | return ConstCSInfo[Args]; | |||
372 | } | |||
373 | ||||
374 | void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS, | |||
375 | unsigned *NumUnsafeUses) { | |||
376 | findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses}); | |||
377 | } | |||
378 | ||||
379 | struct DevirtModule { | |||
380 | Module &M; | |||
381 | function_ref<AAResults &(Function &)> AARGetter; | |||
382 | ||||
383 | ModuleSummaryIndex *ExportSummary; | |||
384 | const ModuleSummaryIndex *ImportSummary; | |||
385 | ||||
386 | IntegerType *Int8Ty; | |||
387 | PointerType *Int8PtrTy; | |||
388 | IntegerType *Int32Ty; | |||
389 | IntegerType *Int64Ty; | |||
390 | IntegerType *IntPtrTy; | |||
391 | ||||
392 | bool RemarksEnabled; | |||
393 | function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter; | |||
394 | ||||
395 | MapVector<VTableSlot, VTableSlotInfo> CallSlots; | |||
396 | ||||
397 | // This map keeps track of the number of "unsafe" uses of a loaded function | |||
398 | // pointer. The key is the associated llvm.type.test intrinsic call generated | |||
399 | // by this pass. An unsafe use is one that calls the loaded function pointer | |||
400 | // directly. Every time we eliminate an unsafe use (for example, by | |||
401 | // devirtualizing it or by applying virtual constant propagation), we | |||
402 | // decrement the value stored in this map. If a value reaches zero, we can | |||
403 | // eliminate the type check by RAUWing the associated llvm.type.test call with | |||
404 | // true. | |||
405 | std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest; | |||
406 | ||||
407 | DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter, | |||
408 | function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter, | |||
409 | ModuleSummaryIndex *ExportSummary, | |||
410 | const ModuleSummaryIndex *ImportSummary) | |||
411 | : M(M), AARGetter(AARGetter), ExportSummary(ExportSummary), | |||
412 | ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())), | |||
413 | Int8PtrTy(Type::getInt8PtrTy(M.getContext())), | |||
414 | Int32Ty(Type::getInt32Ty(M.getContext())), | |||
415 | Int64Ty(Type::getInt64Ty(M.getContext())), | |||
416 | IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)), | |||
417 | RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) { | |||
418 | assert(!(ExportSummary && ImportSummary))(static_cast <bool> (!(ExportSummary && ImportSummary )) ? void (0) : __assert_fail ("!(ExportSummary && ImportSummary)" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/IPO/WholeProgramDevirt.cpp" , 418, __extension__ __PRETTY_FUNCTION__)); | |||
419 | } | |||
420 | ||||
421 | bool areRemarksEnabled(); | |||
422 | ||||
423 | void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc); | |||
424 | void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); | |||
425 | ||||
426 | void buildTypeIdentifierMap( | |||
427 | std::vector<VTableBits> &Bits, | |||
428 | DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap); | |||
429 | Constant *getPointerAtOffset(Constant *I, uint64_t Offset); | |||
430 | bool | |||
431 | tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot, | |||
432 | const std::set<TypeMemberInfo> &TypeMemberInfos, | |||
433 | uint64_t ByteOffset); | |||
434 | ||||
435 | void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, | |||
436 | bool &IsExported); | |||
437 | bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
438 | VTableSlotInfo &SlotInfo, | |||
439 | WholeProgramDevirtResolution *Res); | |||
440 | ||||
441 | bool tryEvaluateFunctionsWithArgs( | |||
442 | MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
443 | ArrayRef<uint64_t> Args); | |||
444 | ||||
445 | void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, | |||
446 | uint64_t TheRetVal); | |||
447 | bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
448 | CallSiteInfo &CSInfo, | |||
449 | WholeProgramDevirtResolution::ByArg *Res); | |||
450 | ||||
451 | // Returns the global symbol name that is used to export information about the | |||
452 | // given vtable slot and list of arguments. | |||
453 | std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
454 | StringRef Name); | |||
455 | ||||
456 | bool shouldExportConstantsAsAbsoluteSymbols(); | |||
457 | ||||
458 | // This function is called during the export phase to create a symbol | |||
459 | // definition containing information about the given vtable slot and list of | |||
460 | // arguments. | |||
461 | void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name, | |||
462 | Constant *C); | |||
463 | void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name, | |||
464 | uint32_t Const, uint32_t &Storage); | |||
465 | ||||
466 | // This function is called during the import phase to create a reference to | |||
467 | // the symbol definition created during the export phase. | |||
468 | Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
469 | StringRef Name); | |||
470 | Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
471 | StringRef Name, IntegerType *IntTy, | |||
472 | uint32_t Storage); | |||
473 | ||||
474 | void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne, | |||
475 | Constant *UniqueMemberAddr); | |||
476 | bool tryUniqueRetValOpt(unsigned BitWidth, | |||
477 | MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
478 | CallSiteInfo &CSInfo, | |||
479 | WholeProgramDevirtResolution::ByArg *Res, | |||
480 | VTableSlot Slot, ArrayRef<uint64_t> Args); | |||
481 | ||||
482 | void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName, | |||
483 | Constant *Byte, Constant *Bit); | |||
484 | bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
485 | VTableSlotInfo &SlotInfo, | |||
486 | WholeProgramDevirtResolution *Res, VTableSlot Slot); | |||
487 | ||||
488 | void rebuildGlobal(VTableBits &B); | |||
489 | ||||
490 | // Apply the summary resolution for Slot to all virtual calls in SlotInfo. | |||
491 | void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo); | |||
492 | ||||
493 | // If we were able to eliminate all unsafe uses for a type checked load, | |||
494 | // eliminate the associated type tests by replacing them with true. | |||
495 | void removeRedundantTypeTests(); | |||
496 | ||||
497 | bool run(); | |||
498 | ||||
499 | // Lower the module using the action and summary passed as command line | |||
500 | // arguments. For testing purposes only. | |||
501 | static bool runForTesting( | |||
502 | Module &M, function_ref<AAResults &(Function &)> AARGetter, | |||
503 | function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter); | |||
504 | }; | |||
505 | ||||
506 | struct WholeProgramDevirt : public ModulePass { | |||
507 | static char ID; | |||
508 | ||||
509 | bool UseCommandLine = false; | |||
510 | ||||
511 | ModuleSummaryIndex *ExportSummary; | |||
512 | const ModuleSummaryIndex *ImportSummary; | |||
513 | ||||
514 | WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) { | |||
515 | initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); | |||
516 | } | |||
517 | ||||
518 | WholeProgramDevirt(ModuleSummaryIndex *ExportSummary, | |||
519 | const ModuleSummaryIndex *ImportSummary) | |||
520 | : ModulePass(ID), ExportSummary(ExportSummary), | |||
521 | ImportSummary(ImportSummary) { | |||
522 | initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); | |||
523 | } | |||
524 | ||||
525 | bool runOnModule(Module &M) override { | |||
526 | if (skipModule(M)) | |||
527 | return false; | |||
528 | ||||
529 | // In the new pass manager, we can request the optimization | |||
530 | // remark emitter pass on a per-function-basis, which the | |||
531 | // OREGetter will do for us. | |||
532 | // In the old pass manager, this is harder, so we just build | |||
533 | // an optimization remark emitter on the fly, when we need it. | |||
534 | std::unique_ptr<OptimizationRemarkEmitter> ORE; | |||
535 | auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { | |||
536 | ORE = make_unique<OptimizationRemarkEmitter>(F); | |||
537 | return *ORE; | |||
538 | }; | |||
539 | ||||
540 | if (UseCommandLine) | |||
541 | return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter); | |||
542 | ||||
543 | return DevirtModule(M, LegacyAARGetter(*this), OREGetter, ExportSummary, | |||
544 | ImportSummary) | |||
545 | .run(); | |||
546 | } | |||
547 | ||||
548 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
549 | AU.addRequired<AssumptionCacheTracker>(); | |||
550 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
551 | } | |||
552 | }; | |||
553 | ||||
554 | } // end anonymous namespace | |||
555 | ||||
556 | INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",static void *initializeWholeProgramDevirtPassOnce(PassRegistry &Registry) { | |||
557 | "Whole program devirtualization", false, false)static void *initializeWholeProgramDevirtPassOnce(PassRegistry &Registry) { | |||
558 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); | |||
559 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
560 | INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",PassInfo *PI = new PassInfo( "Whole program devirtualization" , "wholeprogramdevirt", &WholeProgramDevirt::ID, PassInfo ::NormalCtor_t(callDefaultCtor<WholeProgramDevirt>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeWholeProgramDevirtPassFlag; void llvm ::initializeWholeProgramDevirtPass(PassRegistry &Registry ) { llvm::call_once(InitializeWholeProgramDevirtPassFlag, initializeWholeProgramDevirtPassOnce , std::ref(Registry)); } | |||
561 | "Whole program devirtualization", false, false)PassInfo *PI = new PassInfo( "Whole program devirtualization" , "wholeprogramdevirt", &WholeProgramDevirt::ID, PassInfo ::NormalCtor_t(callDefaultCtor<WholeProgramDevirt>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeWholeProgramDevirtPassFlag; void llvm ::initializeWholeProgramDevirtPass(PassRegistry &Registry ) { llvm::call_once(InitializeWholeProgramDevirtPassFlag, initializeWholeProgramDevirtPassOnce , std::ref(Registry)); } | |||
562 | char WholeProgramDevirt::ID = 0; | |||
563 | ||||
564 | ModulePass * | |||
565 | llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, | |||
566 | const ModuleSummaryIndex *ImportSummary) { | |||
567 | return new WholeProgramDevirt(ExportSummary, ImportSummary); | |||
568 | } | |||
569 | ||||
570 | PreservedAnalyses WholeProgramDevirtPass::run(Module &M, | |||
571 | ModuleAnalysisManager &AM) { | |||
572 | auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | |||
573 | auto AARGetter = [&](Function &F) -> AAResults & { | |||
574 | return FAM.getResult<AAManager>(F); | |||
575 | }; | |||
576 | auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { | |||
577 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | |||
578 | }; | |||
579 | if (!DevirtModule(M, AARGetter, OREGetter, nullptr, nullptr).run()) | |||
580 | return PreservedAnalyses::all(); | |||
581 | return PreservedAnalyses::none(); | |||
582 | } | |||
583 | ||||
584 | bool DevirtModule::runForTesting( | |||
585 | Module &M, function_ref<AAResults &(Function &)> AARGetter, | |||
586 | function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { | |||
587 | ModuleSummaryIndex Summary(/*IsPerformingAnalysis=*/false); | |||
588 | ||||
589 | // Handle the command-line summary arguments. This code is for testing | |||
590 | // purposes only, so we handle errors directly. | |||
591 | if (!ClReadSummary.empty()) { | |||
592 | ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary + | |||
593 | ": "); | |||
594 | auto ReadSummaryFile = | |||
595 | ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary))); | |||
596 | ||||
597 | yaml::Input In(ReadSummaryFile->getBuffer()); | |||
598 | In >> Summary; | |||
599 | ExitOnErr(errorCodeToError(In.error())); | |||
600 | } | |||
601 | ||||
602 | bool Changed = | |||
603 | DevirtModule( | |||
604 | M, AARGetter, OREGetter, | |||
605 | ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, | |||
606 | ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr) | |||
607 | .run(); | |||
608 | ||||
609 | if (!ClWriteSummary.empty()) { | |||
610 | ExitOnError ExitOnErr( | |||
611 | "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": "); | |||
612 | std::error_code EC; | |||
613 | raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); | |||
614 | ExitOnErr(errorCodeToError(EC)); | |||
615 | ||||
616 | yaml::Output Out(OS); | |||
617 | Out << Summary; | |||
618 | } | |||
619 | ||||
620 | return Changed; | |||
621 | } | |||
622 | ||||
623 | void DevirtModule::buildTypeIdentifierMap( | |||
624 | std::vector<VTableBits> &Bits, | |||
625 | DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) { | |||
626 | DenseMap<GlobalVariable *, VTableBits *> GVToBits; | |||
627 | Bits.reserve(M.getGlobalList().size()); | |||
628 | SmallVector<MDNode *, 2> Types; | |||
629 | for (GlobalVariable &GV : M.globals()) { | |||
630 | Types.clear(); | |||
631 | GV.getMetadata(LLVMContext::MD_type, Types); | |||
632 | if (Types.empty()) | |||
633 | continue; | |||
634 | ||||
635 | VTableBits *&BitsPtr = GVToBits[&GV]; | |||
636 | if (!BitsPtr) { | |||
637 | Bits.emplace_back(); | |||
638 | Bits.back().GV = &GV; | |||
639 | Bits.back().ObjectSize = | |||
640 | M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType()); | |||
641 | BitsPtr = &Bits.back(); | |||
642 | } | |||
643 | ||||
644 | for (MDNode *Type : Types) { | |||
645 | auto TypeID = Type->getOperand(1).get(); | |||
646 | ||||
647 | uint64_t Offset = | |||
648 | cast<ConstantInt>( | |||
649 | cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) | |||
650 | ->getZExtValue(); | |||
651 | ||||
652 | TypeIdMap[TypeID].insert({BitsPtr, Offset}); | |||
653 | } | |||
654 | } | |||
655 | } | |||
656 | ||||
657 | Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) { | |||
658 | if (I->getType()->isPointerTy()) { | |||
659 | if (Offset == 0) | |||
660 | return I; | |||
661 | return nullptr; | |||
662 | } | |||
663 | ||||
664 | const DataLayout &DL = M.getDataLayout(); | |||
665 | ||||
666 | if (auto *C = dyn_cast<ConstantStruct>(I)) { | |||
667 | const StructLayout *SL = DL.getStructLayout(C->getType()); | |||
668 | if (Offset >= SL->getSizeInBytes()) | |||
669 | return nullptr; | |||
670 | ||||
671 | unsigned Op = SL->getElementContainingOffset(Offset); | |||
672 | return getPointerAtOffset(cast<Constant>(I->getOperand(Op)), | |||
673 | Offset - SL->getElementOffset(Op)); | |||
674 | } | |||
675 | if (auto *C = dyn_cast<ConstantArray>(I)) { | |||
676 | ArrayType *VTableTy = C->getType(); | |||
677 | uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); | |||
678 | ||||
679 | unsigned Op = Offset / ElemSize; | |||
680 | if (Op >= C->getNumOperands()) | |||
681 | return nullptr; | |||
682 | ||||
683 | return getPointerAtOffset(cast<Constant>(I->getOperand(Op)), | |||
684 | Offset % ElemSize); | |||
685 | } | |||
686 | return nullptr; | |||
687 | } | |||
688 | ||||
689 | bool DevirtModule::tryFindVirtualCallTargets( | |||
690 | std::vector<VirtualCallTarget> &TargetsForSlot, | |||
691 | const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) { | |||
692 | for (const TypeMemberInfo &TM : TypeMemberInfos) { | |||
693 | if (!TM.Bits->GV->isConstant()) | |||
694 | return false; | |||
695 | ||||
696 | Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), | |||
697 | TM.Offset + ByteOffset); | |||
698 | if (!Ptr) | |||
699 | return false; | |||
700 | ||||
701 | auto Fn = dyn_cast<Function>(Ptr->stripPointerCasts()); | |||
702 | if (!Fn) | |||
703 | return false; | |||
704 | ||||
705 | // We can disregard __cxa_pure_virtual as a possible call target, as | |||
706 | // calls to pure virtuals are UB. | |||
707 | if (Fn->getName() == "__cxa_pure_virtual") | |||
708 | continue; | |||
709 | ||||
710 | TargetsForSlot.push_back({Fn, &TM}); | |||
711 | } | |||
712 | ||||
713 | // Give up if we couldn't find any targets. | |||
714 | return !TargetsForSlot.empty(); | |||
715 | } | |||
716 | ||||
717 | void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, | |||
718 | Constant *TheFn, bool &IsExported) { | |||
719 | auto Apply = [&](CallSiteInfo &CSInfo) { | |||
720 | for (auto &&VCallSite : CSInfo.CallSites) { | |||
721 | if (RemarksEnabled) | |||
722 | VCallSite.emitRemark("single-impl", TheFn->getName(), OREGetter); | |||
723 | VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast( | |||
724 | TheFn, VCallSite.CS.getCalledValue()->getType())); | |||
725 | // This use is no longer unsafe. | |||
726 | if (VCallSite.NumUnsafeUses) | |||
727 | --*VCallSite.NumUnsafeUses; | |||
728 | } | |||
729 | if (CSInfo.isExported()) { | |||
730 | IsExported = true; | |||
731 | CSInfo.markDevirt(); | |||
732 | } | |||
733 | }; | |||
734 | Apply(SlotInfo.CSInfo); | |||
735 | for (auto &P : SlotInfo.ConstCSInfo) | |||
736 | Apply(P.second); | |||
737 | } | |||
738 | ||||
739 | bool DevirtModule::trySingleImplDevirt( | |||
740 | MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
741 | VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) { | |||
742 | // See if the program contains a single implementation of this virtual | |||
743 | // function. | |||
744 | Function *TheFn = TargetsForSlot[0].Fn; | |||
745 | for (auto &&Target : TargetsForSlot) | |||
746 | if (TheFn != Target.Fn) | |||
747 | return false; | |||
748 | ||||
749 | // If so, update each call site to call that implementation directly. | |||
750 | if (RemarksEnabled) | |||
751 | TargetsForSlot[0].WasDevirt = true; | |||
752 | ||||
753 | bool IsExported = false; | |||
754 | applySingleImplDevirt(SlotInfo, TheFn, IsExported); | |||
755 | if (!IsExported) | |||
756 | return false; | |||
757 | ||||
758 | // If the only implementation has local linkage, we must promote to external | |||
759 | // to make it visible to thin LTO objects. We can only get here during the | |||
760 | // ThinLTO export phase. | |||
761 | if (TheFn->hasLocalLinkage()) { | |||
762 | std::string NewName = (TheFn->getName() + "$merged").str(); | |||
763 | ||||
764 | // Since we are renaming the function, any comdats with the same name must | |||
765 | // also be renamed. This is required when targeting COFF, as the comdat name | |||
766 | // must match one of the names of the symbols in the comdat. | |||
767 | if (Comdat *C = TheFn->getComdat()) { | |||
768 | if (C->getName() == TheFn->getName()) { | |||
769 | Comdat *NewC = M.getOrInsertComdat(NewName); | |||
770 | NewC->setSelectionKind(C->getSelectionKind()); | |||
771 | for (GlobalObject &GO : M.global_objects()) | |||
772 | if (GO.getComdat() == C) | |||
773 | GO.setComdat(NewC); | |||
774 | } | |||
775 | } | |||
776 | ||||
777 | TheFn->setLinkage(GlobalValue::ExternalLinkage); | |||
778 | TheFn->setVisibility(GlobalValue::HiddenVisibility); | |||
779 | TheFn->setName(NewName); | |||
780 | } | |||
781 | ||||
782 | Res->TheKind = WholeProgramDevirtResolution::SingleImpl; | |||
783 | Res->SingleImplName = TheFn->getName(); | |||
784 | ||||
785 | return true; | |||
786 | } | |||
787 | ||||
788 | bool DevirtModule::tryEvaluateFunctionsWithArgs( | |||
789 | MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
790 | ArrayRef<uint64_t> Args) { | |||
791 | // Evaluate each function and store the result in each target's RetVal | |||
792 | // field. | |||
793 | for (VirtualCallTarget &Target : TargetsForSlot) { | |||
794 | if (Target.Fn->arg_size() != Args.size() + 1) | |||
795 | return false; | |||
796 | ||||
797 | Evaluator Eval(M.getDataLayout(), nullptr); | |||
798 | SmallVector<Constant *, 2> EvalArgs; | |||
799 | EvalArgs.push_back( | |||
800 | Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0))); | |||
801 | for (unsigned I = 0; I != Args.size(); ++I) { | |||
802 | auto *ArgTy = dyn_cast<IntegerType>( | |||
803 | Target.Fn->getFunctionType()->getParamType(I + 1)); | |||
804 | if (!ArgTy) | |||
805 | return false; | |||
806 | EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I])); | |||
807 | } | |||
808 | ||||
809 | Constant *RetVal; | |||
810 | if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) || | |||
811 | !isa<ConstantInt>(RetVal)) | |||
812 | return false; | |||
813 | Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue(); | |||
814 | } | |||
815 | return true; | |||
816 | } | |||
817 | ||||
818 | void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, | |||
819 | uint64_t TheRetVal) { | |||
820 | for (auto Call : CSInfo.CallSites) | |||
821 | Call.replaceAndErase( | |||
822 | "uniform-ret-val", FnName, RemarksEnabled, OREGetter, | |||
823 | ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal)); | |||
824 | CSInfo.markDevirt(); | |||
825 | } | |||
826 | ||||
827 | bool DevirtModule::tryUniformRetValOpt( | |||
828 | MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo, | |||
829 | WholeProgramDevirtResolution::ByArg *Res) { | |||
830 | // Uniform return value optimization. If all functions return the same | |||
831 | // constant, replace all calls with that constant. | |||
832 | uint64_t TheRetVal = TargetsForSlot[0].RetVal; | |||
833 | for (const VirtualCallTarget &Target : TargetsForSlot) | |||
834 | if (Target.RetVal != TheRetVal) | |||
835 | return false; | |||
836 | ||||
837 | if (CSInfo.isExported()) { | |||
838 | Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal; | |||
839 | Res->Info = TheRetVal; | |||
840 | } | |||
841 | ||||
842 | applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal); | |||
843 | if (RemarksEnabled) | |||
844 | for (auto &&Target : TargetsForSlot) | |||
845 | Target.WasDevirt = true; | |||
846 | return true; | |||
847 | } | |||
848 | ||||
849 | std::string DevirtModule::getGlobalName(VTableSlot Slot, | |||
850 | ArrayRef<uint64_t> Args, | |||
851 | StringRef Name) { | |||
852 | std::string FullName = "__typeid_"; | |||
853 | raw_string_ostream OS(FullName); | |||
854 | OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset; | |||
855 | for (uint64_t Arg : Args) | |||
856 | OS << '_' << Arg; | |||
857 | OS << '_' << Name; | |||
858 | return OS.str(); | |||
859 | } | |||
860 | ||||
861 | bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() { | |||
862 | Triple T(M.getTargetTriple()); | |||
863 | return (T.getArch() == Triple::x86 || T.getArch() == Triple::x86_64) && | |||
864 | T.getObjectFormat() == Triple::ELF; | |||
865 | } | |||
866 | ||||
867 | void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
868 | StringRef Name, Constant *C) { | |||
869 | GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage, | |||
870 | getGlobalName(Slot, Args, Name), C, &M); | |||
871 | GA->setVisibility(GlobalValue::HiddenVisibility); | |||
872 | } | |||
873 | ||||
874 | void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
875 | StringRef Name, uint32_t Const, | |||
876 | uint32_t &Storage) { | |||
877 | if (shouldExportConstantsAsAbsoluteSymbols()) { | |||
878 | exportGlobal( | |||
879 | Slot, Args, Name, | |||
880 | ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy)); | |||
881 | return; | |||
882 | } | |||
883 | ||||
884 | Storage = Const; | |||
885 | } | |||
886 | ||||
887 | Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
888 | StringRef Name) { | |||
889 | Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty); | |||
890 | auto *GV = dyn_cast<GlobalVariable>(C); | |||
891 | if (GV) | |||
892 | GV->setVisibility(GlobalValue::HiddenVisibility); | |||
893 | return C; | |||
894 | } | |||
895 | ||||
896 | Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, | |||
897 | StringRef Name, IntegerType *IntTy, | |||
898 | uint32_t Storage) { | |||
899 | if (!shouldExportConstantsAsAbsoluteSymbols()) | |||
900 | return ConstantInt::get(IntTy, Storage); | |||
901 | ||||
902 | Constant *C = importGlobal(Slot, Args, Name); | |||
903 | auto *GV = cast<GlobalVariable>(C->stripPointerCasts()); | |||
904 | C = ConstantExpr::getPtrToInt(C, IntTy); | |||
905 | ||||
906 | // We only need to set metadata if the global is newly created, in which | |||
907 | // case it would not have hidden visibility. | |||
908 | if (GV->getMetadata(LLVMContext::MD_absolute_symbol)) | |||
909 | return C; | |||
910 | ||||
911 | auto SetAbsRange = [&](uint64_t Min, uint64_t Max) { | |||
912 | auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min)); | |||
913 | auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max)); | |||
914 | GV->setMetadata(LLVMContext::MD_absolute_symbol, | |||
915 | MDNode::get(M.getContext(), {MinC, MaxC})); | |||
916 | }; | |||
917 | unsigned AbsWidth = IntTy->getBitWidth(); | |||
918 | if (AbsWidth == IntPtrTy->getBitWidth()) | |||
919 | SetAbsRange(~0ull, ~0ull); // Full set. | |||
920 | else | |||
921 | SetAbsRange(0, 1ull << AbsWidth); | |||
922 | return C; | |||
923 | } | |||
924 | ||||
925 | void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, | |||
926 | bool IsOne, | |||
927 | Constant *UniqueMemberAddr) { | |||
928 | for (auto &&Call : CSInfo.CallSites) { | |||
929 | IRBuilder<> B(Call.CS.getInstruction()); | |||
930 | Value *Cmp = | |||
931 | B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, | |||
932 | B.CreateBitCast(Call.VTable, Int8PtrTy), UniqueMemberAddr); | |||
933 | Cmp = B.CreateZExt(Cmp, Call.CS->getType()); | |||
934 | Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter, | |||
935 | Cmp); | |||
936 | } | |||
937 | CSInfo.markDevirt(); | |||
938 | } | |||
939 | ||||
940 | bool DevirtModule::tryUniqueRetValOpt( | |||
941 | unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot, | |||
942 | CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res, | |||
943 | VTableSlot Slot, ArrayRef<uint64_t> Args) { | |||
944 | // IsOne controls whether we look for a 0 or a 1. | |||
945 | auto tryUniqueRetValOptFor = [&](bool IsOne) { | |||
946 | const TypeMemberInfo *UniqueMember = nullptr; | |||
947 | for (const VirtualCallTarget &Target : TargetsForSlot) { | |||
948 | if (Target.RetVal == (IsOne ? 1 : 0)) { | |||
949 | if (UniqueMember) | |||
950 | return false; | |||
951 | UniqueMember = Target.TM; | |||
952 | } | |||
953 | } | |||
954 | ||||
955 | // We should have found a unique member or bailed out by now. We already | |||
956 | // checked for a uniform return value in tryUniformRetValOpt. | |||
957 | assert(UniqueMember)(static_cast <bool> (UniqueMember) ? void (0) : __assert_fail ("UniqueMember", "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/IPO/WholeProgramDevirt.cpp" , 957, __extension__ __PRETTY_FUNCTION__)); | |||
958 | ||||
959 | Constant *UniqueMemberAddr = | |||
960 | ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy); | |||
961 | UniqueMemberAddr = ConstantExpr::getGetElementPtr( | |||
962 | Int8Ty, UniqueMemberAddr, | |||
963 | ConstantInt::get(Int64Ty, UniqueMember->Offset)); | |||
964 | ||||
965 | if (CSInfo.isExported()) { | |||
966 | Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal; | |||
| ||||
967 | Res->Info = IsOne; | |||
968 | ||||
969 | exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr); | |||
970 | } | |||
971 | ||||
972 | // Replace each call with the comparison. | |||
973 | applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne, | |||
974 | UniqueMemberAddr); | |||
975 | ||||
976 | // Update devirtualization statistics for targets. | |||
977 | if (RemarksEnabled) | |||
978 | for (auto &&Target : TargetsForSlot) | |||
979 | Target.WasDevirt = true; | |||
980 | ||||
981 | return true; | |||
982 | }; | |||
983 | ||||
984 | if (BitWidth == 1) { | |||
985 | if (tryUniqueRetValOptFor(true)) | |||
986 | return true; | |||
987 | if (tryUniqueRetValOptFor(false)) | |||
988 | return true; | |||
989 | } | |||
990 | return false; | |||
991 | } | |||
992 | ||||
993 | void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName, | |||
994 | Constant *Byte, Constant *Bit) { | |||
995 | for (auto Call : CSInfo.CallSites) { | |||
996 | auto *RetType = cast<IntegerType>(Call.CS.getType()); | |||
997 | IRBuilder<> B(Call.CS.getInstruction()); | |||
998 | Value *Addr = | |||
999 | B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte); | |||
1000 | if (RetType->getBitWidth() == 1) { | |||
1001 | Value *Bits = B.CreateLoad(Addr); | |||
1002 | Value *BitsAndBit = B.CreateAnd(Bits, Bit); | |||
1003 | auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0)); | |||
1004 | Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled, | |||
1005 | OREGetter, IsBitSet); | |||
1006 | } else { | |||
1007 | Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo()); | |||
1008 | Value *Val = B.CreateLoad(RetType, ValAddr); | |||
1009 | Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, | |||
1010 | OREGetter, Val); | |||
1011 | } | |||
1012 | } | |||
1013 | CSInfo.markDevirt(); | |||
1014 | } | |||
1015 | ||||
1016 | bool DevirtModule::tryVirtualConstProp( | |||
1017 | MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo, | |||
1018 | WholeProgramDevirtResolution *Res, VTableSlot Slot) { | |||
1019 | // This only works if the function returns an integer. | |||
1020 | auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType()); | |||
1021 | if (!RetType) | |||
1022 | return false; | |||
1023 | unsigned BitWidth = RetType->getBitWidth(); | |||
1024 | if (BitWidth > 64) | |||
1025 | return false; | |||
1026 | ||||
1027 | // Make sure that each function is defined, does not access memory, takes at | |||
1028 | // least one argument, does not use its first argument (which we assume is | |||
1029 | // 'this'), and has the same return type. | |||
1030 | // | |||
1031 | // Note that we test whether this copy of the function is readnone, rather | |||
1032 | // than testing function attributes, which must hold for any copy of the | |||
1033 | // function, even a less optimized version substituted at link time. This is | |||
1034 | // sound because the virtual constant propagation optimizations effectively | |||
1035 | // inline all implementations of the virtual function into each call site, | |||
1036 | // rather than using function attributes to perform local optimization. | |||
1037 | for (VirtualCallTarget &Target : TargetsForSlot) { | |||
1038 | if (Target.Fn->isDeclaration() || | |||
1039 | computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) != | |||
1040 | MAK_ReadNone || | |||
1041 | Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() || | |||
1042 | Target.Fn->getReturnType() != RetType) | |||
1043 | return false; | |||
1044 | } | |||
1045 | ||||
1046 | for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) { | |||
1047 | if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first)) | |||
1048 | continue; | |||
1049 | ||||
1050 | WholeProgramDevirtResolution::ByArg *ResByArg = nullptr; | |||
1051 | if (Res) | |||
1052 | ResByArg = &Res->ResByArg[CSByConstantArg.first]; | |||
1053 | ||||
1054 | if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg)) | |||
1055 | continue; | |||
1056 | ||||
1057 | if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second, | |||
1058 | ResByArg, Slot, CSByConstantArg.first)) | |||
1059 | continue; | |||
1060 | ||||
1061 | // Find an allocation offset in bits in all vtables associated with the | |||
1062 | // type. | |||
1063 | uint64_t AllocBefore = | |||
1064 | findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth); | |||
1065 | uint64_t AllocAfter = | |||
1066 | findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth); | |||
1067 | ||||
1068 | // Calculate the total amount of padding needed to store a value at both | |||
1069 | // ends of the object. | |||
1070 | uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0; | |||
1071 | for (auto &&Target : TargetsForSlot) { | |||
1072 | TotalPaddingBefore += std::max<int64_t>( | |||
1073 | (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0); | |||
1074 | TotalPaddingAfter += std::max<int64_t>( | |||
1075 | (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0); | |||
1076 | } | |||
1077 | ||||
1078 | // If the amount of padding is too large, give up. | |||
1079 | // FIXME: do something smarter here. | |||
1080 | if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128) | |||
1081 | continue; | |||
1082 | ||||
1083 | // Calculate the offset to the value as a (possibly negative) byte offset | |||
1084 | // and (if applicable) a bit offset, and store the values in the targets. | |||
1085 | int64_t OffsetByte; | |||
1086 | uint64_t OffsetBit; | |||
1087 | if (TotalPaddingBefore <= TotalPaddingAfter) | |||
1088 | setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte, | |||
1089 | OffsetBit); | |||
1090 | else | |||
1091 | setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte, | |||
1092 | OffsetBit); | |||
1093 | ||||
1094 | if (RemarksEnabled) | |||
1095 | for (auto &&Target : TargetsForSlot) | |||
1096 | Target.WasDevirt = true; | |||
1097 | ||||
1098 | ||||
1099 | if (CSByConstantArg.second.isExported()) { | |||
1100 | ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp; | |||
1101 | exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte, | |||
1102 | ResByArg->Byte); | |||
1103 | exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit, | |||
1104 | ResByArg->Bit); | |||
1105 | } | |||
1106 | ||||
1107 | // Rewrite each call to a load from OffsetByte/OffsetBit. | |||
1108 | Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte); | |||
1109 | Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit); | |||
1110 | applyVirtualConstProp(CSByConstantArg.second, | |||
1111 | TargetsForSlot[0].Fn->getName(), ByteConst, BitConst); | |||
1112 | } | |||
1113 | return true; | |||
1114 | } | |||
1115 | ||||
1116 | void DevirtModule::rebuildGlobal(VTableBits &B) { | |||
1117 | if (B.Before.Bytes.empty() && B.After.Bytes.empty()) | |||
1118 | return; | |||
1119 | ||||
1120 | // Align each byte array to pointer width. | |||
1121 | unsigned PointerSize = M.getDataLayout().getPointerSize(); | |||
1122 | B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize)); | |||
1123 | B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize)); | |||
1124 | ||||
1125 | // Before was stored in reverse order; flip it now. | |||
1126 | for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I) | |||
1127 | std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]); | |||
1128 | ||||
1129 | // Build an anonymous global containing the before bytes, followed by the | |||
1130 | // original initializer, followed by the after bytes. | |||
1131 | auto NewInit = ConstantStruct::getAnon( | |||
1132 | {ConstantDataArray::get(M.getContext(), B.Before.Bytes), | |||
1133 | B.GV->getInitializer(), | |||
1134 | ConstantDataArray::get(M.getContext(), B.After.Bytes)}); | |||
1135 | auto NewGV = | |||
1136 | new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(), | |||
1137 | GlobalVariable::PrivateLinkage, NewInit, "", B.GV); | |||
1138 | NewGV->setSection(B.GV->getSection()); | |||
1139 | NewGV->setComdat(B.GV->getComdat()); | |||
1140 | ||||
1141 | // Copy the original vtable's metadata to the anonymous global, adjusting | |||
1142 | // offsets as required. | |||
1143 | NewGV->copyMetadata(B.GV, B.Before.Bytes.size()); | |||
1144 | ||||
1145 | // Build an alias named after the original global, pointing at the second | |||
1146 | // element (the original initializer). | |||
1147 | auto Alias = GlobalAlias::create( | |||
1148 | B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "", | |||
1149 | ConstantExpr::getGetElementPtr( | |||
1150 | NewInit->getType(), NewGV, | |||
1151 | ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0), | |||
1152 | ConstantInt::get(Int32Ty, 1)}), | |||
1153 | &M); | |||
1154 | Alias->setVisibility(B.GV->getVisibility()); | |||
1155 | Alias->takeName(B.GV); | |||
1156 | ||||
1157 | B.GV->replaceAllUsesWith(Alias); | |||
1158 | B.GV->eraseFromParent(); | |||
1159 | } | |||
1160 | ||||
1161 | bool DevirtModule::areRemarksEnabled() { | |||
1162 | const auto &FL = M.getFunctionList(); | |||
1163 | if (FL.empty()) | |||
1164 | return false; | |||
1165 | const Function &Fn = FL.front(); | |||
1166 | ||||
1167 | const auto &BBL = Fn.getBasicBlockList(); | |||
1168 | if (BBL.empty()) | |||
1169 | return false; | |||
1170 | auto DI = OptimizationRemark(DEBUG_TYPE"wholeprogramdevirt", "", DebugLoc(), &BBL.front()); | |||
1171 | return DI.isEnabled(); | |||
1172 | } | |||
1173 | ||||
1174 | void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc, | |||
1175 | Function *AssumeFunc) { | |||
1176 | // Find all virtual calls via a virtual table pointer %p under an assumption | |||
1177 | // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p | |||
1178 | // points to a member of the type identifier %md. Group calls by (type ID, | |||
1179 | // offset) pair (effectively the identity of the virtual function) and store | |||
1180 | // to CallSlots. | |||
1181 | DenseSet<Value *> SeenPtrs; | |||
1182 | for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end(); | |||
1183 | I != E;) { | |||
1184 | auto CI = dyn_cast<CallInst>(I->getUser()); | |||
1185 | ++I; | |||
1186 | if (!CI) | |||
1187 | continue; | |||
1188 | ||||
1189 | // Search for virtual calls based on %p and add them to DevirtCalls. | |||
1190 | SmallVector<DevirtCallSite, 1> DevirtCalls; | |||
1191 | SmallVector<CallInst *, 1> Assumes; | |||
1192 | findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI); | |||
1193 | ||||
1194 | // If we found any, add them to CallSlots. Only do this if we haven't seen | |||
1195 | // the vtable pointer before, as it may have been CSE'd with pointers from | |||
1196 | // other call sites, and we don't want to process call sites multiple times. | |||
1197 | if (!Assumes.empty()) { | |||
1198 | Metadata *TypeId = | |||
1199 | cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata(); | |||
1200 | Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); | |||
1201 | if (SeenPtrs.insert(Ptr).second) { | |||
1202 | for (DevirtCallSite Call : DevirtCalls) { | |||
1203 | CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, nullptr); | |||
1204 | } | |||
1205 | } | |||
1206 | } | |||
1207 | ||||
1208 | // We no longer need the assumes or the type test. | |||
1209 | for (auto Assume : Assumes) | |||
1210 | Assume->eraseFromParent(); | |||
1211 | // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we | |||
1212 | // may use the vtable argument later. | |||
1213 | if (CI->use_empty()) | |||
1214 | CI->eraseFromParent(); | |||
1215 | } | |||
1216 | } | |||
1217 | ||||
1218 | void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { | |||
1219 | Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test); | |||
1220 | ||||
1221 | for (auto I = TypeCheckedLoadFunc->use_begin(), | |||
1222 | E = TypeCheckedLoadFunc->use_end(); | |||
1223 | I != E;) { | |||
1224 | auto CI = dyn_cast<CallInst>(I->getUser()); | |||
1225 | ++I; | |||
1226 | if (!CI) | |||
1227 | continue; | |||
1228 | ||||
1229 | Value *Ptr = CI->getArgOperand(0); | |||
1230 | Value *Offset = CI->getArgOperand(1); | |||
1231 | Value *TypeIdValue = CI->getArgOperand(2); | |||
1232 | Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata(); | |||
1233 | ||||
1234 | SmallVector<DevirtCallSite, 1> DevirtCalls; | |||
1235 | SmallVector<Instruction *, 1> LoadedPtrs; | |||
1236 | SmallVector<Instruction *, 1> Preds; | |||
1237 | bool HasNonCallUses = false; | |||
1238 | findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds, | |||
1239 | HasNonCallUses, CI); | |||
1240 | ||||
1241 | // Start by generating "pessimistic" code that explicitly loads the function | |||
1242 | // pointer from the vtable and performs the type check. If possible, we will | |||
1243 | // eliminate the load and the type check later. | |||
1244 | ||||
1245 | // If possible, only generate the load at the point where it is used. | |||
1246 | // This helps avoid unnecessary spills. | |||
1247 | IRBuilder<> LoadB( | |||
1248 | (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI); | |||
1249 | Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset); | |||
1250 | Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy)); | |||
1251 | Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr); | |||
1252 | ||||
1253 | for (Instruction *LoadedPtr : LoadedPtrs) { | |||
1254 | LoadedPtr->replaceAllUsesWith(LoadedValue); | |||
1255 | LoadedPtr->eraseFromParent(); | |||
1256 | } | |||
1257 | ||||
1258 | // Likewise for the type test. | |||
1259 | IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI); | |||
1260 | CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue}); | |||
1261 | ||||
1262 | for (Instruction *Pred : Preds) { | |||
1263 | Pred->replaceAllUsesWith(TypeTestCall); | |||
1264 | Pred->eraseFromParent(); | |||
1265 | } | |||
1266 | ||||
1267 | // We have already erased any extractvalue instructions that refer to the | |||
1268 | // intrinsic call, but the intrinsic may have other non-extractvalue uses | |||
1269 | // (although this is unlikely). In that case, explicitly build a pair and | |||
1270 | // RAUW it. | |||
1271 | if (!CI->use_empty()) { | |||
1272 | Value *Pair = UndefValue::get(CI->getType()); | |||
1273 | IRBuilder<> B(CI); | |||
1274 | Pair = B.CreateInsertValue(Pair, LoadedValue, {0}); | |||
1275 | Pair = B.CreateInsertValue(Pair, TypeTestCall, {1}); | |||
1276 | CI->replaceAllUsesWith(Pair); | |||
1277 | } | |||
1278 | ||||
1279 | // The number of unsafe uses is initially the number of uses. | |||
1280 | auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall]; | |||
1281 | NumUnsafeUses = DevirtCalls.size(); | |||
1282 | ||||
1283 | // If the function pointer has a non-call user, we cannot eliminate the type | |||
1284 | // check, as one of those users may eventually call the pointer. Increment | |||
1285 | // the unsafe use count to make sure it cannot reach zero. | |||
1286 | if (HasNonCallUses) | |||
1287 | ++NumUnsafeUses; | |||
1288 | for (DevirtCallSite Call : DevirtCalls) { | |||
1289 | CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, | |||
1290 | &NumUnsafeUses); | |||
1291 | } | |||
1292 | ||||
1293 | CI->eraseFromParent(); | |||
1294 | } | |||
1295 | } | |||
1296 | ||||
1297 | void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { | |||
1298 | const TypeIdSummary *TidSummary = | |||
1299 | ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString()); | |||
1300 | if (!TidSummary) | |||
1301 | return; | |||
1302 | auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset); | |||
1303 | if (ResI == TidSummary->WPDRes.end()) | |||
1304 | return; | |||
1305 | const WholeProgramDevirtResolution &Res = ResI->second; | |||
1306 | ||||
1307 | if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) { | |||
1308 | // The type of the function in the declaration is irrelevant because every | |||
1309 | // call site will cast it to the correct type. | |||
1310 | auto *SingleImpl = M.getOrInsertFunction( | |||
1311 | Res.SingleImplName, Type::getVoidTy(M.getContext())); | |||
1312 | ||||
1313 | // This is the import phase so we should not be exporting anything. | |||
1314 | bool IsExported = false; | |||
1315 | applySingleImplDevirt(SlotInfo, SingleImpl, IsExported); | |||
1316 | assert(!IsExported)(static_cast <bool> (!IsExported) ? void (0) : __assert_fail ("!IsExported", "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/IPO/WholeProgramDevirt.cpp" , 1316, __extension__ __PRETTY_FUNCTION__)); | |||
1317 | } | |||
1318 | ||||
1319 | for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) { | |||
1320 | auto I = Res.ResByArg.find(CSByConstantArg.first); | |||
1321 | if (I == Res.ResByArg.end()) | |||
1322 | continue; | |||
1323 | auto &ResByArg = I->second; | |||
1324 | // FIXME: We should figure out what to do about the "function name" argument | |||
1325 | // to the apply* functions, as the function names are unavailable during the | |||
1326 | // importing phase. For now we just pass the empty string. This does not | |||
1327 | // impact correctness because the function names are just used for remarks. | |||
1328 | switch (ResByArg.TheKind) { | |||
1329 | case WholeProgramDevirtResolution::ByArg::UniformRetVal: | |||
1330 | applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info); | |||
1331 | break; | |||
1332 | case WholeProgramDevirtResolution::ByArg::UniqueRetVal: { | |||
1333 | Constant *UniqueMemberAddr = | |||
1334 | importGlobal(Slot, CSByConstantArg.first, "unique_member"); | |||
1335 | applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info, | |||
1336 | UniqueMemberAddr); | |||
1337 | break; | |||
1338 | } | |||
1339 | case WholeProgramDevirtResolution::ByArg::VirtualConstProp: { | |||
1340 | Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte", | |||
1341 | Int32Ty, ResByArg.Byte); | |||
1342 | Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty, | |||
1343 | ResByArg.Bit); | |||
1344 | applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit); | |||
1345 | break; | |||
1346 | } | |||
1347 | default: | |||
1348 | break; | |||
1349 | } | |||
1350 | } | |||
1351 | } | |||
1352 | ||||
1353 | void DevirtModule::removeRedundantTypeTests() { | |||
1354 | auto True = ConstantInt::getTrue(M.getContext()); | |||
1355 | for (auto &&U : NumUnsafeUsesForTypeTest) { | |||
1356 | if (U.second == 0) { | |||
1357 | U.first->replaceAllUsesWith(True); | |||
1358 | U.first->eraseFromParent(); | |||
1359 | } | |||
1360 | } | |||
1361 | } | |||
1362 | ||||
1363 | bool DevirtModule::run() { | |||
1364 | Function *TypeTestFunc = | |||
1365 | M.getFunction(Intrinsic::getName(Intrinsic::type_test)); | |||
1366 | Function *TypeCheckedLoadFunc = | |||
1367 | M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); | |||
1368 | Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume)); | |||
1369 | ||||
1370 | // Normally if there are no users of the devirtualization intrinsics in the | |||
1371 | // module, this pass has nothing to do. But if we are exporting, we also need | |||
1372 | // to handle any users that appear only in the function summaries. | |||
1373 | if (!ExportSummary && | |||
| ||||
1374 | (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc || | |||
1375 | AssumeFunc->use_empty()) && | |||
1376 | (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) | |||
1377 | return false; | |||
1378 | ||||
1379 | if (TypeTestFunc && AssumeFunc) | |||
1380 | scanTypeTestUsers(TypeTestFunc, AssumeFunc); | |||
1381 | ||||
1382 | if (TypeCheckedLoadFunc) | |||
1383 | scanTypeCheckedLoadUsers(TypeCheckedLoadFunc); | |||
1384 | ||||
1385 | if (ImportSummary) { | |||
1386 | for (auto &S : CallSlots) | |||
1387 | importResolution(S.first, S.second); | |||
1388 | ||||
1389 | removeRedundantTypeTests(); | |||
1390 | ||||
1391 | // The rest of the code is only necessary when exporting or during regular | |||
1392 | // LTO, so we are done. | |||
1393 | return true; | |||
1394 | } | |||
1395 | ||||
1396 | // Rebuild type metadata into a map for easy lookup. | |||
1397 | std::vector<VTableBits> Bits; | |||
1398 | DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; | |||
1399 | buildTypeIdentifierMap(Bits, TypeIdMap); | |||
1400 | if (TypeIdMap.empty()) | |||
1401 | return true; | |||
1402 | ||||
1403 | // Collect information from summary about which calls to try to devirtualize. | |||
1404 | if (ExportSummary) { | |||
1405 | DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID; | |||
1406 | for (auto &P : TypeIdMap) { | |||
1407 | if (auto *TypeId = dyn_cast<MDString>(P.first)) | |||
1408 | MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back( | |||
1409 | TypeId); | |||
1410 | } | |||
1411 | ||||
1412 | for (auto &P : *ExportSummary) { | |||
1413 | for (auto &S : P.second.SummaryList) { | |||
1414 | auto *FS = dyn_cast<FunctionSummary>(S.get()); | |||
1415 | if (!FS) | |||
1416 | continue; | |||
1417 | // FIXME: Only add live functions. | |||
1418 | for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { | |||
1419 | for (Metadata *MD : MetadataByGUID[VF.GUID]) { | |||
1420 | CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers = | |||
1421 | true; | |||
1422 | } | |||
1423 | } | |||
1424 | for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { | |||
1425 | for (Metadata *MD : MetadataByGUID[VF.GUID]) { | |||
1426 | CallSlots[{MD, VF.Offset}] | |||
1427 | .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS); | |||
1428 | } | |||
1429 | } | |||
1430 | for (const FunctionSummary::ConstVCall &VC : | |||
1431 | FS->type_test_assume_const_vcalls()) { | |||
1432 | for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) { | |||
1433 | CallSlots[{MD, VC.VFunc.Offset}] | |||
1434 | .ConstCSInfo[VC.Args] | |||
1435 | .SummaryHasTypeTestAssumeUsers = true; | |||
1436 | } | |||
1437 | } | |||
1438 | for (const FunctionSummary::ConstVCall &VC : | |||
1439 | FS->type_checked_load_const_vcalls()) { | |||
1440 | for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) { | |||
1441 | CallSlots[{MD, VC.VFunc.Offset}] | |||
1442 | .ConstCSInfo[VC.Args] | |||
1443 | .SummaryTypeCheckedLoadUsers.push_back(FS); | |||
1444 | } | |||
1445 | } | |||
1446 | } | |||
1447 | } | |||
1448 | } | |||
1449 | ||||
1450 | // For each (type, offset) pair: | |||
1451 | bool DidVirtualConstProp = false; | |||
1452 | std::map<std::string, Function*> DevirtTargets; | |||
1453 | for (auto &S : CallSlots) { | |||
1454 | // Search each of the members of the type identifier for the virtual | |||
1455 | // function implementation at offset S.first.ByteOffset, and add to | |||
1456 | // TargetsForSlot. | |||
1457 | std::vector<VirtualCallTarget> TargetsForSlot; | |||
1458 | if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID], | |||
1459 | S.first.ByteOffset)) { | |||
1460 | WholeProgramDevirtResolution *Res = nullptr; | |||
1461 | if (ExportSummary && isa<MDString>(S.first.TypeID)) | |||
1462 | Res = &ExportSummary | |||
1463 | ->getOrInsertTypeIdSummary( | |||
1464 | cast<MDString>(S.first.TypeID)->getString()) | |||
1465 | .WPDRes[S.first.ByteOffset]; | |||
1466 | ||||
1467 | if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) && | |||
1468 | tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first)) | |||
1469 | DidVirtualConstProp = true; | |||
1470 | ||||
1471 | // Collect functions devirtualized at least for one call site for stats. | |||
1472 | if (RemarksEnabled) | |||
1473 | for (const auto &T : TargetsForSlot) | |||
1474 | if (T.WasDevirt) | |||
1475 | DevirtTargets[T.Fn->getName()] = T.Fn; | |||
1476 | } | |||
1477 | ||||
1478 | // CFI-specific: if we are exporting and any llvm.type.checked.load | |||
1479 | // intrinsics were *not* devirtualized, we need to add the resulting | |||
1480 | // llvm.type.test intrinsics to the function summaries so that the | |||
1481 | // LowerTypeTests pass will export them. | |||
1482 | if (ExportSummary && isa<MDString>(S.first.TypeID)) { | |||
1483 | auto GUID = | |||
1484 | GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString()); | |||
1485 | for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers) | |||
1486 | FS->addTypeTest(GUID); | |||
1487 | for (auto &CCS : S.second.ConstCSInfo) | |||
1488 | for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers) | |||
1489 | FS->addTypeTest(GUID); | |||
1490 | } | |||
1491 | } | |||
1492 | ||||
1493 | if (RemarksEnabled) { | |||
1494 | // Generate remarks for each devirtualized function. | |||
1495 | for (const auto &DT : DevirtTargets) { | |||
1496 | Function *F = DT.second; | |||
1497 | ||||
1498 | using namespace ore; | |||
1499 | OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE"wholeprogramdevirt", "Devirtualized", F) | |||
1500 | << "devirtualized " | |||
1501 | << NV("FunctionName", F->getName())); | |||
1502 | } | |||
1503 | } | |||
1504 | ||||
1505 | removeRedundantTypeTests(); | |||
1506 | ||||
1507 | // Rebuild each global we touched as part of virtual constant propagation to | |||
1508 | // include the before and after bytes. | |||
1509 | if (DidVirtualConstProp) | |||
1510 | for (VTableBits &B : Bits) | |||
1511 | rebuildGlobal(B); | |||
1512 | ||||
1513 | return true; | |||
1514 | } |