LLVM 17.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular and thin LTO pipelines:
28//
29// During regular LTO, the pass determines the best optimization for each
30// virtual call and applies the resolutions directly to virtual calls that are
31// eligible for virtual call optimization (i.e. calls that use either of the
32// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
33//
34// During hybrid Regular/ThinLTO, the pass operates in two phases:
35// - Export phase: this is run during the thin link over a single merged module
36// that contains all vtables with !type metadata that participate in the link.
37// The pass computes a resolution for each virtual call and stores it in the
38// type identifier summary.
39// - Import phase: this is run during the thin backends over the individual
40// modules. The pass applies the resolutions previously computed during the
41// export phase to each eligible virtual call.
42//
43// During ThinLTO, the pass operates in two phases:
44// - Export phase: this is run during the thin link over the index which
45// contains a summary of all vtables with !type metadata that participate in
46// the link. It computes a resolution for each virtual call and stores it in
47// the type identifier summary. Only single implementation devirtualization
48// is supported.
49// - Import phase: (same as with hybrid case above).
50//
51//===----------------------------------------------------------------------===//
52
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/DenseSet.h"
58#include "llvm/ADT/MapVector.h"
60#include "llvm/ADT/Statistic.h"
68#include "llvm/IR/Constants.h"
69#include "llvm/IR/DataLayout.h"
70#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/Dominators.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/IRBuilder.h"
77#include "llvm/IR/InstrTypes.h"
78#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Intrinsics.h"
81#include "llvm/IR/LLVMContext.h"
82#include "llvm/IR/MDBuilder.h"
83#include "llvm/IR/Metadata.h"
84#include "llvm/IR/Module.h"
87#include "llvm/Pass.h"
88#include "llvm/PassRegistry.h"
91#include "llvm/Support/Errc.h"
92#include "llvm/Support/Error.h"
97#include "llvm/Transforms/IPO.h"
102#include <algorithm>
103#include <cstddef>
104#include <map>
105#include <set>
106#include <string>
107
108using namespace llvm;
109using namespace wholeprogramdevirt;
110
111#define DEBUG_TYPE "wholeprogramdevirt"
112
113STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
114STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
115STATISTIC(NumBranchFunnel, "Number of branch funnels");
116STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
117STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
118STATISTIC(NumVirtConstProp1Bit,
119 "Number of 1 bit virtual constant propagations");
120STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
121
123 "wholeprogramdevirt-summary-action",
124 cl::desc("What to do with the summary when running this pass"),
125 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
126 clEnumValN(PassSummaryAction::Import, "import",
127 "Import typeid resolutions from summary and globals"),
128 clEnumValN(PassSummaryAction::Export, "export",
129 "Export typeid resolutions to summary and globals")),
130 cl::Hidden);
131
133 "wholeprogramdevirt-read-summary",
134 cl::desc(
135 "Read summary from given bitcode or YAML file before running pass"),
136 cl::Hidden);
137
139 "wholeprogramdevirt-write-summary",
140 cl::desc("Write summary to given bitcode or YAML file after running pass. "
141 "Output file format is deduced from extension: *.bc means writing "
142 "bitcode, otherwise YAML"),
143 cl::Hidden);
144
146 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
147 cl::init(10),
148 cl::desc("Maximum number of call targets per "
149 "call site to enable branch funnels"));
150
151static cl::opt<bool>
152 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
153 cl::desc("Print index-based devirtualization messages"));
154
155/// Provide a way to force enable whole program visibility in tests.
156/// This is needed to support legacy tests that don't contain
157/// !vcall_visibility metadata (the mere presence of type tests
158/// previously implied hidden visibility).
159static cl::opt<bool>
160 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
161 cl::desc("Enable whole program visibility"));
162
163/// Provide a way to force disable whole program for debugging or workarounds,
164/// when enabled via the linker.
166 "disable-whole-program-visibility", cl::Hidden,
167 cl::desc("Disable whole program visibility (overrides enabling options)"));
168
169/// Provide way to prevent certain function from being devirtualized
171 SkipFunctionNames("wholeprogramdevirt-skip",
172 cl::desc("Prevent function(s) from being devirtualized"),
174
175/// Mechanism to add runtime checking of devirtualization decisions, optionally
176/// trapping or falling back to indirect call on any that are not correct.
177/// Trapping mode is useful for debugging undefined behavior leading to failures
178/// with WPD. Fallback mode is useful for ensuring safety when whole program
179/// visibility may be compromised.
182 "wholeprogramdevirt-check", cl::Hidden,
183 cl::desc("Type of checking for incorrect devirtualizations"),
184 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
185 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
187 "Fallback to indirect when incorrect")));
188
189namespace {
// Set of glob patterns (populated from -wholeprogramdevirt-skip) naming
// functions that must not be devirtualized.
190struct PatternList {
191 std::vector<GlobPattern> Patterns;
 // Compile every string in StringList into a GlobPattern.
 // NOTE(review): this listing elides the line that creates the pattern
 // (presumably a GlobPattern::create(S) call whose result `Pat` is tested);
 // strings that fail to parse appear to be silently dropped -- confirm
 // against the full source.
192 template <class T> void init(const T &StringList) {
193 for (const auto &S : StringList)
195 Patterns.push_back(std::move(*Pat));
196 }
 // Returns true if S matches any of the compiled patterns.
197 bool match(StringRef S) {
198 for (const GlobPattern &P : Patterns)
199 if (P.match(S))
200 return true;
201 return false;
202 }
203};
204} // namespace
205
206// Find the minimum offset that we may store a value of size Size bits at. If
207// IsAfter is set, look for an offset after the object, otherwise look for an
208// offset before the object. (The code below selects minAfterBytes/After when
209// IsAfter is set, so the directions are as stated here.)
// NOTE(review): this listing elides the function's signature line; the body
// reads parameters `Targets` (ArrayRef<VirtualCallTarget>), `IsAfter` and
// `Size` -- confirm the exact signature against the full source.
211 bool IsAfter, uint64_t Size) {
212 // Find a minimum offset taking into account only vtable sizes.
213 uint64_t MinByte = 0;
214 for (const VirtualCallTarget &Target : Targets) {
215 if (IsAfter)
216 MinByte = std::max(MinByte, Target.minAfterBytes());
217 else
218 MinByte = std::max(MinByte, Target.minBeforeBytes());
219 }
220
221 // Build a vector of arrays of bytes covering, for each target, a slice of the
222 // used region (see AccumBitVector::BytesUsed in
223 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
224 // this aligns the used regions to start at MinByte.
225 //
226 // In this example, A, B and C are vtables, # is a byte already allocated for
227 // a virtual function pointer, AAAA... (etc.) are the used regions for the
228 // vtables and Offset(X) is the value computed for the Offset variable below
229 // for X.
230 //
231 // Offset(A)
232 // | |
233 // |MinByte
234 // A: ################AAAAAAAA|AAAAAAAA
235 // B: ########BBBBBBBBBBBBBBBB|BBBB
236 // C: ########################|CCCCCCCCCCCCCCCC
237 // | Offset(B) |
238 //
239 // This code produces the slices of A, B and C that appear after the divider
240 // at MinByte.
241 std::vector<ArrayRef<uint8_t>> Used;
242 for (const VirtualCallTarget &Target : Targets) {
243 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
244 : Target.TM->Bits->Before.BytesUsed;
245 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
246 : MinByte - Target.minBeforeBytes();
247
248 // Disregard used regions that are smaller than Offset. These are
249 // effectively all-free regions that do not need to be checked.
250 if (VTUsed.size() > Offset)
251 Used.push_back(VTUsed.slice(Offset));
252 }
253
254 if (Size == 1) {
255 // Find a free bit in each member of Used.
// OR together the used-bit masks of every vtable at byte I; the first byte
// whose combined mask is not all-ones has a free bit, located by counting
// trailing zeros in the complement.
256 for (unsigned I = 0;; ++I) {
257 uint8_t BitsUsed = 0;
258 for (auto &&B : Used)
259 if (I < B.size())
260 BitsUsed |= B[I];
261 if (BitsUsed != 0xff)
262 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
263 }
264 } else {
265 // Find a free (Size/8) byte region in each member of Used.
266 // FIXME: see if alignment helps.
// For each candidate start byte I, verify Size/8 consecutive bytes are free
// in every vtable's used map; any occupied byte restarts the scan at I+1.
267 for (unsigned I = 0;; ++I) {
268 for (auto &&B : Used) {
269 unsigned Byte = 0;
270 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
271 if (B[I + Byte])
272 goto NextI;
273 ++Byte;
274 }
275 }
276 return (MinByte + I) * 8;
277 NextI:;
278 }
279 }
280}
281
// Allocate a virtual-constant-propagation return-value slot AllocBefore bits
// *before* each target's vtable address point, reporting the slot back to the
// caller as a negative byte offset plus a bit offset within that byte.
// NOTE(review): this listing elides the function's signature line; the body
// reads `Targets`, `AllocBefore`, `BitWidth`, `OffsetByte`, `OffsetBit` --
// confirm the exact signature against the full source.
284 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
285 if (BitWidth == 1)
 // Single-bit values are packed; address the byte containing the bit.
286 OffsetByte = -(AllocBefore / 8 + 1);
287 else
 // Multi-bit values occupy whole bytes rounded up from BitWidth.
288 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
289 OffsetBit = AllocBefore % 8;
290
 // NOTE(review): the loop header over Targets is elided in this listing.
292 if (BitWidth == 1)
293 Target.setBeforeBit(AllocBefore);
294 else
295 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
296 }
297}
298
// Mirror of the "before" variant above: allocate a return-value slot
// AllocAfter bits *after* each target's vtable, reporting a non-negative byte
// offset plus bit offset.
// NOTE(review): this listing elides the function's signature line; confirm
// against the full source.
301 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
302 if (BitWidth == 1)
303 OffsetByte = AllocAfter / 8;
304 else
305 OffsetByte = (AllocAfter + 7) / 8;
306 OffsetBit = AllocAfter % 8;
307
 // NOTE(review): the loop header over Targets is elided in this listing.
309 if (BitWidth == 1)
310 Target.setAfterBit(AllocAfter);
311 else
312 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
313 }
314}
315
// Member-initializer list of the VirtualCallTarget constructor: records the
// target function and type-member info, caches the module's endianness, and
// starts with WasDevirt unset.
// NOTE(review): the constructor's signature line is elided in this listing --
// confirm the parameter list against the full source.
317 : Fn(Fn), TM(TM),
318 IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
319 WasDevirt(false) {}
320
321namespace {
322
323// A slot in a set of virtual tables. The TypeID identifies the set of virtual
324// tables, and the ByteOffset is the offset in bytes from the address point to
325// the virtual function pointer.
326struct VTableSlot {
 // NOTE(review): the TypeID member declaration is elided in this listing
 // (the DenseMapInfo specialization below compares LHS.TypeID, so the member
 // exists) -- confirm its type against the full source.
328 uint64_t ByteOffset;
329};
330
331} // end anonymous namespace
332
333namespace llvm {
334
// DenseMap key traits for VTableSlot. Equality compares both TypeID and
// ByteOffset; the empty/tombstone/hash bodies are elided in this listing
// (presumably delegating to the member types' DenseMapInfo -- confirm
// against the full source).
335template <> struct DenseMapInfo<VTableSlot> {
336 static VTableSlot getEmptyKey() {
339 }
340 static VTableSlot getTombstoneKey() {
343 }
344 static unsigned getHashValue(const VTableSlot &I) {
347 }
348 static bool isEqual(const VTableSlot &LHS,
349 const VTableSlot &RHS) {
350 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
351 }
352};
353
// DenseMap key traits for VTableSlotSummary (the index-based analogue of
// VTableSlot). Equality compares TypeID and ByteOffset; the remaining
// member bodies are elided in this listing -- confirm against the full
// source.
354template <> struct DenseMapInfo<VTableSlotSummary> {
358 }
362 }
363 static unsigned getHashValue(const VTableSlotSummary &I) {
366 }
367 static bool isEqual(const VTableSlotSummary &LHS,
368 const VTableSlotSummary &RHS) {
369 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
370 }
371};
372
373} // end namespace llvm
374
375namespace {
376
377// Returns true if the function must be unreachable based on ValueInfo.
378//
379// In particular, identifies a function as unreachable in the following
380// conditions
381// 1) All summaries are live.
382// 2) All function summaries indicate it's unreachable
383// 3) There is no non-function with the same GUID (which is rare)
// NOTE(review): the function's signature line is elided in this listing; the
// body reads a single ValueInfo parameter named `TheFnVI` -- confirm against
// the full source.
385 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
386 // Returns false if ValueInfo is absent, or the summary list is empty
387 // (e.g., function declarations).
388 return false;
389 }
390
391 for (const auto &Summary : TheFnVI.getSummaryList()) {
392 // Conservatively returns false if any non-live functions are seen.
393 // In general either all summaries should be live or all should be dead.
394 if (!Summary->isLive())
395 return false;
396 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
397 if (!FS->fflags().MustBeUnreachable)
398 return false;
399 }
400 // Be conservative if a non-function has the same GUID (which is rare).
401 else
402 return false;
403 }
404 // All function summaries are live and all of them agree that the function is
405 // unreachable.
406 return true;
407}
408
409// A virtual call site. VTable is the loaded virtual table pointer, and CB is
410// the indirect virtual call.
411struct VirtualCallSite {
412 Value *VTable = nullptr;
413 CallBase &CB;
414
415 // If non-null, this field points to the associated unsafe use count stored in
416 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
417 // of that field for details.
418 unsigned *NumUnsafeUses = nullptr;
419
 // Emit an optimization remark describing the devirtualization performed at
 // this call site.
 // NOTE(review): this listing elides the final parameter line (the body uses
 // an `OREGetter` callable returning an OptimizationRemarkEmitter) -- confirm
 // against the full source.
420 void
421 emitRemark(const StringRef OptName, const StringRef TargetName,
423 Function *F = CB.getCaller();
424 DebugLoc DLoc = CB.getDebugLoc();
425 BasicBlock *Block = CB.getParent();
426
427 using namespace ore;
428 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
429 << NV("Optimization", OptName)
430 << ": devirtualized a call to "
431 << NV("FunctionName", TargetName));
432 }
433
 // Replace all uses of the call with New and erase the call instruction,
 // optionally emitting a remark first.
 // NOTE(review): this listing elides the `OREGetter` parameter line --
 // confirm against the full source.
434 void replaceAndErase(
435 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
437 Value *New) {
438 if (RemarksEnabled)
439 emitRemark(OptName, TargetName, OREGetter);
440 CB.replaceAllUsesWith(New);
 // For invokes we must preserve CFG shape: branch unconditionally to the
 // normal destination and detach the unwind edge before erasing.
441 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
442 BranchInst::Create(II->getNormalDest(), &CB);
443 II->getUnwindDest()->removePredecessor(II->getParent());
444 }
445 CB.eraseFromParent();
446 // This use is no longer unsafe.
447 if (NumUnsafeUses)
448 --*NumUnsafeUses;
449 }
450};
451
452// Call site information collected for a specific VTableSlot and possibly a list
453// of constant integer arguments. The grouping by arguments is handled by the
454// VTableSlotInfo class.
455struct CallSiteInfo {
456 /// The set of call sites for this slot. Used during regular LTO and the
457 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
458 /// call sites that appear in the merged module itself); in each of these
459 /// cases we are directly operating on the call sites at the IR level.
460 std::vector<VirtualCallSite> CallSites;
461
462 /// Whether all call sites represented by this CallSiteInfo, including those
463 /// in summaries, have been devirtualized. This starts off as true because a
464 /// default constructed CallSiteInfo represents no call sites.
465 bool AllCallSitesDevirted = true;
466
467 // These fields are used during the export phase of ThinLTO and reflect
468 // information collected from function summaries.
469
470 /// Whether any function summary contains an llvm.assume(llvm.type.test) for
471 /// this slot.
472 bool SummaryHasTypeTestAssumeUsers = false;
473
474 /// CFI-specific: a vector containing the list of function summaries that use
475 /// the llvm.type.checked.load intrinsic and therefore will require
476 /// resolutions for llvm.type.test in order to implement CFI checks if
477 /// devirtualization was unsuccessful. If devirtualization was successful, the
478 /// pass will clear this vector by calling markDevirt(). If at the end of the
479 /// pass the vector is non-empty, we will need to add a use of llvm.type.test
480 /// to each of the function summaries in the vector.
481 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
482 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
483
484 bool isExported() const {
485 return SummaryHasTypeTestAssumeUsers ||
486 !SummaryTypeCheckedLoadUsers.empty();
487 }
488
489 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
490 SummaryTypeCheckedLoadUsers.push_back(FS);
491 AllCallSitesDevirted = false;
492 }
493
494 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
495 SummaryTypeTestAssumeUsers.push_back(FS);
496 SummaryHasTypeTestAssumeUsers = true;
497 AllCallSitesDevirted = false;
498 }
499
500 void markDevirt() {
501 AllCallSitesDevirted = true;
502
503 // As explained in the comment for SummaryTypeCheckedLoadUsers.
504 SummaryTypeCheckedLoadUsers.clear();
505 }
506};
507
508// Call site information collected for a specific VTableSlot.
509struct VTableSlotInfo {
510 // The set of call sites which do not have all constant integer arguments
511 // (excluding "this").
512 CallSiteInfo CSInfo;
513
514 // The set of call sites with all constant integer arguments (excluding
515 // "this"), grouped by argument list.
516 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
517
518 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
519
520private:
521 CallSiteInfo &findCallSiteInfo(CallBase &CB);
522};
523
524CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
525 std::vector<uint64_t> Args;
526 auto *CBType = dyn_cast<IntegerType>(CB.getType());
527 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
528 return CSInfo;
529 for (auto &&Arg : drop_begin(CB.args())) {
530 auto *CI = dyn_cast<ConstantInt>(Arg);
531 if (!CI || CI->getBitWidth() > 64)
532 return CSInfo;
533 Args.push_back(CI->getZExtValue());
534 }
535 return ConstCSInfo[Args];
536}
537
538void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
539 unsigned *NumUnsafeUses) {
540 auto &CSI = findCallSiteInfo(CB);
541 CSI.AllCallSitesDevirted = false;
542 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
543}
544
// Driver for IR-level whole-program devirtualization: collects vtables and
// virtual call sites from module M and applies/records the optimizations
// described in the file header. Exactly one of ExportSummary/ImportSummary
// may be non-null (asserted in the constructor).
545struct DevirtModule {
546 Module &M;
547 function_ref<AAResults &(Function &)> AARGetter;
548 function_ref<DominatorTree &(Function &)> LookupDomTree;
549
550 ModuleSummaryIndex *ExportSummary;
551 const ModuleSummaryIndex *ImportSummary;
552
553 IntegerType *Int8Ty;
554 PointerType *Int8PtrTy;
 // NOTE(review): the Int32Ty member declaration is elided in this listing
 // (the constructor initializes Int32Ty below, so it exists).
556 IntegerType *Int64Ty;
557 IntegerType *IntPtrTy;
558 /// Sizeless array type, used for imported vtables. This provides a signal
559 /// to analyzers that these imports may alias, as they do for example
560 /// when multiple unique return values occur in the same vtable.
561 ArrayType *Int8Arr0Ty;
562
563 bool RemarksEnabled;
 // NOTE(review): the OREGetter member declaration is elided in this listing
 // (the constructor initializes OREGetter(OREGetter) below, so it exists).
565
567
568 // Calls that have already been optimized. We may add a call to multiple
569 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
570 // optimize a call more than once.
571 SmallPtrSet<CallBase *, 8> OptimizedCalls;
572
573 // This map keeps track of the number of "unsafe" uses of a loaded function
574 // pointer. The key is the associated llvm.type.test intrinsic call generated
575 // by this pass. An unsafe use is one that calls the loaded function pointer
576 // directly. Every time we eliminate an unsafe use (for example, by
577 // devirtualizing it or by applying virtual constant propagation), we
578 // decrement the value stored in this map. If a value reaches zero, we can
579 // eliminate the type check by RAUWing the associated llvm.type.test call with
580 // true.
581 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
582 PatternList FunctionsToSkip;
583
 // NOTE(review): this listing elides the constructor's OREGetter parameter
 // line; see the initializer list below.
584 DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
586 function_ref<DominatorTree &(Function &)> LookupDomTree,
587 ModuleSummaryIndex *ExportSummary,
588 const ModuleSummaryIndex *ImportSummary)
589 : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
590 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
591 Int8Ty(Type::getInt8Ty(M.getContext())),
592 Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
593 Int32Ty(Type::getInt32Ty(M.getContext())),
594 Int64Ty(Type::getInt64Ty(M.getContext())),
595 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
596 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
597 RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
 // Export and import phases are mutually exclusive for a module run.
598 assert(!(ExportSummary && ImportSummary));
599 FunctionsToSkip.init(SkipFunctionNames);
600 }
601
602 bool areRemarksEnabled();
603
604 void
605 scanTypeTestUsers(Function *TypeTestFunc,
606 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
607 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
608
609 void buildTypeIdentifierMap(
610 std::vector<VTableBits> &Bits,
611 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
612
613 bool
614 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
615 const std::set<TypeMemberInfo> &TypeMemberInfos,
616 uint64_t ByteOffset,
617 ModuleSummaryIndex *ExportSummary);
618
 // Single implementation devirtualization (see file header).
619 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
620 bool &IsExported);
621 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
623 VTableSlotInfo &SlotInfo,
625
 // Branch funnel construction for small sets of call targets.
626 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
627 bool &IsExported);
628 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
629 VTableSlotInfo &SlotInfo,
630 WholeProgramDevirtResolution *Res, VTableSlot Slot);
631
632 bool tryEvaluateFunctionsWithArgs(
634 ArrayRef<uint64_t> Args);
635
 // Uniform return value optimization (see file header).
636 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
637 uint64_t TheRetVal);
638 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
639 CallSiteInfo &CSInfo,
641
642 // Returns the global symbol name that is used to export information about the
643 // given vtable slot and list of arguments.
644 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
646
647 bool shouldExportConstantsAsAbsoluteSymbols();
648
649 // This function is called during the export phase to create a symbol
650 // definition containing information about the given vtable slot and list of
651 // arguments.
652 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
653 Constant *C);
654 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
655 uint32_t Const, uint32_t &Storage);
656
657 // This function is called during the import phase to create a reference to
658 // the symbol definition created during the export phase.
659 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
661 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
662 StringRef Name, IntegerType *IntTy,
663 uint32_t Storage);
664
665 Constant *getMemberAddr(const TypeMemberInfo *M);
666
 // Unique return value optimization for i1 returns (see file header).
667 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
668 Constant *UniqueMemberAddr);
669 bool tryUniqueRetValOpt(unsigned BitWidth,
671 CallSiteInfo &CSInfo,
673 VTableSlot Slot, ArrayRef<uint64_t> Args);
674
 // Virtual constant propagation (see file header).
675 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
676 Constant *Byte, Constant *Bit);
677 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
678 VTableSlotInfo &SlotInfo,
679 WholeProgramDevirtResolution *Res, VTableSlot Slot);
680
681 void rebuildGlobal(VTableBits &B);
682
683 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
684 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
685
686 // If we were able to eliminate all unsafe uses for a type checked load,
687 // eliminate the associated type tests by replacing them with true.
688 void removeRedundantTypeTests();
689
690 bool run();
691
692 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
693 //
694 // Caller guarantees that `ExportSummary` is not nullptr.
695 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
696 ModuleSummaryIndex *ExportSummary);
697
698 // Returns true if the function definition must be unreachable.
699 //
700 // Note if this helper function returns true, `F` is guaranteed
701 // to be unreachable; if it returns false, `F` might still
702 // be unreachable but not covered by this helper function.
703 //
704 // Implementation-wise, if function definition is present, IR is analyzed; if
705 // not, look up function flags from ExportSummary as a fallback.
706 static bool mustBeUnreachableFunction(Function *const F,
707 ModuleSummaryIndex *ExportSummary);
708
709 // Lower the module using the action and summary passed as command line
710 // arguments. For testing purposes only.
711 static bool
712 runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
714 function_ref<DominatorTree &(Function &)> LookupDomTree);
};
716
// Summary-index-based driver for pure ThinLTO: operates only on the combined
// index (no IR), and supports only single implementation devirtualization
// (see file header).
717struct DevirtIndex {
718 ModuleSummaryIndex &ExportSummary;
719 // The set in which to record GUIDs exported from their module by
720 // devirtualization, used by client to ensure they are not internalized.
721 std::set<GlobalValue::GUID> &ExportedGUIDs;
722 // A map in which to record the information necessary to locate the WPD
723 // resolution for local targets in case they are exported by cross module
724 // importing.
725 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
726
 // NOTE(review): a member declaration (internal line 727) is elided in this
 // listing -- confirm against the full source.
728
729 PatternList FunctionsToSkip;
730
731 DevirtIndex(
732 ModuleSummaryIndex &ExportSummary,
733 std::set<GlobalValue::GUID> &ExportedGUIDs,
734 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
735 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
736 LocalWPDTargetsMap(LocalWPDTargetsMap) {
737 FunctionsToSkip.init(SkipFunctionNames);
738 }
739
740 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
741 const TypeIdCompatibleVtableInfo TIdInfo,
742 uint64_t ByteOffset);
743
744 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
745 VTableSlotSummary &SlotSummary,
746 VTableSlotInfo &SlotInfo,
748 std::set<ValueInfo> &DevirtTargets);
749
750 void run();
751};
752} // end anonymous namespace
753
// Pass entry point (presumably WholeProgramDevirtPass::run -- the signature
// line is elided in this listing). Builds the analysis-getter callbacks,
// optionally runs the command-line testing path, then runs DevirtModule.
756 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
757 auto AARGetter = [&](Function &F) -> AAResults & {
758 return FAM.getResult<AAManager>(F);
759 };
 // NOTE(review): the bodies of the OREGetter and LookupDomTree lambdas are
 // elided in this listing (presumably FAM.getResult calls) -- confirm
 // against the full source.
760 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
762 };
763 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
765 };
766 if (UseCommandLine) {
767 if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
768 return PreservedAnalyses::all();
 // NOTE(review): the non-preserving return (internal line 769) is elided.
770 }
771 if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
772 ImportSummary)
773 .run())
774 return PreservedAnalyses::all();
 // NOTE(review): the final return (internal line 775) is elided.
776}
777
778namespace llvm {
779// Enable whole program visibility if enabled by client (e.g. linker) or
780// internal option, and not force disabled.
781bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
 // NOTE(review): the final conjunct is elided in this listing; per the
 // comment above it presumably tests !DisableWholeProgramVisibility --
 // confirm against the full source.
782 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
784}
785
786/// If whole program visibility asserted, then upgrade all public vcall
787/// visibility metadata on vtable definitions to linkage unit visibility in
788/// Module IR (for regular or hybrid LTO).
// NOTE(review): the function's signature line (updateVCallVisibilityInModule)
// is elided in this listing.
790 Module &M, bool WholeProgramVisibilityEnabledInLTO,
791 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
792 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
793 return;
794 for (GlobalVariable &GV : M.globals()) {
795 // Add linkage unit visibility to any variable with type metadata, which are
796 // the vtable definitions. We won't have an existing vcall_visibility
797 // metadata on vtable definitions with public visibility.
798 if (GV.hasMetadata(LLVMContext::MD_type) &&
799 GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic &&
800 // Don't upgrade the visibility for symbols exported to the dynamic
801 // linker, as we have no information on their eventual use.
802 !DynamicExportSymbols.count(GV.getGUID()))
803 GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
804 }
805}
806
// Lower llvm.public.type.test calls: with whole-program visibility they are
// strengthened into regular llvm.type.test calls; without it they are folded
// to true (no check possible).
// NOTE(review): the function's signature line (updatePublicTypeTestCalls,
// taking Module &M) is elided in this listing.
808 bool WholeProgramVisibilityEnabledInLTO) {
809 Function *PublicTypeTestFunc =
810 M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
811 if (!PublicTypeTestFunc)
812 return;
813 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
814 Function *TypeTestFunc =
815 Intrinsic::getDeclaration(&M, Intrinsic::type_test);
 // early_inc_range: we erase the user inside the loop.
816 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
817 auto *CI = cast<CallInst>(U.getUser());
818 auto *NewCI = CallInst::Create(
819 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)},
820 std::nullopt, "", CI);
821 CI->replaceAllUsesWith(NewCI);
822 CI->eraseFromParent();
823 }
824 } else {
825 auto *True = ConstantInt::getTrue(M.getContext());
826 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
827 auto *CI = cast<CallInst>(U.getUser());
828 CI->replaceAllUsesWith(True);
829 CI->eraseFromParent();
830 }
831 }
832}
833
834/// If whole program visibility asserted, then upgrade all public vcall
835/// visibility metadata on vtable definition summaries to linkage unit
836/// visibility in Module summary index (for ThinLTO).
// NOTE(review): the function's signature line (updateVCallVisibilityInIndex)
// is elided in this listing.
838 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
839 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
840 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
841 return;
842 for (auto &P : Index) {
843 // Don't upgrade the visibility for symbols exported to the dynamic
844 // linker, as we have no information on their eventual use.
845 if (DynamicExportSymbols.count(P.first))
846 continue;
847 for (auto &S : P.second.SummaryList) {
848 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
849 if (!GVar ||
850 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
851 continue;
852 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
853 }
854 }
855}
856
// Thin wrapper running index-only WPD over the combined summary.
// NOTE(review): the function's signature line (runWholeProgramDevirtOnIndex)
// is elided in this listing.
858 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
859 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
860 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
861}
862
// For each devirtualized local target that a cross-module import has made
// visible externally, rewrite its recorded single-impl name to the promoted
// (module-hash-suffixed) global name.
// NOTE(review): the function's signature line (updateIndexWPDForExports) is
// elided in this listing.
864 ModuleSummaryIndex &Summary,
865 function_ref<bool(StringRef, ValueInfo)> isExported,
866 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
867 for (auto &T : LocalWPDTargetsMap) {
868 auto &VI = T.first;
869 // This was enforced earlier during trySingleImplDevirt.
870 assert(VI.getSummaryList().size() == 1 &&
871 "Devirt of local target has more than one copy");
872 auto &S = VI.getSummaryList()[0];
873 if (!isExported(S->modulePath(), VI))
874 continue;
875
876 // It's been exported by a cross module import.
877 for (auto &SlotSummary : T.second) {
878 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
879 assert(TIdSum);
880 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
881 assert(WPDRes != TIdSum->WPDRes.end());
882 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
883 WPDRes->second.SingleImplName,
884 Summary.getModuleHash(S->modulePath()));
885 }
886 }
887}
888
889} // end namespace llvm
890
// Validate a summary read via -wholeprogramdevirt-read-summary before use.
// NOTE(review): the function's signature line (checkCombinedSummaryForTesting,
// returning Error) and the condition guarding the createStringError call are
// elided in this listing -- confirm against the full source.
892 // Check that summary index contains regular LTO module when performing
893 // export to prevent occasional use of index from pure ThinLTO compilation
894 // (-fno-split-lto-module). This kind of summary index is passed to
895 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
896 const auto &ModPaths = Summary->modulePaths();
899 return createStringError(
901 "combined summary should contain Regular LTO module");
902 return ErrorSuccess();
903}
904
905bool DevirtModule::runForTesting(
906 Module &M, function_ref<AAResults &(Function &)> AARGetter,
908 function_ref<DominatorTree &(Function &)> LookupDomTree) {
909 std::unique_ptr<ModuleSummaryIndex> Summary =
910 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
911
912 // Handle the command-line summary arguments. This code is for testing
913 // purposes only, so we handle errors directly.
914 if (!ClReadSummary.empty()) {
915 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
916 ": ");
917 auto ReadSummaryFile =
919 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
920 getModuleSummaryIndex(*ReadSummaryFile)) {
921 Summary = std::move(*SummaryOrErr);
922 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
923 } else {
924 // Try YAML if we've failed with bitcode.
925 consumeError(SummaryOrErr.takeError());
926 yaml::Input In(ReadSummaryFile->getBuffer());
927 In >> *Summary;
928 ExitOnErr(errorCodeToError(In.error()));
929 }
930 }
931
932 bool Changed =
933 DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
935 : nullptr,
937 : nullptr)
938 .run();
939
940 if (!ClWriteSummary.empty()) {
941 ExitOnError ExitOnErr(
942 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
943 std::error_code EC;
944 if (StringRef(ClWriteSummary).endswith(".bc")) {
946 ExitOnErr(errorCodeToError(EC));
947 writeIndexToFile(*Summary, OS);
948 } else {
950 ExitOnErr(errorCodeToError(EC));
951 yaml::Output Out(OS);
952 Out << *Summary;
953 }
954 }
955
956 return Changed;
957}
958
959void DevirtModule::buildTypeIdentifierMap(
960 std::vector<VTableBits> &Bits,
961 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
963 Bits.reserve(M.global_size());
965 for (GlobalVariable &GV : M.globals()) {
966 Types.clear();
967 GV.getMetadata(LLVMContext::MD_type, Types);
968 if (GV.isDeclaration() || Types.empty())
969 continue;
970
971 VTableBits *&BitsPtr = GVToBits[&GV];
972 if (!BitsPtr) {
973 Bits.emplace_back();
974 Bits.back().GV = &GV;
975 Bits.back().ObjectSize =
976 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
977 BitsPtr = &Bits.back();
978 }
979
980 for (MDNode *Type : Types) {
981 auto TypeID = Type->getOperand(1).get();
982
984 cast<ConstantInt>(
985 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
986 ->getZExtValue();
987
988 TypeIdMap[TypeID].insert({BitsPtr, Offset});
989 }
990 }
991}
992
993bool DevirtModule::tryFindVirtualCallTargets(
994 std::vector<VirtualCallTarget> &TargetsForSlot,
995 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
996 ModuleSummaryIndex *ExportSummary) {
997 for (const TypeMemberInfo &TM : TypeMemberInfos) {
998 if (!TM.Bits->GV->isConstant())
999 return false;
1000
1001 // We cannot perform whole program devirtualization analysis on a vtable
1002 // with public LTO visibility.
1003 if (TM.Bits->GV->getVCallVisibility() ==
1005 return false;
1006
1007 Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
1008 TM.Offset + ByteOffset, M);
1009 if (!Ptr)
1010 return false;
1011
1012 auto C = Ptr->stripPointerCasts();
1013 // Make sure this is a function or alias to a function.
1014 auto Fn = dyn_cast<Function>(C);
1015 auto A = dyn_cast<GlobalAlias>(C);
1016 if (!Fn && A)
1017 Fn = dyn_cast<Function>(A->getAliasee());
1018
1019 if (!Fn)
1020 return false;
1021
1022 if (FunctionsToSkip.match(Fn->getName()))
1023 return false;
1024
1025 // We can disregard __cxa_pure_virtual as a possible call target, as
1026 // calls to pure virtuals are UB.
1027 if (Fn->getName() == "__cxa_pure_virtual")
1028 continue;
1029
1030 // We can disregard unreachable functions as possible call targets, as
1031 // unreachable functions shouldn't be called.
1032 if (mustBeUnreachableFunction(Fn, ExportSummary))
1033 continue;
1034
1035 // Save the symbol used in the vtable to use as the devirtualization
1036 // target.
1037 auto GV = dyn_cast<GlobalValue>(C);
1038 assert(GV);
1039 TargetsForSlot.push_back({GV, &TM});
1040 }
1041
1042 // Give up if we couldn't find any targets.
1043 return !TargetsForSlot.empty();
1044}
1045
1046bool DevirtIndex::tryFindVirtualCallTargets(
1047 std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
1048 uint64_t ByteOffset) {
1049 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1050 // Find a representative copy of the vtable initializer.
1051 // We can have multiple available_externally, linkonce_odr and weak_odr
1052 // vtable initializers. We can also have multiple external vtable
1053 // initializers in the case of comdats, which we cannot check here.
1054 // The linker should give an error in this case.
1055 //
1056 // Also, handle the case of same-named local Vtables with the same path
1057 // and therefore the same GUID. This can happen if there isn't enough
1058 // distinguishing path when compiling the source file. In that case we
1059 // conservatively return false early.
1060 const GlobalVarSummary *VS = nullptr;
1061 bool LocalFound = false;
1062 for (const auto &S : P.VTableVI.getSummaryList()) {
1063 if (GlobalValue::isLocalLinkage(S->linkage())) {
1064 if (LocalFound)
1065 return false;
1066 LocalFound = true;
1067 }
1068 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1069 if (!CurVS->vTableFuncs().empty() ||
1070 // Previously clang did not attach the necessary type metadata to
1071 // available_externally vtables, in which case there would not
1072 // be any vtable functions listed in the summary and we need
1073 // to treat this case conservatively (in case the bitcode is old).
1074 // However, we will also not have any vtable functions in the
1075 // case of a pure virtual base class. In that case we do want
1076 // to set VS to avoid treating it conservatively.
1078 VS = CurVS;
1079 // We cannot perform whole program devirtualization analysis on a vtable
1080 // with public LTO visibility.
1081 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1082 return false;
1083 }
1084 }
1085 // There will be no VS if all copies are available_externally having no
1086 // type metadata. In that case we can't safely perform WPD.
1087 if (!VS)
1088 return false;
1089 if (!VS->isLive())
1090 continue;
1091 for (auto VTP : VS->vTableFuncs()) {
1092 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1093 continue;
1094
1095 if (mustBeUnreachableFunction(VTP.FuncVI))
1096 continue;
1097
1098 TargetsForSlot.push_back(VTP.FuncVI);
1099 }
1100 }
1101
1102 // Give up if we couldn't find any targets.
1103 return !TargetsForSlot.empty();
1104}
1105
1106void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1107 Constant *TheFn, bool &IsExported) {
1108 // Don't devirtualize function if we're told to skip it
1109 // in -wholeprogramdevirt-skip.
1110 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1111 return;
1112 auto Apply = [&](CallSiteInfo &CSInfo) {
1113 for (auto &&VCallSite : CSInfo.CallSites) {
1114 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1115 continue;
1116
1117 if (RemarksEnabled)
1118 VCallSite.emitRemark("single-impl",
1119 TheFn->stripPointerCasts()->getName(), OREGetter);
1120 NumSingleImpl++;
1121 auto &CB = VCallSite.CB;
1122 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1123 IRBuilder<> Builder(&CB);
1124 Value *Callee =
1125 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1126
1127 // If trap checking is enabled, add support to compare the virtual
1128 // function pointer to the devirtualized target. In case of a mismatch,
1129 // perform a debug trap.
1130 if (DevirtCheckMode == WPDCheckMode::Trap) {
1131 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1132 Instruction *ThenTerm =
1133 SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
1134 Builder.SetInsertPoint(ThenTerm);
1135 Function *TrapFn = Intrinsic::getDeclaration(&M, Intrinsic::debugtrap);
1136 auto *CallTrap = Builder.CreateCall(TrapFn);
1137 CallTrap->setDebugLoc(CB.getDebugLoc());
1138 }
1139
1140 // If fallback checking is enabled, add support to compare the virtual
1141 // function pointer to the devirtualized target. In case of a mismatch,
1142 // fall back to indirect call.
1143 if (DevirtCheckMode == WPDCheckMode::Fallback) {
1144 MDNode *Weights =
1145 MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1);
1146 // Version the indirect call site. If the called value is equal to the
1147 // given callee, 'NewInst' will be executed, otherwise the original call
1148 // site will be executed.
1149 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1150 NewInst.setCalledOperand(Callee);
1151 // Since the new call site is direct, we must clear metadata that
1152 // is only appropriate for indirect calls. This includes !prof and
1153 // !callees metadata.
1154 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1155 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1156 // Additionally, we should remove them from the fallback indirect call,
1157 // so that we don't attempt to perform indirect call promotion later.
1158 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1159 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1160 }
1161
1162 // In either trapping or non-checking mode, devirtualize original call.
1163 else {
1164 // Devirtualize unconditionally.
1166 // Since the call site is now direct, we must clear metadata that
1167 // is only appropriate for indirect calls. This includes !prof and
1168 // !callees metadata.
1169 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1170 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1171 }
1172
1173 // This use is no longer unsafe.
1174 if (VCallSite.NumUnsafeUses)
1175 --*VCallSite.NumUnsafeUses;
1176 }
1177 if (CSInfo.isExported())
1178 IsExported = true;
1179 CSInfo.markDevirt();
1180 };
1181 Apply(SlotInfo.CSInfo);
1182 for (auto &P : SlotInfo.ConstCSInfo)
1183 Apply(P.second);
1184}
1185
1186static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1187 // We can't add calls if we haven't seen a definition
1188 if (Callee.getSummaryList().empty())
1189 return false;
1190
1191 // Insert calls into the summary index so that the devirtualized targets
1192 // are eligible for import.
1193 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1194 // to better ensure we have the opportunity to inline them.
1195 bool IsExported = false;
1196 auto &S = Callee.getSummaryList()[0];
1197 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
1198 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1199 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1200 FS->addCall({Callee, CI});
1201 IsExported |= S->modulePath() != FS->modulePath();
1202 }
1203 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1204 FS->addCall({Callee, CI});
1205 IsExported |= S->modulePath() != FS->modulePath();
1206 }
1207 };
1208 AddCalls(SlotInfo.CSInfo);
1209 for (auto &P : SlotInfo.ConstCSInfo)
1210 AddCalls(P.second);
1211 return IsExported;
1212}
1213
1214bool DevirtModule::trySingleImplDevirt(
1215 ModuleSummaryIndex *ExportSummary,
1216 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1218 // See if the program contains a single implementation of this virtual
1219 // function.
1220 auto *TheFn = TargetsForSlot[0].Fn;
1221 for (auto &&Target : TargetsForSlot)
1222 if (TheFn != Target.Fn)
1223 return false;
1224
1225 // If so, update each call site to call that implementation directly.
1226 if (RemarksEnabled || AreStatisticsEnabled())
1227 TargetsForSlot[0].WasDevirt = true;
1228
1229 bool IsExported = false;
1230 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1231 if (!IsExported)
1232 return false;
1233
1234 // If the only implementation has local linkage, we must promote to external
1235 // to make it visible to thin LTO objects. We can only get here during the
1236 // ThinLTO export phase.
1237 if (TheFn->hasLocalLinkage()) {
1238 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1239
1240 // Since we are renaming the function, any comdats with the same name must
1241 // also be renamed. This is required when targeting COFF, as the comdat name
1242 // must match one of the names of the symbols in the comdat.
1243 if (Comdat *C = TheFn->getComdat()) {
1244 if (C->getName() == TheFn->getName()) {
1245 Comdat *NewC = M.getOrInsertComdat(NewName);
1246 NewC->setSelectionKind(C->getSelectionKind());
1247 for (GlobalObject &GO : M.global_objects())
1248 if (GO.getComdat() == C)
1249 GO.setComdat(NewC);
1250 }
1251 }
1252
1253 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1254 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1255 TheFn->setName(NewName);
1256 }
1257 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1258 // Any needed promotion of 'TheFn' has already been done during
1259 // LTO unit split, so we can ignore return value of AddCalls.
1260 AddCalls(SlotInfo, TheFnVI);
1261
1263 Res->SingleImplName = std::string(TheFn->getName());
1264
1265 return true;
1266}
1267
1268bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1269 VTableSlotSummary &SlotSummary,
1270 VTableSlotInfo &SlotInfo,
1272 std::set<ValueInfo> &DevirtTargets) {
1273 // See if the program contains a single implementation of this virtual
1274 // function.
1275 auto TheFn = TargetsForSlot[0];
1276 for (auto &&Target : TargetsForSlot)
1277 if (TheFn != Target)
1278 return false;
1279
1280 // Don't devirtualize if we don't have target definition.
1281 auto Size = TheFn.getSummaryList().size();
1282 if (!Size)
1283 return false;
1284
1285 // Don't devirtualize function if we're told to skip it
1286 // in -wholeprogramdevirt-skip.
1287 if (FunctionsToSkip.match(TheFn.name()))
1288 return false;
1289
1290 // If the summary list contains multiple summaries where at least one is
1291 // a local, give up, as we won't know which (possibly promoted) name to use.
1292 for (const auto &S : TheFn.getSummaryList())
1293 if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
1294 return false;
1295
1296 // Collect functions devirtualized at least for one call site for stats.
1298 DevirtTargets.insert(TheFn);
1299
1300 auto &S = TheFn.getSummaryList()[0];
1301 bool IsExported = AddCalls(SlotInfo, TheFn);
1302 if (IsExported)
1303 ExportedGUIDs.insert(TheFn.getGUID());
1304
1305 // Record in summary for use in devirtualization during the ThinLTO import
1306 // step.
1308 if (GlobalValue::isLocalLinkage(S->linkage())) {
1309 if (IsExported)
1310 // If target is a local function and we are exporting it by
1311 // devirtualizing a call in another module, we need to record the
1312 // promoted name.
1314 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1315 else {
1316 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1317 Res->SingleImplName = std::string(TheFn.name());
1318 }
1319 } else
1320 Res->SingleImplName = std::string(TheFn.name());
1321
1322 // Name will be empty if this thin link driven off of serialized combined
1323 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1324 // legacy LTO API anyway.
1325 assert(!Res->SingleImplName.empty());
1326
1327 return true;
1328}
1329
1330void DevirtModule::tryICallBranchFunnel(
1331 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1332 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1333 Triple T(M.getTargetTriple());
1334 if (T.getArch() != Triple::x86_64)
1335 return;
1336
1337 if (TargetsForSlot.size() > ClThreshold)
1338 return;
1339
1340 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1341 if (!HasNonDevirt)
1342 for (auto &P : SlotInfo.ConstCSInfo)
1343 if (!P.second.AllCallSitesDevirted) {
1344 HasNonDevirt = true;
1345 break;
1346 }
1347
1348 if (!HasNonDevirt)
1349 return;
1350
1351 FunctionType *FT =
1352 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1353 Function *JT;
1354 if (isa<MDString>(Slot.TypeID)) {
1356 M.getDataLayout().getProgramAddressSpace(),
1357 getGlobalName(Slot, {}, "branch_funnel"), &M);
1358 JT->setVisibility(GlobalValue::HiddenVisibility);
1359 } else {
1361 M.getDataLayout().getProgramAddressSpace(),
1362 "branch_funnel", &M);
1363 }
1364 JT->addParamAttr(0, Attribute::Nest);
1365
1366 std::vector<Value *> JTArgs;
1367 JTArgs.push_back(JT->arg_begin());
1368 for (auto &T : TargetsForSlot) {
1369 JTArgs.push_back(getMemberAddr(T.TM));
1370 JTArgs.push_back(T.Fn);
1371 }
1372
1373 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1374 Function *Intr =
1375 Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
1376
1377 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1378 CI->setTailCallKind(CallInst::TCK_MustTail);
1379 ReturnInst::Create(M.getContext(), nullptr, BB);
1380
1381 bool IsExported = false;
1382 applyICallBranchFunnel(SlotInfo, JT, IsExported);
1383 if (IsExported)
1385}
1386
1387void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1388 Constant *JT, bool &IsExported) {
1389 auto Apply = [&](CallSiteInfo &CSInfo) {
1390 if (CSInfo.isExported())
1391 IsExported = true;
1392 if (CSInfo.AllCallSitesDevirted)
1393 return;
1394
1395 std::map<CallBase *, CallBase *> CallBases;
1396 for (auto &&VCallSite : CSInfo.CallSites) {
1397 CallBase &CB = VCallSite.CB;
1398
1399 if (CallBases.find(&CB) != CallBases.end()) {
1400 // When finding devirtualizable calls, it's possible to find the same
1401 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1402 // calls, which can cause duplicate call sites to be recorded in
1403 // [Const]CallSites. If we've already found one of these
1404 // call instances, just ignore it. It will be replaced later.
1405 continue;
1406 }
1407
1408 // Jump tables are only profitable if the retpoline mitigation is enabled.
1409 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1410 if (!FSAttr.isValid() ||
1411 !FSAttr.getValueAsString().contains("+retpoline"))
1412 continue;
1413
1414 NumBranchFunnel++;
1415 if (RemarksEnabled)
1416 VCallSite.emitRemark("branch-funnel",
1417 JT->stripPointerCasts()->getName(), OREGetter);
1418
1419 // Pass the address of the vtable in the nest register, which is r10 on
1420 // x86_64.
1421 std::vector<Type *> NewArgs;
1422 NewArgs.push_back(Int8PtrTy);
1423 append_range(NewArgs, CB.getFunctionType()->params());
1424 FunctionType *NewFT =
1426 CB.getFunctionType()->isVarArg());
1427 PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
1428
1429 IRBuilder<> IRB(&CB);
1430 std::vector<Value *> Args;
1431 Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
1432 llvm::append_range(Args, CB.args());
1433
1434 CallBase *NewCS = nullptr;
1435 if (isa<CallInst>(CB))
1436 NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
1437 else
1438 NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
1439 cast<InvokeInst>(CB).getNormalDest(),
1440 cast<InvokeInst>(CB).getUnwindDest(), Args);
1441 NewCS->setCallingConv(CB.getCallingConv());
1442
1444 std::vector<AttributeSet> NewArgAttrs;
1445 NewArgAttrs.push_back(AttributeSet::get(
1446 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1447 M.getContext(), Attribute::Nest)}));
1448 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1449 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1450 NewCS->setAttributes(
1451 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1452 Attrs.getRetAttrs(), NewArgAttrs));
1453
1454 CallBases[&CB] = NewCS;
1455
1456 // This use is no longer unsafe.
1457 if (VCallSite.NumUnsafeUses)
1458 --*VCallSite.NumUnsafeUses;
1459 }
1460 // Don't mark as devirtualized because there may be callers compiled without
1461 // retpoline mitigation, which would mean that they are lowered to
1462 // llvm.type.test and therefore require an llvm.type.test resolution for the
1463 // type identifier.
1464
1465 std::for_each(CallBases.begin(), CallBases.end(), [](auto &CBs) {
1466 CBs.first->replaceAllUsesWith(CBs.second);
1467 CBs.first->eraseFromParent();
1468 });
1469 };
1470 Apply(SlotInfo.CSInfo);
1471 for (auto &P : SlotInfo.ConstCSInfo)
1472 Apply(P.second);
1473}
1474
1475bool DevirtModule::tryEvaluateFunctionsWithArgs(
1477 ArrayRef<uint64_t> Args) {
1478 // Evaluate each function and store the result in each target's RetVal
1479 // field.
1480 for (VirtualCallTarget &Target : TargetsForSlot) {
1481 // TODO: Skip for now if the vtable symbol was an alias to a function,
1482 // need to evaluate whether it would be correct to analyze the aliasee
1483 // function for this optimization.
1484 auto Fn = dyn_cast<Function>(Target.Fn);
1485 if (!Fn)
1486 return false;
1487
1488 if (Fn->arg_size() != Args.size() + 1)
1489 return false;
1490
1491 Evaluator Eval(M.getDataLayout(), nullptr);
1493 EvalArgs.push_back(
1494 Constant::getNullValue(Fn->getFunctionType()->getParamType(0)));
1495 for (unsigned I = 0; I != Args.size(); ++I) {
1496 auto *ArgTy =
1497 dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
1498 if (!ArgTy)
1499 return false;
1500 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1501 }
1502
1503 Constant *RetVal;
1504 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1505 !isa<ConstantInt>(RetVal))
1506 return false;
1507 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1508 }
1509 return true;
1510}
1511
1512void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1513 uint64_t TheRetVal) {
1514 for (auto Call : CSInfo.CallSites) {
1515 if (!OptimizedCalls.insert(&Call.CB).second)
1516 continue;
1517 NumUniformRetVal++;
1518 Call.replaceAndErase(
1519 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1520 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1521 }
1522 CSInfo.markDevirt();
1523}
1524
1525bool DevirtModule::tryUniformRetValOpt(
1526 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1528 // Uniform return value optimization. If all functions return the same
1529 // constant, replace all calls with that constant.
1530 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1531 for (const VirtualCallTarget &Target : TargetsForSlot)
1532 if (Target.RetVal != TheRetVal)
1533 return false;
1534
1535 if (CSInfo.isExported()) {
1537 Res->Info = TheRetVal;
1538 }
1539
1540 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1541 if (RemarksEnabled || AreStatisticsEnabled())
1542 for (auto &&Target : TargetsForSlot)
1543 Target.WasDevirt = true;
1544 return true;
1545}
1546
1547std::string DevirtModule::getGlobalName(VTableSlot Slot,
1548 ArrayRef<uint64_t> Args,
1549 StringRef Name) {
1550 std::string FullName = "__typeid_";
1551 raw_string_ostream OS(FullName);
1552 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1553 for (uint64_t Arg : Args)
1554 OS << '_' << Arg;
1555 OS << '_' << Name;
1556 return OS.str();
1557}
1558
1559bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1560 Triple T(M.getTargetTriple());
1561 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1562}
1563
1564void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1567 getGlobalName(Slot, Args, Name), C, &M);
1569}
1570
1571void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1572 StringRef Name, uint32_t Const,
1573 uint32_t &Storage) {
1574 if (shouldExportConstantsAsAbsoluteSymbols()) {
1575 exportGlobal(
1576 Slot, Args, Name,
1578 return;
1579 }
1580
1581 Storage = Const;
1582}
1583
1584Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1585 StringRef Name) {
1586 Constant *C =
1587 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1588 auto *GV = dyn_cast<GlobalVariable>(C);
1589 if (GV)
1590 GV->setVisibility(GlobalValue::HiddenVisibility);
1591 return C;
1592}
1593
1594Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1595 StringRef Name, IntegerType *IntTy,
1596 uint32_t Storage) {
1597 if (!shouldExportConstantsAsAbsoluteSymbols())
1598 return ConstantInt::get(IntTy, Storage);
1599
1600 Constant *C = importGlobal(Slot, Args, Name);
1601 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1602 C = ConstantExpr::getPtrToInt(C, IntTy);
1603
1604 // We only need to set metadata if the global is newly created, in which
1605 // case it would not have hidden visibility.
1606 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1607 return C;
1608
1609 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1610 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1611 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1612 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1613 MDNode::get(M.getContext(), {MinC, MaxC}));
1614 };
1615 unsigned AbsWidth = IntTy->getBitWidth();
1616 if (AbsWidth == IntPtrTy->getBitWidth())
1617 SetAbsRange(~0ull, ~0ull); // Full set.
1618 else
1619 SetAbsRange(0, 1ull << AbsWidth);
1620 return C;
1621}
1622
1623void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1624 bool IsOne,
1625 Constant *UniqueMemberAddr) {
1626 for (auto &&Call : CSInfo.CallSites) {
1627 if (!OptimizedCalls.insert(&Call.CB).second)
1628 continue;
1629 IRBuilder<> B(&Call.CB);
1630 Value *Cmp =
1631 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1632 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1633 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1634 NumUniqueRetVal++;
1635 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1636 Cmp);
1637 }
1638 CSInfo.markDevirt();
1639}
1640
1641Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1642 Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
1643 return ConstantExpr::getGetElementPtr(Int8Ty, C,
1644 ConstantInt::get(Int64Ty, M->Offset));
1645}
1646
1647bool DevirtModule::tryUniqueRetValOpt(
1648 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1649 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1650 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1651 // IsOne controls whether we look for a 0 or a 1.
1652 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1653 const TypeMemberInfo *UniqueMember = nullptr;
1654 for (const VirtualCallTarget &Target : TargetsForSlot) {
1655 if (Target.RetVal == (IsOne ? 1 : 0)) {
1656 if (UniqueMember)
1657 return false;
1658 UniqueMember = Target.TM;
1659 }
1660 }
1661
1662 // We should have found a unique member or bailed out by now. We already
1663 // checked for a uniform return value in tryUniformRetValOpt.
1664 assert(UniqueMember);
1665
1666 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1667 if (CSInfo.isExported()) {
1669 Res->Info = IsOne;
1670
1671 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1672 }
1673
1674 // Replace each call with the comparison.
1675 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1676 UniqueMemberAddr);
1677
1678 // Update devirtualization statistics for targets.
1679 if (RemarksEnabled || AreStatisticsEnabled())
1680 for (auto &&Target : TargetsForSlot)
1681 Target.WasDevirt = true;
1682
1683 return true;
1684 };
1685
1686 if (BitWidth == 1) {
1687 if (tryUniqueRetValOptFor(true))
1688 return true;
1689 if (tryUniqueRetValOptFor(false))
1690 return true;
1691 }
1692 return false;
1693}
1694
1695void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1696 Constant *Byte, Constant *Bit) {
1697 for (auto Call : CSInfo.CallSites) {
1698 if (!OptimizedCalls.insert(&Call.CB).second)
1699 continue;
1700 auto *RetType = cast<IntegerType>(Call.CB.getType());
1701 IRBuilder<> B(&Call.CB);
1702 Value *Addr =
1703 B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
1704 if (RetType->getBitWidth() == 1) {
1705 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1706 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1707 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1708 NumVirtConstProp1Bit++;
1709 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1710 OREGetter, IsBitSet);
1711 } else {
1712 Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
1713 Value *Val = B.CreateLoad(RetType, ValAddr);
1714 NumVirtConstProp++;
1715 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1716 OREGetter, Val);
1717 }
1718 }
1719 CSInfo.markDevirt();
1720}
1721
1722bool DevirtModule::tryVirtualConstProp(
1723 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1724 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1725 // TODO: Skip for now if the vtable symbol was an alias to a function,
1726 // need to evaluate whether it would be correct to analyze the aliasee
1727 // function for this optimization.
1728 auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1729 if (!Fn)
1730 return false;
1731 // This only works if the function returns an integer.
1732 auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1733 if (!RetType)
1734 return false;
1735 unsigned BitWidth = RetType->getBitWidth();
1736 if (BitWidth > 64)
1737 return false;
1738
1739 // Make sure that each function is defined, does not access memory, takes at
1740 // least one argument, does not use its first argument (which we assume is
1741 // 'this'), and has the same return type.
1742 //
1743 // Note that we test whether this copy of the function is readnone, rather
1744 // than testing function attributes, which must hold for any copy of the
1745 // function, even a less optimized version substituted at link time. This is
1746 // sound because the virtual constant propagation optimizations effectively
1747 // inline all implementations of the virtual function into each call site,
1748 // rather than using function attributes to perform local optimization.
1749 for (VirtualCallTarget &Target : TargetsForSlot) {
1750 // TODO: Skip for now if the vtable symbol was an alias to a function,
1751 // need to evaluate whether it would be correct to analyze the aliasee
1752 // function for this optimization.
1753 auto Fn = dyn_cast<Function>(Target.Fn);
1754 if (!Fn)
1755 return false;
1756
1757 if (Fn->isDeclaration() ||
1758 !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
1759 .doesNotAccessMemory() ||
1760 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1761 Fn->getReturnType() != RetType)
1762 return false;
1763 }
1764
1765 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1766 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1767 continue;
1768
1769 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1770 if (Res)
1771 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1772
1773 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1774 continue;
1775
1776 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1777 ResByArg, Slot, CSByConstantArg.first))
1778 continue;
1779
1780 // Find an allocation offset in bits in all vtables associated with the
1781 // type.
1782 uint64_t AllocBefore =
1783 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1784 uint64_t AllocAfter =
1785 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1786
1787 // Calculate the total amount of padding needed to store a value at both
1788 // ends of the object.
1789 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1790 for (auto &&Target : TargetsForSlot) {
1791 TotalPaddingBefore += std::max<int64_t>(
1792 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1793 TotalPaddingAfter += std::max<int64_t>(
1794 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1795 }
1796
1797 // If the amount of padding is too large, give up.
1798 // FIXME: do something smarter here.
1799 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1800 continue;
1801
1802 // Calculate the offset to the value as a (possibly negative) byte offset
1803 // and (if applicable) a bit offset, and store the values in the targets.
1804 int64_t OffsetByte;
1805 uint64_t OffsetBit;
1806 if (TotalPaddingBefore <= TotalPaddingAfter)
1807 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1808 OffsetBit);
1809 else
1810 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1811 OffsetBit);
1812
1813 if (RemarksEnabled || AreStatisticsEnabled())
1814 for (auto &&Target : TargetsForSlot)
1815 Target.WasDevirt = true;
1816
1817
1818 if (CSByConstantArg.second.isExported()) {
1820 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
1821 ResByArg->Byte);
1822 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
1823 ResByArg->Bit);
1824 }
1825
1826 // Rewrite each call to a load from OffsetByte/OffsetBit.
1827 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
1828 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
1829 applyVirtualConstProp(CSByConstantArg.second,
1830 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
1831 }
1832 return true;
1833}
1834
1835void DevirtModule::rebuildGlobal(VTableBits &B) {
1836 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
1837 return;
1838
1839 // Align the before byte array to the global's minimum alignment so that we
1840 // don't break any alignment requirements on the global.
1841 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
1842 B.GV->getAlign(), B.GV->getValueType());
1843 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
1844
1845 // Before was stored in reverse order; flip it now.
1846 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
1847 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
1848
1849 // Build an anonymous global containing the before bytes, followed by the
1850 // original initializer, followed by the after bytes.
1851 auto NewInit = ConstantStruct::getAnon(
1852 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
1853 B.GV->getInitializer(),
1854 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
1855 auto NewGV =
1856 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
1857 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
1858 NewGV->setSection(B.GV->getSection());
1859 NewGV->setComdat(B.GV->getComdat());
1860 NewGV->setAlignment(B.GV->getAlign());
1861
1862 // Copy the original vtable's metadata to the anonymous global, adjusting
1863 // offsets as required.
1864 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
1865
1866 // Build an alias named after the original global, pointing at the second
1867 // element (the original initializer).
1868 auto Alias = GlobalAlias::create(
1869 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
1871 NewInit->getType(), NewGV,
1872 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
1873 ConstantInt::get(Int32Ty, 1)}),
1874 &M);
1875 Alias->setVisibility(B.GV->getVisibility());
1876 Alias->takeName(B.GV);
1877
1878 B.GV->replaceAllUsesWith(Alias);
1879 B.GV->eraseFromParent();
1880}
1881
1882bool DevirtModule::areRemarksEnabled() {
1883 const auto &FL = M.getFunctionList();
1884 for (const Function &Fn : FL) {
1885 if (Fn.empty())
1886 continue;
1887 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
1888 return DI.isEnabled();
1889 }
1890 return false;
1891}
1892
1893void DevirtModule::scanTypeTestUsers(
1894 Function *TypeTestFunc,
1895 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1896 // Find all virtual calls via a virtual table pointer %p under an assumption
1897 // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
1898 // points to a member of the type identifier %md. Group calls by (type ID,
1899 // offset) pair (effectively the identity of the virtual function) and store
1900 // to CallSlots.
1901 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
1902 auto *CI = dyn_cast<CallInst>(U.getUser());
1903 if (!CI)
1904 continue;
1905
1906 // Search for virtual calls based on %p and add them to DevirtCalls.
1909 auto &DT = LookupDomTree(*CI->getFunction());
1910 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1911
1912 Metadata *TypeId =
1913 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
1914 // If we found any, add them to CallSlots.
1915 if (!Assumes.empty()) {
1916 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
1917 for (DevirtCallSite Call : DevirtCalls)
1918 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
1919 }
1920
1921 auto RemoveTypeTestAssumes = [&]() {
1922 // We no longer need the assumes or the type test.
1923 for (auto *Assume : Assumes)
1924 Assume->eraseFromParent();
1925 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
1926 // may use the vtable argument later.
1927 if (CI->use_empty())
1928 CI->eraseFromParent();
1929 };
1930
1931 // At this point we could remove all type test assume sequences, as they
1932 // were originally inserted for WPD. However, we can keep these in the
1933 // code stream for later analysis (e.g. to help drive more efficient ICP
1934 // sequences). They will eventually be removed by a second LowerTypeTests
1935 // invocation that cleans them up. In order to do this correctly, the first
1936 // LowerTypeTests invocation needs to know that they have "Unknown" type
1937 // test resolution, so that they aren't treated as Unsat and lowered to
1938 // False, which will break any uses on assumes. Below we remove any type
1939 // test assumes that will not be treated as Unknown by LTT.
1940
1941 // The type test assumes will be treated by LTT as Unsat if the type id is
1942 // not used on a global (in which case it has no entry in the TypeIdMap).
1943 if (!TypeIdMap.count(TypeId))
1944 RemoveTypeTestAssumes();
1945
1946 // For ThinLTO importing, we need to remove the type test assumes if this is
1947 // an MDString type id without a corresponding TypeIdSummary. Any
1948 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
1949 // type test assumes can be kept. If the MDString type id is missing a
1950 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
1951 // exporting phase of WPD from analyzing it), then it would be treated as
1952 // Unsat by LTT and we need to remove its type test assumes here. If not
1953 // used on a vcall we don't need them for later optimization use in any
1954 // case.
1955 else if (ImportSummary && isa<MDString>(TypeId)) {
1956 const TypeIdSummary *TidSummary =
1957 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
1958 if (!TidSummary)
1959 RemoveTypeTestAssumes();
1960 else
1961 // If one was created it should not be Unsat, because if we reached here
1962 // the type id was used on a global.
1964 }
1965 }
1966}
1967
1968void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
1969 Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
1970
1971 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
1972 auto *CI = dyn_cast<CallInst>(U.getUser());
1973 if (!CI)
1974 continue;
1975
1976 Value *Ptr = CI->getArgOperand(0);
1977 Value *Offset = CI->getArgOperand(1);
1978 Value *TypeIdValue = CI->getArgOperand(2);
1979 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
1980
1984 bool HasNonCallUses = false;
1985 auto &DT = LookupDomTree(*CI->getFunction());
1986 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
1987 HasNonCallUses, CI, DT);
1988
1989 // Start by generating "pessimistic" code that explicitly loads the function
1990 // pointer from the vtable and performs the type check. If possible, we will
1991 // eliminate the load and the type check later.
1992
1993 // If possible, only generate the load at the point where it is used.
1994 // This helps avoid unnecessary spills.
1995 IRBuilder<> LoadB(
1996 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
1997 Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
1998 Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
1999 Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);
2000
2001 for (Instruction *LoadedPtr : LoadedPtrs) {
2002 LoadedPtr->replaceAllUsesWith(LoadedValue);
2003 LoadedPtr->eraseFromParent();
2004 }
2005
2006 // Likewise for the type test.
2007 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2008 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2009
2010 for (Instruction *Pred : Preds) {
2011 Pred->replaceAllUsesWith(TypeTestCall);
2012 Pred->eraseFromParent();
2013 }
2014
2015 // We have already erased any extractvalue instructions that refer to the
2016 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2017 // (although this is unlikely). In that case, explicitly build a pair and
2018 // RAUW it.
2019 if (!CI->use_empty()) {
2020 Value *Pair = PoisonValue::get(CI->getType());
2021 IRBuilder<> B(CI);
2022 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2023 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2024 CI->replaceAllUsesWith(Pair);
2025 }
2026
2027 // The number of unsafe uses is initially the number of uses.
2028 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2029 NumUnsafeUses = DevirtCalls.size();
2030
2031 // If the function pointer has a non-call user, we cannot eliminate the type
2032 // check, as one of those users may eventually call the pointer. Increment
2033 // the unsafe use count to make sure it cannot reach zero.
2034 if (HasNonCallUses)
2035 ++NumUnsafeUses;
2036 for (DevirtCallSite Call : DevirtCalls) {
2037 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2038 &NumUnsafeUses);
2039 }
2040
2041 CI->eraseFromParent();
2042 }
2043}
2044
2045void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2046 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2047 if (!TypeId)
2048 return;
2049 const TypeIdSummary *TidSummary =
2050 ImportSummary->getTypeIdSummary(TypeId->getString());
2051 if (!TidSummary)
2052 return;
2053 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2054 if (ResI == TidSummary->WPDRes.end())
2055 return;
2056 const WholeProgramDevirtResolution &Res = ResI->second;
2057
2059 assert(!Res.SingleImplName.empty());
2060 // The type of the function in the declaration is irrelevant because every
2061 // call site will cast it to the correct type.
2062 Constant *SingleImpl =
2063 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2064 Type::getVoidTy(M.getContext()))
2065 .getCallee());
2066
2067 // This is the import phase so we should not be exporting anything.
2068 bool IsExported = false;
2069 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2070 assert(!IsExported);
2071 }
2072
2073 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2074 auto I = Res.ResByArg.find(CSByConstantArg.first);
2075 if (I == Res.ResByArg.end())
2076 continue;
2077 auto &ResByArg = I->second;
2078 // FIXME: We should figure out what to do about the "function name" argument
2079 // to the apply* functions, as the function names are unavailable during the
2080 // importing phase. For now we just pass the empty string. This does not
2081 // impact correctness because the function names are just used for remarks.
2082 switch (ResByArg.TheKind) {
2084 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2085 break;
2087 Constant *UniqueMemberAddr =
2088 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2089 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2090 UniqueMemberAddr);
2091 break;
2092 }
2094 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2095 Int32Ty, ResByArg.Byte);
2096 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2097 ResByArg.Bit);
2098 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2099 break;
2100 }
2101 default:
2102 break;
2103 }
2104 }
2105
2107 // The type of the function is irrelevant, because it's bitcast at calls
2108 // anyhow.
2109 Constant *JT = cast<Constant>(
2110 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2111 Type::getVoidTy(M.getContext()))
2112 .getCallee());
2113 bool IsExported = false;
2114 applyICallBranchFunnel(SlotInfo, JT, IsExported);
2115 assert(!IsExported);
2116 }
2117}
2118
2119void DevirtModule::removeRedundantTypeTests() {
2120 auto True = ConstantInt::getTrue(M.getContext());
2121 for (auto &&U : NumUnsafeUsesForTypeTest) {
2122 if (U.second == 0) {
2123 U.first->replaceAllUsesWith(True);
2124 U.first->eraseFromParent();
2125 }
2126 }
2127}
2128
2130DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2131 ModuleSummaryIndex *ExportSummary) {
2132 assert((ExportSummary != nullptr) &&
2133 "Caller guarantees ExportSummary is not nullptr");
2134
2135 const auto TheFnGUID = TheFn->getGUID();
2136 const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
2137 // Look up ValueInfo with the GUID in the current linkage.
2138 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2139 // If no entry is found and GUID is different from GUID computed using
2140 // exported name, look up ValueInfo with the exported name unconditionally.
2141 // This is a fallback.
2142 //
2143 // The reason to have a fallback:
2144 // 1. LTO could enable global value internalization via
2145 // `enable-lto-internalization`.
2146 // 2. The GUID in ExportedSummary is computed using exported name.
2147 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2148 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2149 }
2150 return TheFnVI;
2151}
2152
2153bool DevirtModule::mustBeUnreachableFunction(
2154 Function *const F, ModuleSummaryIndex *ExportSummary) {
2155 // First, learn unreachability by analyzing function IR.
2156 if (!F->isDeclaration()) {
2157 // A function must be unreachable if its entry block ends with an
2158 // 'unreachable'.
2159 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2160 }
2161 // Learn unreachability from ExportSummary if ExportSummary is present.
2162 return ExportSummary &&
2164 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2165}
2166
2167bool DevirtModule::run() {
2168 // If only some of the modules were split, we cannot correctly perform
2169 // this transformation. We already checked for the presense of type tests
2170 // with partially split modules during the thin link, and would have emitted
2171 // an error if any were found, so here we can simply return.
2172 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2173 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2174 return false;
2175
2176 Function *TypeTestFunc =
2177 M.getFunction(Intrinsic::getName(Intrinsic::type_test));
2178 Function *TypeCheckedLoadFunc =
2179 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
2180 Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
2181
2182 // Normally if there are no users of the devirtualization intrinsics in the
2183 // module, this pass has nothing to do. But if we are exporting, we also need
2184 // to handle any users that appear only in the function summaries.
2185 if (!ExportSummary &&
2186 (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
2187 AssumeFunc->use_empty()) &&
2188 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
2189 return false;
2190
2191 // Rebuild type metadata into a map for easy lookup.
2192 std::vector<VTableBits> Bits;
2194 buildTypeIdentifierMap(Bits, TypeIdMap);
2195
2196 if (TypeTestFunc && AssumeFunc)
2197 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2198
2199 if (TypeCheckedLoadFunc)
2200 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2201
2202 if (ImportSummary) {
2203 for (auto &S : CallSlots)
2204 importResolution(S.first, S.second);
2205
2206 removeRedundantTypeTests();
2207
2208 // We have lowered or deleted the type intrinsics, so we will no longer have
2209 // enough information to reason about the liveness of virtual function
2210 // pointers in GlobalDCE.
2211 for (GlobalVariable &GV : M.globals())
2212 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2213
2214 // The rest of the code is only necessary when exporting or during regular
2215 // LTO, so we are done.
2216 return true;
2217 }
2218
2219 if (TypeIdMap.empty())
2220 return true;
2221
2222 // Collect information from summary about which calls to try to devirtualize.
2223 if (ExportSummary) {
2225 for (auto &P : TypeIdMap) {
2226 if (auto *TypeId = dyn_cast<MDString>(P.first))
2227 MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
2228 TypeId);
2229 }
2230
2231 for (auto &P : *ExportSummary) {
2232 for (auto &S : P.second.SummaryList) {
2233 auto *FS = dyn_cast<FunctionSummary>(S.get());
2234 if (!FS)
2235 continue;
2236 // FIXME: Only add live functions.
2237 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2238 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2239 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2240 }
2241 }
2242 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2243 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2244 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2245 }
2246 }
2247 for (const FunctionSummary::ConstVCall &VC :
2248 FS->type_test_assume_const_vcalls()) {
2249 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2250 CallSlots[{MD, VC.VFunc.Offset}]
2251 .ConstCSInfo[VC.Args]
2252 .addSummaryTypeTestAssumeUser(FS);
2253 }
2254 }
2255 for (const FunctionSummary::ConstVCall &VC :
2256 FS->type_checked_load_const_vcalls()) {
2257 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2258 CallSlots[{MD, VC.VFunc.Offset}]
2259 .ConstCSInfo[VC.Args]
2260 .addSummaryTypeCheckedLoadUser(FS);
2261 }
2262 }
2263 }
2264 }
2265 }
2266
2267 // For each (type, offset) pair:
2268 bool DidVirtualConstProp = false;
2269 std::map<std::string, GlobalValue *> DevirtTargets;
2270 for (auto &S : CallSlots) {
2271 // Search each of the members of the type identifier for the virtual
2272 // function implementation at offset S.first.ByteOffset, and add to
2273 // TargetsForSlot.
2274 std::vector<VirtualCallTarget> TargetsForSlot;
2275 WholeProgramDevirtResolution *Res = nullptr;
2276 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2277 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2278 TypeMemberInfos.size())
2279 // For any type id used on a global's type metadata, create the type id
2280 // summary resolution regardless of whether we can devirtualize, so that
2281 // lower type tests knows the type id is not Unsat. If it was not used on
2282 // a global's type metadata, the TypeIdMap entry set will be empty, and
2283 // we don't want to create an entry (with the default Unknown type
2284 // resolution), which can prevent detection of the Unsat.
2285 Res = &ExportSummary
2286 ->getOrInsertTypeIdSummary(
2287 cast<MDString>(S.first.TypeID)->getString())
2288 .WPDRes[S.first.ByteOffset];
2289 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2290 S.first.ByteOffset, ExportSummary)) {
2291
2292 if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2293 DidVirtualConstProp |=
2294 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2295
2296 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2297 }
2298
2299 // Collect functions devirtualized at least for one call site for stats.
2300 if (RemarksEnabled || AreStatisticsEnabled())
2301 for (const auto &T : TargetsForSlot)
2302 if (T.WasDevirt)
2303 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2304 }
2305
2306 // CFI-specific: if we are exporting and any llvm.type.checked.load
2307 // intrinsics were *not* devirtualized, we need to add the resulting
2308 // llvm.type.test intrinsics to the function summaries so that the
2309 // LowerTypeTests pass will export them.
2310 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2311 auto GUID =
2312 GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
2313 for (auto *FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
2314 FS->addTypeTest(GUID);
2315 for (auto &CCS : S.second.ConstCSInfo)
2316 for (auto *FS : CCS.second.SummaryTypeCheckedLoadUsers)
2317 FS->addTypeTest(GUID);
2318 }
2319 }
2320
2321 if (RemarksEnabled) {
2322 // Generate remarks for each devirtualized function.
2323 for (const auto &DT : DevirtTargets) {
2324 GlobalValue *GV = DT.second;
2325 auto F = dyn_cast<Function>(GV);
2326 if (!F) {
2327 auto A = dyn_cast<GlobalAlias>(GV);
2328 assert(A && isa<Function>(A->getAliasee()));
2329 F = dyn_cast<Function>(A->getAliasee());
2330 assert(F);
2331 }
2332
2333 using namespace ore;
2334 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2335 << "devirtualized "
2336 << NV("FunctionName", DT.first));
2337 }
2338 }
2339
2340 NumDevirtTargets += DevirtTargets.size();
2341
2342 removeRedundantTypeTests();
2343
2344 // Rebuild each global we touched as part of virtual constant propagation to
2345 // include the before and after bytes.
2346 if (DidVirtualConstProp)
2347 for (VTableBits &B : Bits)
2348 rebuildGlobal(B);
2349
2350 // We have lowered or deleted the type intrinsics, so we will no longer have
2351 // enough information to reason about the liveness of virtual function
2352 // pointers in GlobalDCE.
2353 for (GlobalVariable &GV : M.globals())
2354 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2355
2356 return true;
2357}
2358
2359void DevirtIndex::run() {
2360 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2361 return;
2362
2364 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2365 NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
2366 // Create the type id summary resolution regardlness of whether we can
2367 // devirtualize, so that lower type tests knows the type id is used on
2368 // a global and not Unsat. We do this here rather than in the loop over the
2369 // CallSlots, since that handling will only see type tests that directly
2370 // feed assumes, and we would miss any that aren't currently handled by WPD
2371 // (such as type tests that feed assumes via phis).
2372 ExportSummary.getOrInsertTypeIdSummary(P.first);
2373 }
2374
2375 // Collect information from summary about which calls to try to devirtualize.
2376 for (auto &P : ExportSummary) {
2377 for (auto &S : P.second.SummaryList) {
2378 auto *FS = dyn_cast<FunctionSummary>(S.get());
2379 if (!FS)
2380 continue;
2381 // FIXME: Only add live functions.
2382 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2383 for (StringRef Name : NameByGUID[VF.GUID]) {
2384 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2385 }
2386 }
2387 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2388 for (StringRef Name : NameByGUID[VF.GUID]) {
2389 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2390 }
2391 }
2392 for (const FunctionSummary::ConstVCall &VC :
2393 FS->type_test_assume_const_vcalls()) {
2394 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2395 CallSlots[{Name, VC.VFunc.Offset}]
2396 .ConstCSInfo[VC.Args]
2397 .addSummaryTypeTestAssumeUser(FS);
2398 }
2399 }
2400 for (const FunctionSummary::ConstVCall &VC :
2401 FS->type_checked_load_const_vcalls()) {
2402 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2403 CallSlots[{Name, VC.VFunc.Offset}]
2404 .ConstCSInfo[VC.Args]
2405 .addSummaryTypeCheckedLoadUser(FS);
2406 }
2407 }
2408 }
2409 }
2410
2411 std::set<ValueInfo> DevirtTargets;
2412 // For each (type, offset) pair:
2413 for (auto &S : CallSlots) {
2414 // Search each of the members of the type identifier for the virtual
2415 // function implementation at offset S.first.ByteOffset, and add to
2416 // TargetsForSlot.
2417 std::vector<ValueInfo> TargetsForSlot;
2418 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2419 assert(TidSummary);
2420 // The type id summary would have been created while building the NameByGUID
2421 // map earlier.
2423 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2424 ->WPDRes[S.first.ByteOffset];
2425 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2426 S.first.ByteOffset)) {
2427
2428 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2429 DevirtTargets))
2430 continue;
2431 }
2432 }
2433
2434 // Optionally have the thin link print message for each devirtualized
2435 // function.
2437 for (const auto &DT : DevirtTargets)
2438 errs() << "Devirtualized call to " << DT << "\n";
2439
2440 NumDevirtTargets += DevirtTargets.size();
2441}
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
assume Assume Builder
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Addr
std::string Name
uint64_t Size
Provides passes for computing function attributes based on interprocedural analyses.
#define DEBUG_TYPE
Hexagon Common GEP
static cl::opt< std::string > ClReadSummary("lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden)
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< std::string > ClWriteSummary("lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
static bool mustBeUnreachableFunction(const Function &F)
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
@ Targets
Definition: TextStubV5.cpp:90
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:711
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:317
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:187
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1471
bool arg_empty() const
Definition: InstrTypes.h:1350
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1408
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1467
Value * getCalledOperand() const
Definition: InstrTypes.h:1401
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1490
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1266
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1344
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1444
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1486
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
@ ICMP_EQ
equal
Definition: InstrTypes.h:739
@ ICMP_NE
not equal
Definition: InstrTypes.h:740
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:695
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2206
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2192
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, std::optional< unsigned > InRangeIndex=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1232
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2220
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition: Constants.h:466
This is an important base class in LLVM.
Definition: Constant.h:41
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:328
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:37
Helper for check-and-exit error handling.
Definition: Error.h:1355
Tagged union holding either a T or a Error.
Definition: Error.h:470
Function summary information to aid decisions and implementation of importing.
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
bool isVarArg() const
Definition: DerivedTypes.h:123
Type * getReturnType() const
Definition: DerivedTypes.h:124
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:136
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:670
static Expected< GlobPattern > create(StringRef Pat)
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:520
static bool isLocalLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:404
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:374
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:591
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
static GUID getGUID(StringRef GlobalName)
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:587
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
Global variable summary information to aid decisions and implementation of importing.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const BasicBlock * getParent() const
Definition: Instruction.h:90
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:943
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Root of the metadata hierarchy.
Definition: Metadata.h:61
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1750
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:422
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
static Type * getVoidTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
bool use_empty() const
Definition: Value.h:344
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1370
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:454
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:979
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1506
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
@ FS
Definition: X86.h:208
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ CommaSeparated
Definition: CommandLine.h:164
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage return and linefeed pair '\r\n' as the line terminator.
Definition: FileSystem.h:770
uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
@ Offset
Definition: DWP.cpp:406
MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map< std::string, GVSummaryMapTy > *ModuleToSummariesForIndex=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
@ Export
Export information to summary.
@ Import
Import information from summary.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2129
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1246
bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition: Error.h:1186
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:92
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1043
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
Constant * getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, Constant *TopLevelGlobal=nullptr)
Processes a Constant recursively looking into elements of arrays, structs and expressions to find a t...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:51
A call site that could be devirtualized.
A specification for a virtual function call with all constant integer arguments.
An "identifier" for a virtual function.
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)