LLVM 20.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular and thin LTO pipelines:
28//
29// During regular LTO, the pass determines the best optimization for each
30// virtual call and applies the resolutions directly to virtual calls that are
31// eligible for virtual call optimization (i.e. calls that use either of the
32// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
33//
34// During hybrid Regular/ThinLTO, the pass operates in two phases:
35// - Export phase: this is run during the thin link over a single merged module
36// that contains all vtables with !type metadata that participate in the link.
37// The pass computes a resolution for each virtual call and stores it in the
38// type identifier summary.
39// - Import phase: this is run during the thin backends over the individual
40// modules. The pass applies the resolutions previously computed during the
41// import phase to each eligible virtual call.
42//
43// During ThinLTO, the pass operates in two phases:
44// - Export phase: this is run during the thin link over the index which
45// contains a summary of all vtables with !type metadata that participate in
46// the link. It computes a resolution for each virtual call and stores it in
47// the type identifier summary. Only single implementation devirtualization
48// is supported.
49// - Import phase: (same as with hybrid case above).
50//
51//===----------------------------------------------------------------------===//
52
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/DenseSet.h"
58#include "llvm/ADT/MapVector.h"
60#include "llvm/ADT/Statistic.h"
67#include "llvm/IR/Constants.h"
68#include "llvm/IR/DataLayout.h"
69#include "llvm/IR/DebugLoc.h"
71#include "llvm/IR/Dominators.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/IRBuilder.h"
76#include "llvm/IR/InstrTypes.h"
77#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/LLVMContext.h"
81#include "llvm/IR/MDBuilder.h"
82#include "llvm/IR/Metadata.h"
83#include "llvm/IR/Module.h"
87#include "llvm/Support/Errc.h"
88#include "llvm/Support/Error.h"
92#include "llvm/Transforms/IPO.h"
97#include <algorithm>
98#include <cstddef>
99#include <map>
100#include <set>
101#include <string>
102
103using namespace llvm;
104using namespace wholeprogramdevirt;
105
106#define DEBUG_TYPE "wholeprogramdevirt"
107
108STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
109STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
110STATISTIC(NumBranchFunnel, "Number of branch funnels");
111STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
112STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
113STATISTIC(NumVirtConstProp1Bit,
114 "Number of 1 bit virtual constant propagations");
115STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
116
118 "wholeprogramdevirt-summary-action",
119 cl::desc("What to do with the summary when running this pass"),
120 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
121 clEnumValN(PassSummaryAction::Import, "import",
122 "Import typeid resolutions from summary and globals"),
123 clEnumValN(PassSummaryAction::Export, "export",
124 "Export typeid resolutions to summary and globals")),
125 cl::Hidden);
126
128 "wholeprogramdevirt-read-summary",
129 cl::desc(
130 "Read summary from given bitcode or YAML file before running pass"),
131 cl::Hidden);
132
134 "wholeprogramdevirt-write-summary",
135 cl::desc("Write summary to given bitcode or YAML file after running pass. "
136 "Output file format is deduced from extension: *.bc means writing "
137 "bitcode, otherwise YAML"),
138 cl::Hidden);
139
141 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
142 cl::init(10),
143 cl::desc("Maximum number of call targets per "
144 "call site to enable branch funnels"));
145
146static cl::opt<bool>
147 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
148 cl::desc("Print index-based devirtualization messages"));
149
150/// Provide a way to force enable whole program visibility in tests.
151/// This is needed to support legacy tests that don't contain
152/// !vcall_visibility metadata (the mere presense of type tests
153/// previously implied hidden visibility).
154static cl::opt<bool>
155 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
156 cl::desc("Enable whole program visibility"));
157
158/// Provide a way to force disable whole program for debugging or workarounds,
159/// when enabled via the linker.
161 "disable-whole-program-visibility", cl::Hidden,
162 cl::desc("Disable whole program visibility (overrides enabling options)"));
163
164/// Provide way to prevent certain function from being devirtualized
166 SkipFunctionNames("wholeprogramdevirt-skip",
167 cl::desc("Prevent function(s) from being devirtualized"),
169
170/// With Clang, a pure virtual class's deleting destructor is emitted as a
171/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
172/// context of whole program devirtualization, the deleting destructor of a pure
173/// virtual class won't be invoked by the source code so safe to skip as a
174/// devirtualize target.
175///
176/// However, not all unreachable functions are safe to skip. In some cases, the
177/// program intends to run such functions and terminate, for instance, a unit
178/// test may run a death test. A non-test program might (or allowed to) invoke
179/// such functions to report failures (whether/when it's a good practice or not
180/// is a different topic).
181///
182/// This option is enabled to keep an unreachable function as a possible
183/// devirtualize target to conservatively keep the program behavior.
184///
185/// TODO: Make a pure virtual class's deleting destructor precisely identifiable
186/// in Clang's codegen for more devirtualization in LLVM.
188 "wholeprogramdevirt-keep-unreachable-function",
189 cl::desc("Regard unreachable functions as possible devirtualize targets."),
190 cl::Hidden, cl::init(true));
191
192/// If explicitly specified, the devirt module pass will stop transformation
193/// once the total number of devirtualizations reach the cutoff value. Setting
194/// this option to 0 explicitly will do 0 devirtualization.
196 "wholeprogramdevirt-cutoff",
197 cl::desc("Max number of devirtualizations for devirt module pass"),
198 cl::init(0));
199
200/// Mechanism to add runtime checking of devirtualization decisions, optionally
201/// trapping or falling back to indirect call on any that are not correct.
202/// Trapping mode is useful for debugging undefined behavior leading to failures
203/// with WPD. Fallback mode is useful for ensuring safety when whole program
204/// visibility may be compromised.
207 "wholeprogramdevirt-check", cl::Hidden,
208 cl::desc("Type of checking for incorrect devirtualizations"),
209 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
210 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
212 "Fallback to indirect when incorrect")));
213
214namespace {
215struct PatternList {
216 std::vector<GlobPattern> Patterns;
217 template <class T> void init(const T &StringList) {
218 for (const auto &S : StringList)
220 Patterns.push_back(std::move(*Pat));
221 }
222 bool match(StringRef S) {
223 for (const GlobPattern &P : Patterns)
224 if (P.match(S))
225 return true;
226 return false;
227 }
228};
229} // namespace
230
231// Find the minimum offset that we may store a value of size Size bits at. If
232// IsAfter is set, look for an offset before the object, otherwise look for an
233// offset after the object.
236 bool IsAfter, uint64_t Size) {
237 // Find a minimum offset taking into account only vtable sizes.
238 uint64_t MinByte = 0;
239 for (const VirtualCallTarget &Target : Targets) {
240 if (IsAfter)
241 MinByte = std::max(MinByte, Target.minAfterBytes());
242 else
243 MinByte = std::max(MinByte, Target.minBeforeBytes());
244 }
245
246 // Build a vector of arrays of bytes covering, for each target, a slice of the
247 // used region (see AccumBitVector::BytesUsed in
248 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
249 // this aligns the used regions to start at MinByte.
250 //
251 // In this example, A, B and C are vtables, # is a byte already allocated for
252 // a virtual function pointer, AAAA... (etc.) are the used regions for the
253 // vtables and Offset(X) is the value computed for the Offset variable below
254 // for X.
255 //
256 // Offset(A)
257 // | |
258 // |MinByte
259 // A: ################AAAAAAAA|AAAAAAAA
260 // B: ########BBBBBBBBBBBBBBBB|BBBB
261 // C: ########################|CCCCCCCCCCCCCCCC
262 // | Offset(B) |
263 //
264 // This code produces the slices of A, B and C that appear after the divider
265 // at MinByte.
266 std::vector<ArrayRef<uint8_t>> Used;
267 for (const VirtualCallTarget &Target : Targets) {
268 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
269 : Target.TM->Bits->Before.BytesUsed;
270 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
271 : MinByte - Target.minBeforeBytes();
272
273 // Disregard used regions that are smaller than Offset. These are
274 // effectively all-free regions that do not need to be checked.
275 if (VTUsed.size() > Offset)
276 Used.push_back(VTUsed.slice(Offset));
277 }
278
279 if (Size == 1) {
280 // Find a free bit in each member of Used.
281 for (unsigned I = 0;; ++I) {
282 uint8_t BitsUsed = 0;
283 for (auto &&B : Used)
284 if (I < B.size())
285 BitsUsed |= B[I];
286 if (BitsUsed != 0xff)
287 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
288 }
289 } else {
290 // Find a free (Size/8) byte region in each member of Used.
291 // FIXME: see if alignment helps.
292 for (unsigned I = 0;; ++I) {
293 for (auto &&B : Used) {
294 unsigned Byte = 0;
295 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
296 if (B[I + Byte])
297 goto NextI;
298 ++Byte;
299 }
300 }
301 return (MinByte + I) * 8;
302 NextI:;
303 }
304 }
305}
306
308 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
309 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
310 if (BitWidth == 1)
311 OffsetByte = -(AllocBefore / 8 + 1);
312 else
313 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
314 OffsetBit = AllocBefore % 8;
315
316 for (VirtualCallTarget &Target : Targets) {
317 if (BitWidth == 1)
318 Target.setBeforeBit(AllocBefore);
319 else
320 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
321 }
322}
323
326 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
327 if (BitWidth == 1)
328 OffsetByte = AllocAfter / 8;
329 else
330 OffsetByte = (AllocAfter + 7) / 8;
331 OffsetBit = AllocAfter % 8;
332
333 for (VirtualCallTarget &Target : Targets) {
334 if (BitWidth == 1)
335 Target.setAfterBit(AllocAfter);
336 else
337 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
338 }
339}
340
342 : Fn(Fn), TM(TM),
343 IsBigEndian(Fn->getDataLayout().isBigEndian()),
344 WasDevirt(false) {}
345
346namespace {
347
348// Tracks the number of devirted calls in the IR transformation.
349static unsigned NumDevirtCalls = 0;
350
351// A slot in a set of virtual tables. The TypeID identifies the set of virtual
352// tables, and the ByteOffset is the offset in bytes from the address point to
353// the virtual function pointer.
354struct VTableSlot {
356 uint64_t ByteOffset;
357};
358
359} // end anonymous namespace
360
361namespace llvm {
362
363template <> struct DenseMapInfo<VTableSlot> {
364 static VTableSlot getEmptyKey() {
367 }
368 static VTableSlot getTombstoneKey() {
371 }
372 static unsigned getHashValue(const VTableSlot &I) {
375 }
376 static bool isEqual(const VTableSlot &LHS,
377 const VTableSlot &RHS) {
378 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
379 }
380};
381
382template <> struct DenseMapInfo<VTableSlotSummary> {
386 }
390 }
391 static unsigned getHashValue(const VTableSlotSummary &I) {
394 }
395 static bool isEqual(const VTableSlotSummary &LHS,
396 const VTableSlotSummary &RHS) {
397 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
398 }
399};
400
401} // end namespace llvm
402
403// Returns true if the function must be unreachable based on ValueInfo.
404//
405// In particular, identifies a function as unreachable in the following
406// conditions
407// 1) All summaries are live.
408// 2) All function summaries indicate it's unreachable
409// 3) There is no non-function with the same GUID (which is rare)
412 return false;
413
414 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
415 // Returns false if ValueInfo is absent, or the summary list is empty
416 // (e.g., function declarations).
417 return false;
418 }
419
420 for (const auto &Summary : TheFnVI.getSummaryList()) {
421 // Conservatively returns false if any non-live functions are seen.
422 // In general either all summaries should be live or all should be dead.
423 if (!Summary->isLive())
424 return false;
425 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
426 if (!FS->fflags().MustBeUnreachable)
427 return false;
428 }
429 // Be conservative if a non-function has the same GUID (which is rare).
430 else
431 return false;
432 }
433 // All function summaries are live and all of them agree that the function is
434 // unreachble.
435 return true;
436}
437
438namespace {
439// A virtual call site. VTable is the loaded virtual table pointer, and CS is
440// the indirect virtual call.
441struct VirtualCallSite {
442 Value *VTable = nullptr;
443 CallBase &CB;
444
445 // If non-null, this field points to the associated unsafe use count stored in
446 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
447 // of that field for details.
448 unsigned *NumUnsafeUses = nullptr;
449
450 void
451 emitRemark(const StringRef OptName, const StringRef TargetName,
453 Function *F = CB.getCaller();
454 DebugLoc DLoc = CB.getDebugLoc();
455 BasicBlock *Block = CB.getParent();
456
457 using namespace ore;
458 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
459 << NV("Optimization", OptName)
460 << ": devirtualized a call to "
461 << NV("FunctionName", TargetName));
462 }
463
464 void replaceAndErase(
465 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
467 Value *New) {
468 if (RemarksEnabled)
469 emitRemark(OptName, TargetName, OREGetter);
470 CB.replaceAllUsesWith(New);
471 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
472 BranchInst::Create(II->getNormalDest(), CB.getIterator());
473 II->getUnwindDest()->removePredecessor(II->getParent());
474 }
475 CB.eraseFromParent();
476 // This use is no longer unsafe.
477 if (NumUnsafeUses)
478 --*NumUnsafeUses;
479 }
480};
481
482// Call site information collected for a specific VTableSlot and possibly a list
483// of constant integer arguments. The grouping by arguments is handled by the
484// VTableSlotInfo class.
485struct CallSiteInfo {
486 /// The set of call sites for this slot. Used during regular LTO and the
487 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
488 /// call sites that appear in the merged module itself); in each of these
489 /// cases we are directly operating on the call sites at the IR level.
490 std::vector<VirtualCallSite> CallSites;
491
492 /// Whether all call sites represented by this CallSiteInfo, including those
493 /// in summaries, have been devirtualized. This starts off as true because a
494 /// default constructed CallSiteInfo represents no call sites.
495 bool AllCallSitesDevirted = true;
496
497 // These fields are used during the export phase of ThinLTO and reflect
498 // information collected from function summaries.
499
500 /// Whether any function summary contains an llvm.assume(llvm.type.test) for
501 /// this slot.
502 bool SummaryHasTypeTestAssumeUsers = false;
503
504 /// CFI-specific: a vector containing the list of function summaries that use
505 /// the llvm.type.checked.load intrinsic and therefore will require
506 /// resolutions for llvm.type.test in order to implement CFI checks if
507 /// devirtualization was unsuccessful. If devirtualization was successful, the
508 /// pass will clear this vector by calling markDevirt(). If at the end of the
509 /// pass the vector is non-empty, we will need to add a use of llvm.type.test
510 /// to each of the function summaries in the vector.
511 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
512 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
513
514 bool isExported() const {
515 return SummaryHasTypeTestAssumeUsers ||
516 !SummaryTypeCheckedLoadUsers.empty();
517 }
518
519 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
520 SummaryTypeCheckedLoadUsers.push_back(FS);
521 AllCallSitesDevirted = false;
522 }
523
524 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
525 SummaryTypeTestAssumeUsers.push_back(FS);
526 SummaryHasTypeTestAssumeUsers = true;
527 AllCallSitesDevirted = false;
528 }
529
530 void markDevirt() {
531 AllCallSitesDevirted = true;
532
533 // As explained in the comment for SummaryTypeCheckedLoadUsers.
534 SummaryTypeCheckedLoadUsers.clear();
535 }
536};
537
538// Call site information collected for a specific VTableSlot.
539struct VTableSlotInfo {
540 // The set of call sites which do not have all constant integer arguments
541 // (excluding "this").
542 CallSiteInfo CSInfo;
543
544 // The set of call sites with all constant integer arguments (excluding
545 // "this"), grouped by argument list.
546 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
547
548 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
549
550private:
551 CallSiteInfo &findCallSiteInfo(CallBase &CB);
552};
553
554CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
555 std::vector<uint64_t> Args;
556 auto *CBType = dyn_cast<IntegerType>(CB.getType());
557 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
558 return CSInfo;
559 for (auto &&Arg : drop_begin(CB.args())) {
560 auto *CI = dyn_cast<ConstantInt>(Arg);
561 if (!CI || CI->getBitWidth() > 64)
562 return CSInfo;
563 Args.push_back(CI->getZExtValue());
564 }
565 return ConstCSInfo[Args];
566}
567
568void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
569 unsigned *NumUnsafeUses) {
570 auto &CSI = findCallSiteInfo(CB);
571 CSI.AllCallSitesDevirted = false;
572 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
573}
574
575struct DevirtModule {
576 Module &M;
577 function_ref<AAResults &(Function &)> AARGetter;
578 function_ref<DominatorTree &(Function &)> LookupDomTree;
579
580 ModuleSummaryIndex *ExportSummary;
581 const ModuleSummaryIndex *ImportSummary;
582
583 IntegerType *Int8Ty;
584 PointerType *Int8PtrTy;
585 IntegerType *Int32Ty;
586 IntegerType *Int64Ty;
587 IntegerType *IntPtrTy;
588 /// Sizeless array type, used for imported vtables. This provides a signal
589 /// to analyzers that these imports may alias, as they do for example
590 /// when multiple unique return values occur in the same vtable.
591 ArrayType *Int8Arr0Ty;
592
593 bool RemarksEnabled;
595
597
598 // Calls that have already been optimized. We may add a call to multiple
599 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
600 // optimize a call more than once.
601 SmallPtrSet<CallBase *, 8> OptimizedCalls;
602
603 // Store calls that had their ptrauth bundle removed. They are to be deleted
604 // at the end of the optimization.
605 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
606
607 // This map keeps track of the number of "unsafe" uses of a loaded function
608 // pointer. The key is the associated llvm.type.test intrinsic call generated
609 // by this pass. An unsafe use is one that calls the loaded function pointer
610 // directly. Every time we eliminate an unsafe use (for example, by
611 // devirtualizing it or by applying virtual constant propagation), we
612 // decrement the value stored in this map. If a value reaches zero, we can
613 // eliminate the type check by RAUWing the associated llvm.type.test call with
614 // true.
615 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
616 PatternList FunctionsToSkip;
617
618 DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
620 function_ref<DominatorTree &(Function &)> LookupDomTree,
621 ModuleSummaryIndex *ExportSummary,
622 const ModuleSummaryIndex *ImportSummary)
623 : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
624 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
625 Int8Ty(Type::getInt8Ty(M.getContext())),
626 Int8PtrTy(PointerType::getUnqual(M.getContext())),
627 Int32Ty(Type::getInt32Ty(M.getContext())),
628 Int64Ty(Type::getInt64Ty(M.getContext())),
629 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
630 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
631 RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
632 assert(!(ExportSummary && ImportSummary));
633 FunctionsToSkip.init(SkipFunctionNames);
634 }
635
636 bool areRemarksEnabled();
637
638 void
639 scanTypeTestUsers(Function *TypeTestFunc,
640 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
641 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
642
643 void buildTypeIdentifierMap(
644 std::vector<VTableBits> &Bits,
645 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
646
647 bool
648 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
649 const std::set<TypeMemberInfo> &TypeMemberInfos,
650 uint64_t ByteOffset,
651 ModuleSummaryIndex *ExportSummary);
652
653 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
654 bool &IsExported);
655 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
657 VTableSlotInfo &SlotInfo,
659
660 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
661 bool &IsExported);
662 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
663 VTableSlotInfo &SlotInfo,
664 WholeProgramDevirtResolution *Res, VTableSlot Slot);
665
666 bool tryEvaluateFunctionsWithArgs(
668 ArrayRef<uint64_t> Args);
669
670 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
671 uint64_t TheRetVal);
672 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
673 CallSiteInfo &CSInfo,
675
676 // Returns the global symbol name that is used to export information about the
677 // given vtable slot and list of arguments.
678 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
680
681 bool shouldExportConstantsAsAbsoluteSymbols();
682
683 // This function is called during the export phase to create a symbol
684 // definition containing information about the given vtable slot and list of
685 // arguments.
686 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
687 Constant *C);
688 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
689 uint32_t Const, uint32_t &Storage);
690
691 // This function is called during the import phase to create a reference to
692 // the symbol definition created during the export phase.
693 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
695 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
696 StringRef Name, IntegerType *IntTy,
697 uint32_t Storage);
698
699 Constant *getMemberAddr(const TypeMemberInfo *M);
700
701 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
702 Constant *UniqueMemberAddr);
703 bool tryUniqueRetValOpt(unsigned BitWidth,
705 CallSiteInfo &CSInfo,
707 VTableSlot Slot, ArrayRef<uint64_t> Args);
708
709 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
710 Constant *Byte, Constant *Bit);
711 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
712 VTableSlotInfo &SlotInfo,
713 WholeProgramDevirtResolution *Res, VTableSlot Slot);
714
715 void rebuildGlobal(VTableBits &B);
716
717 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
718 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
719
720 // If we were able to eliminate all unsafe uses for a type checked load,
721 // eliminate the associated type tests by replacing them with true.
722 void removeRedundantTypeTests();
723
724 bool run();
725
726 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
727 //
728 // Caller guarantees that `ExportSummary` is not nullptr.
729 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
730 ModuleSummaryIndex *ExportSummary);
731
732 // Returns true if the function definition must be unreachable.
733 //
734 // Note if this helper function returns true, `F` is guaranteed
735 // to be unreachable; if it returns false, `F` might still
736 // be unreachable but not covered by this helper function.
737 //
738 // Implementation-wise, if function definition is present, IR is analyzed; if
739 // not, look up function flags from ExportSummary as a fallback.
740 static bool mustBeUnreachableFunction(Function *const F,
741 ModuleSummaryIndex *ExportSummary);
742
743 // Lower the module using the action and summary passed as command line
744 // arguments. For testing purposes only.
745 static bool
746 runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
748 function_ref<DominatorTree &(Function &)> LookupDomTree);
749};
750
751struct DevirtIndex {
752 ModuleSummaryIndex &ExportSummary;
753 // The set in which to record GUIDs exported from their module by
754 // devirtualization, used by client to ensure they are not internalized.
755 std::set<GlobalValue::GUID> &ExportedGUIDs;
756 // A map in which to record the information necessary to locate the WPD
757 // resolution for local targets in case they are exported by cross module
758 // importing.
759 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
760
762
763 PatternList FunctionsToSkip;
764
765 DevirtIndex(
766 ModuleSummaryIndex &ExportSummary,
767 std::set<GlobalValue::GUID> &ExportedGUIDs,
768 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
769 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
770 LocalWPDTargetsMap(LocalWPDTargetsMap) {
771 FunctionsToSkip.init(SkipFunctionNames);
772 }
773
774 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
775 const TypeIdCompatibleVtableInfo TIdInfo,
776 uint64_t ByteOffset);
777
778 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
779 VTableSlotSummary &SlotSummary,
780 VTableSlotInfo &SlotInfo,
782 std::set<ValueInfo> &DevirtTargets);
783
784 void run();
785};
786} // end anonymous namespace
787
790 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
791 auto AARGetter = [&](Function &F) -> AAResults & {
792 return FAM.getResult<AAManager>(F);
793 };
794 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
796 };
797 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
799 };
800 if (UseCommandLine) {
801 if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
802 return PreservedAnalyses::all();
804 }
805 if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
806 ImportSummary)
807 .run())
808 return PreservedAnalyses::all();
810}
811
812// Enable whole program visibility if enabled by client (e.g. linker) or
813// internal option, and not force disabled.
814bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
815 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
817}
818
819static bool
821 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
822 // TypeID for member function pointer type is an internal construct
823 // and won't exist in IsVisibleToRegularObj. The full TypeID
824 // will be present and participate in invalidation.
825 if (TypeID.ends_with(".virtual"))
826 return false;
827
828 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
829 // non-externally visible types which cannot interact with
830 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
831 if (!TypeID.consume_front("_ZTS"))
832 return false;
833
834 // TypeID is keyed off the type name symbol (_ZTS). However, the native
835 // object may not contain this symbol if it does not contain a key
836 // function for the base type and thus only contains a reference to the
837 // type info (_ZTI). To catch this case we query using the type info
838 // symbol corresponding to the TypeID.
839 std::string typeInfo = ("_ZTI" + TypeID).str();
840 return IsVisibleToRegularObj(typeInfo);
841}
842
843static bool
845 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
847 GV.getMetadata(LLVMContext::MD_type, Types);
848
849 for (auto Type : Types)
850 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
851 return typeIDVisibleToRegularObj(TypeID->getString(),
852 IsVisibleToRegularObj);
853
854 return false;
855}
856
857/// If whole program visibility asserted, then upgrade all public vcall
858/// visibility metadata on vtable definitions to linkage unit visibility in
859/// Module IR (for regular or hybrid LTO).
861 Module &M, bool WholeProgramVisibilityEnabledInLTO,
862 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
863 bool ValidateAllVtablesHaveTypeInfos,
864 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
865 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
866 return;
867 for (GlobalVariable &GV : M.globals()) {
868 // Add linkage unit visibility to any variable with type metadata, which are
869 // the vtable definitions. We won't have an existing vcall_visibility
870 // metadata on vtable definitions with public visibility.
871 if (GV.hasMetadata(LLVMContext::MD_type) &&
873 // Don't upgrade the visibility for symbols exported to the dynamic
874 // linker, as we have no information on their eventual use.
875 !DynamicExportSymbols.count(GV.getGUID()) &&
876 // With validation enabled, we want to exclude symbols visible to
877 // regular objects. Local symbols will be in this group due to the
878 // current implementation but those with VCallVisibilityTranslationUnit
879 // will have already been marked in clang so are unaffected.
880 !(ValidateAllVtablesHaveTypeInfos &&
881 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
883 }
884}
885
887 bool WholeProgramVisibilityEnabledInLTO) {
888 Function *PublicTypeTestFunc =
889 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
890 if (!PublicTypeTestFunc)
891 return;
892 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
893 Function *TypeTestFunc =
894 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
895 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
896 auto *CI = cast<CallInst>(U.getUser());
897 auto *NewCI = CallInst::Create(
898 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, {}, "",
899 CI->getIterator());
900 CI->replaceAllUsesWith(NewCI);
901 CI->eraseFromParent();
902 }
903 } else {
904 auto *True = ConstantInt::getTrue(M.getContext());
905 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
906 auto *CI = cast<CallInst>(U.getUser());
907 CI->replaceAllUsesWith(True);
908 CI->eraseFromParent();
909 }
910 }
911}
912
913/// Based on typeID string, get all associated vtable GUIDS that are
914/// visible to regular objects.
916 ModuleSummaryIndex &Index,
917 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
918 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
919 for (const auto &typeID : Index.typeIdCompatibleVtableMap()) {
920 if (typeIDVisibleToRegularObj(typeID.first, IsVisibleToRegularObj))
921 for (const TypeIdOffsetVtableInfo &P : typeID.second)
922 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
923 }
924}
925
926/// If whole program visibility asserted, then upgrade all public vcall
927/// visibility metadata on vtable definition summaries to linkage unit
928/// visibility in Module summary index (for ThinLTO).
930 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
931 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
932 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
933 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
934 return;
935 for (auto &P : Index) {
936 // Don't upgrade the visibility for symbols exported to the dynamic
937 // linker, as we have no information on their eventual use.
938 if (DynamicExportSymbols.count(P.first))
939 continue;
940 for (auto &S : P.second.SummaryList) {
941 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
942 if (!GVar ||
943 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
944 continue;
945 // With validation enabled, we want to exclude symbols visible to regular
946 // objects. Local symbols will be in this group due to the current
947 // implementation but those with VCallVisibilityTranslationUnit will have
948 // already been marked in clang so are unaffected.
949 if (VisibleToRegularObjSymbols.count(P.first))
950 continue;
951 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
952 }
953 }
954}
955
957 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
958 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
959 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
960}
961
963 ModuleSummaryIndex &Summary,
964 function_ref<bool(StringRef, ValueInfo)> isExported,
965 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
966 for (auto &T : LocalWPDTargetsMap) {
967 auto &VI = T.first;
968 // This was enforced earlier during trySingleImplDevirt.
969 assert(VI.getSummaryList().size() == 1 &&
970 "Devirt of local target has more than one copy");
971 auto &S = VI.getSummaryList()[0];
972 if (!isExported(S->modulePath(), VI))
973 continue;
974
975 // It's been exported by a cross module import.
976 for (auto &SlotSummary : T.second) {
977 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
978 assert(TIdSum);
979 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
980 assert(WPDRes != TIdSum->WPDRes.end());
981 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
982 WPDRes->second.SingleImplName,
983 Summary.getModuleHash(S->modulePath()));
984 }
985 }
986}
987
989 // Check that summary index contains regular LTO module when performing
990 // export to prevent occasional use of index from pure ThinLTO compilation
991 // (-fno-split-lto-module). This kind of summary index is passed to
992 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
993 const auto &ModPaths = Summary->modulePaths();
996 return createStringError(
998 "combined summary should contain Regular LTO module");
999 return ErrorSuccess();
1000}
1001
1002bool DevirtModule::runForTesting(
1003 Module &M, function_ref<AAResults &(Function &)> AARGetter,
1005 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1006 std::unique_ptr<ModuleSummaryIndex> Summary =
1007 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
1008
1009 // Handle the command-line summary arguments. This code is for testing
1010 // purposes only, so we handle errors directly.
1011 if (!ClReadSummary.empty()) {
1012 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
1013 ": ");
1014 auto ReadSummaryFile =
1016 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
1017 getModuleSummaryIndex(*ReadSummaryFile)) {
1018 Summary = std::move(*SummaryOrErr);
1019 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
1020 } else {
1021 // Try YAML if we've failed with bitcode.
1022 consumeError(SummaryOrErr.takeError());
1023 yaml::Input In(ReadSummaryFile->getBuffer());
1024 In >> *Summary;
1025 ExitOnErr(errorCodeToError(In.error()));
1026 }
1027 }
1028
1029 bool Changed =
1030 DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
1032 : nullptr,
1034 : nullptr)
1035 .run();
1036
1037 if (!ClWriteSummary.empty()) {
1038 ExitOnError ExitOnErr(
1039 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1040 std::error_code EC;
1041 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1043 ExitOnErr(errorCodeToError(EC));
1044 writeIndexToFile(*Summary, OS);
1045 } else {
1047 ExitOnErr(errorCodeToError(EC));
1048 yaml::Output Out(OS);
1049 Out << *Summary;
1050 }
1051 }
1052
1053 return Changed;
1054}
1055
1056void DevirtModule::buildTypeIdentifierMap(
1057 std::vector<VTableBits> &Bits,
1058 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1060 Bits.reserve(M.global_size());
1062 for (GlobalVariable &GV : M.globals()) {
1063 Types.clear();
1064 GV.getMetadata(LLVMContext::MD_type, Types);
1065 if (GV.isDeclaration() || Types.empty())
1066 continue;
1067
1068 VTableBits *&BitsPtr = GVToBits[&GV];
1069 if (!BitsPtr) {
1070 Bits.emplace_back();
1071 Bits.back().GV = &GV;
1072 Bits.back().ObjectSize =
1073 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1074 BitsPtr = &Bits.back();
1075 }
1076
1077 for (MDNode *Type : Types) {
1078 auto TypeID = Type->getOperand(1).get();
1079
1081 cast<ConstantInt>(
1082 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1083 ->getZExtValue();
1084
1085 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1086 }
1087 }
1088}
1089
1090bool DevirtModule::tryFindVirtualCallTargets(
1091 std::vector<VirtualCallTarget> &TargetsForSlot,
1092 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1093 ModuleSummaryIndex *ExportSummary) {
1094 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1095 if (!TM.Bits->GV->isConstant())
1096 return false;
1097
1098 // We cannot perform whole program devirtualization analysis on a vtable
1099 // with public LTO visibility.
1100 if (TM.Bits->GV->getVCallVisibility() ==
1102 return false;
1103
1104 Function *Fn = nullptr;
1105 Constant *C = nullptr;
1106 std::tie(Fn, C) =
1107 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1108
1109 if (!Fn)
1110 return false;
1111
1112 if (FunctionsToSkip.match(Fn->getName()))
1113 return false;
1114
1115 // We can disregard __cxa_pure_virtual as a possible call target, as
1116 // calls to pure virtuals are UB.
1117 if (Fn->getName() == "__cxa_pure_virtual")
1118 continue;
1119
1120 // We can disregard unreachable functions as possible call targets, as
1121 // unreachable functions shouldn't be called.
1122 if (mustBeUnreachableFunction(Fn, ExportSummary))
1123 continue;
1124
1125 // Save the symbol used in the vtable to use as the devirtualization
1126 // target.
1127 auto GV = dyn_cast<GlobalValue>(C);
1128 assert(GV);
1129 TargetsForSlot.push_back({GV, &TM});
1130 }
1131
1132 // Give up if we couldn't find any targets.
1133 return !TargetsForSlot.empty();
1134}
1135
1136bool DevirtIndex::tryFindVirtualCallTargets(
1137 std::vector<ValueInfo> &TargetsForSlot,
1138 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1139 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1140 // Find a representative copy of the vtable initializer.
1141 // We can have multiple available_externally, linkonce_odr and weak_odr
1142 // vtable initializers. We can also have multiple external vtable
1143 // initializers in the case of comdats, which we cannot check here.
1144 // The linker should give an error in this case.
1145 //
1146 // Also, handle the case of same-named local Vtables with the same path
1147 // and therefore the same GUID. This can happen if there isn't enough
1148 // distinguishing path when compiling the source file. In that case we
1149 // conservatively return false early.
1150 const GlobalVarSummary *VS = nullptr;
1151 bool LocalFound = false;
1152 for (const auto &S : P.VTableVI.getSummaryList()) {
1153 if (GlobalValue::isLocalLinkage(S->linkage())) {
1154 if (LocalFound)
1155 return false;
1156 LocalFound = true;
1157 }
1158 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1159 if (!CurVS->vTableFuncs().empty() ||
1160 // Previously clang did not attach the necessary type metadata to
1161 // available_externally vtables, in which case there would not
1162 // be any vtable functions listed in the summary and we need
1163 // to treat this case conservatively (in case the bitcode is old).
1164 // However, we will also not have any vtable functions in the
1165 // case of a pure virtual base class. In that case we do want
1166 // to set VS to avoid treating it conservatively.
1168 VS = CurVS;
1169 // We cannot perform whole program devirtualization analysis on a vtable
1170 // with public LTO visibility.
1171 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1172 return false;
1173 }
1174 }
1175 // There will be no VS if all copies are available_externally having no
1176 // type metadata. In that case we can't safely perform WPD.
1177 if (!VS)
1178 return false;
1179 if (!VS->isLive())
1180 continue;
1181 for (auto VTP : VS->vTableFuncs()) {
1182 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1183 continue;
1184
1185 if (mustBeUnreachableFunction(VTP.FuncVI))
1186 continue;
1187
1188 TargetsForSlot.push_back(VTP.FuncVI);
1189 }
1190 }
1191
1192 // Give up if we couldn't find any targets.
1193 return !TargetsForSlot.empty();
1194}
1195
1196void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1197 Constant *TheFn, bool &IsExported) {
1198 // Don't devirtualize function if we're told to skip it
1199 // in -wholeprogramdevirt-skip.
1200 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1201 return;
1202 auto Apply = [&](CallSiteInfo &CSInfo) {
1203 for (auto &&VCallSite : CSInfo.CallSites) {
1204 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1205 continue;
1206
1207 // Stop when the number of devirted calls reaches the cutoff.
1208 if (WholeProgramDevirtCutoff.getNumOccurrences() > 0 &&
1209 NumDevirtCalls >= WholeProgramDevirtCutoff)
1210 return;
1211
1212 if (RemarksEnabled)
1213 VCallSite.emitRemark("single-impl",
1214 TheFn->stripPointerCasts()->getName(), OREGetter);
1215 NumSingleImpl++;
1216 NumDevirtCalls++;
1217 auto &CB = VCallSite.CB;
1218 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1219 IRBuilder<> Builder(&CB);
1220 Value *Callee =
1221 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1222
1223 // If trap checking is enabled, add support to compare the virtual
1224 // function pointer to the devirtualized target. In case of a mismatch,
1225 // perform a debug trap.
1226 if (DevirtCheckMode == WPDCheckMode::Trap) {
1227 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1228 Instruction *ThenTerm =
1229 SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
1230 Builder.SetInsertPoint(ThenTerm);
1231 Function *TrapFn =
1232 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::debugtrap);
1233 auto *CallTrap = Builder.CreateCall(TrapFn);
1234 CallTrap->setDebugLoc(CB.getDebugLoc());
1235 }
1236
1237 // If fallback checking is enabled, add support to compare the virtual
1238 // function pointer to the devirtualized target. In case of a mismatch,
1239 // fall back to indirect call.
1240 if (DevirtCheckMode == WPDCheckMode::Fallback) {
1241 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1242 // Version the indirect call site. If the called value is equal to the
1243 // given callee, 'NewInst' will be executed, otherwise the original call
1244 // site will be executed.
1245 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1246 NewInst.setCalledOperand(Callee);
1247 // Since the new call site is direct, we must clear metadata that
1248 // is only appropriate for indirect calls. This includes !prof and
1249 // !callees metadata.
1250 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1251 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1252 // Additionally, we should remove them from the fallback indirect call,
1253 // so that we don't attempt to perform indirect call promotion later.
1254 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1255 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1256 }
1257
1258 // In either trapping or non-checking mode, devirtualize original call.
1259 else {
1260 // Devirtualize unconditionally.
1261 CB.setCalledOperand(Callee);
1262 // Since the call site is now direct, we must clear metadata that
1263 // is only appropriate for indirect calls. This includes !prof and
1264 // !callees metadata.
1265 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1266 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1267 if (CB.getCalledOperand() &&
1269 auto *NewCS = CallBase::removeOperandBundle(
1271 CB.replaceAllUsesWith(NewCS);
1272 // Schedule for deletion at the end of pass run.
1273 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1274 }
1275 }
1276
1277 // This use is no longer unsafe.
1278 if (VCallSite.NumUnsafeUses)
1279 --*VCallSite.NumUnsafeUses;
1280 }
1281 if (CSInfo.isExported())
1282 IsExported = true;
1283 CSInfo.markDevirt();
1284 };
1285 Apply(SlotInfo.CSInfo);
1286 for (auto &P : SlotInfo.ConstCSInfo)
1287 Apply(P.second);
1288}
1289
1290static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1291 // We can't add calls if we haven't seen a definition
1292 if (Callee.getSummaryList().empty())
1293 return false;
1294
1295 // Insert calls into the summary index so that the devirtualized targets
1296 // are eligible for import.
1297 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1298 // to better ensure we have the opportunity to inline them.
1299 bool IsExported = false;
1300 auto &S = Callee.getSummaryList()[0];
1301 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
1302 /* RelBF = */ 0);
1303 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1304 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1305 FS->addCall({Callee, CI});
1306 IsExported |= S->modulePath() != FS->modulePath();
1307 }
1308 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1309 FS->addCall({Callee, CI});
1310 IsExported |= S->modulePath() != FS->modulePath();
1311 }
1312 };
1313 AddCalls(SlotInfo.CSInfo);
1314 for (auto &P : SlotInfo.ConstCSInfo)
1315 AddCalls(P.second);
1316 return IsExported;
1317}
1318
1319bool DevirtModule::trySingleImplDevirt(
1320 ModuleSummaryIndex *ExportSummary,
1321 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1323 // See if the program contains a single implementation of this virtual
1324 // function.
1325 auto *TheFn = TargetsForSlot[0].Fn;
1326 for (auto &&Target : TargetsForSlot)
1327 if (TheFn != Target.Fn)
1328 return false;
1329
1330 // If so, update each call site to call that implementation directly.
1331 if (RemarksEnabled || AreStatisticsEnabled())
1332 TargetsForSlot[0].WasDevirt = true;
1333
1334 bool IsExported = false;
1335 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1336 if (!IsExported)
1337 return false;
1338
1339 // If the only implementation has local linkage, we must promote to external
1340 // to make it visible to thin LTO objects. We can only get here during the
1341 // ThinLTO export phase.
1342 if (TheFn->hasLocalLinkage()) {
1343 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1344
1345 // Since we are renaming the function, any comdats with the same name must
1346 // also be renamed. This is required when targeting COFF, as the comdat name
1347 // must match one of the names of the symbols in the comdat.
1348 if (Comdat *C = TheFn->getComdat()) {
1349 if (C->getName() == TheFn->getName()) {
1350 Comdat *NewC = M.getOrInsertComdat(NewName);
1351 NewC->setSelectionKind(C->getSelectionKind());
1352 for (GlobalObject &GO : M.global_objects())
1353 if (GO.getComdat() == C)
1354 GO.setComdat(NewC);
1355 }
1356 }
1357
1358 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1359 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1360 TheFn->setName(NewName);
1361 }
1362 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1363 // Any needed promotion of 'TheFn' has already been done during
1364 // LTO unit split, so we can ignore return value of AddCalls.
1365 AddCalls(SlotInfo, TheFnVI);
1366
1368 Res->SingleImplName = std::string(TheFn->getName());
1369
1370 return true;
1371}
1372
1373bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1374 VTableSlotSummary &SlotSummary,
1375 VTableSlotInfo &SlotInfo,
1377 std::set<ValueInfo> &DevirtTargets) {
1378 // See if the program contains a single implementation of this virtual
1379 // function.
1380 auto TheFn = TargetsForSlot[0];
1381 for (auto &&Target : TargetsForSlot)
1382 if (TheFn != Target)
1383 return false;
1384
1385 // Don't devirtualize if we don't have target definition.
1386 auto Size = TheFn.getSummaryList().size();
1387 if (!Size)
1388 return false;
1389
1390 // Don't devirtualize function if we're told to skip it
1391 // in -wholeprogramdevirt-skip.
1392 if (FunctionsToSkip.match(TheFn.name()))
1393 return false;
1394
1395 // If the summary list contains multiple summaries where at least one is
1396 // a local, give up, as we won't know which (possibly promoted) name to use.
1397 for (const auto &S : TheFn.getSummaryList())
1398 if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
1399 return false;
1400
1401 // Collect functions devirtualized at least for one call site for stats.
1403 DevirtTargets.insert(TheFn);
1404
1405 auto &S = TheFn.getSummaryList()[0];
1406 bool IsExported = AddCalls(SlotInfo, TheFn);
1407 if (IsExported)
1408 ExportedGUIDs.insert(TheFn.getGUID());
1409
1410 // Record in summary for use in devirtualization during the ThinLTO import
1411 // step.
1413 if (GlobalValue::isLocalLinkage(S->linkage())) {
1414 if (IsExported)
1415 // If target is a local function and we are exporting it by
1416 // devirtualizing a call in another module, we need to record the
1417 // promoted name.
1419 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1420 else {
1421 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1422 Res->SingleImplName = std::string(TheFn.name());
1423 }
1424 } else
1425 Res->SingleImplName = std::string(TheFn.name());
1426
1427 // Name will be empty if this thin link driven off of serialized combined
1428 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1429 // legacy LTO API anyway.
1430 assert(!Res->SingleImplName.empty());
1431
1432 return true;
1433}
1434
1435void DevirtModule::tryICallBranchFunnel(
1436 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1437 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1438 Triple T(M.getTargetTriple());
1439 if (T.getArch() != Triple::x86_64)
1440 return;
1441
1442 if (TargetsForSlot.size() > ClThreshold)
1443 return;
1444
1445 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1446 if (!HasNonDevirt)
1447 for (auto &P : SlotInfo.ConstCSInfo)
1448 if (!P.second.AllCallSitesDevirted) {
1449 HasNonDevirt = true;
1450 break;
1451 }
1452
1453 if (!HasNonDevirt)
1454 return;
1455
1456 FunctionType *FT =
1457 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1458 Function *JT;
1459 if (isa<MDString>(Slot.TypeID)) {
1461 M.getDataLayout().getProgramAddressSpace(),
1462 getGlobalName(Slot, {}, "branch_funnel"), &M);
1463 JT->setVisibility(GlobalValue::HiddenVisibility);
1464 } else {
1466 M.getDataLayout().getProgramAddressSpace(),
1467 "branch_funnel", &M);
1468 }
1469 JT->addParamAttr(0, Attribute::Nest);
1470
1471 std::vector<Value *> JTArgs;
1472 JTArgs.push_back(JT->arg_begin());
1473 for (auto &T : TargetsForSlot) {
1474 JTArgs.push_back(getMemberAddr(T.TM));
1475 JTArgs.push_back(T.Fn);
1476 }
1477
1478 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1480 &M, llvm::Intrinsic::icall_branch_funnel, {});
1481
1482 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1483 CI->setTailCallKind(CallInst::TCK_MustTail);
1484 ReturnInst::Create(M.getContext(), nullptr, BB);
1485
1486 bool IsExported = false;
1487 applyICallBranchFunnel(SlotInfo, JT, IsExported);
1488 if (IsExported)
1490}
1491
1492void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1493 Constant *JT, bool &IsExported) {
1494 auto Apply = [&](CallSiteInfo &CSInfo) {
1495 if (CSInfo.isExported())
1496 IsExported = true;
1497 if (CSInfo.AllCallSitesDevirted)
1498 return;
1499
1500 std::map<CallBase *, CallBase *> CallBases;
1501 for (auto &&VCallSite : CSInfo.CallSites) {
1502 CallBase &CB = VCallSite.CB;
1503
1504 if (CallBases.find(&CB) != CallBases.end()) {
1505 // When finding devirtualizable calls, it's possible to find the same
1506 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1507 // calls, which can cause duplicate call sites to be recorded in
1508 // [Const]CallSites. If we've already found one of these
1509 // call instances, just ignore it. It will be replaced later.
1510 continue;
1511 }
1512
1513 // Jump tables are only profitable if the retpoline mitigation is enabled.
1514 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1515 if (!FSAttr.isValid() ||
1516 !FSAttr.getValueAsString().contains("+retpoline"))
1517 continue;
1518
1519 NumBranchFunnel++;
1520 if (RemarksEnabled)
1521 VCallSite.emitRemark("branch-funnel",
1522 JT->stripPointerCasts()->getName(), OREGetter);
1523
1524 // Pass the address of the vtable in the nest register, which is r10 on
1525 // x86_64.
1526 std::vector<Type *> NewArgs;
1527 NewArgs.push_back(Int8PtrTy);
1528 append_range(NewArgs, CB.getFunctionType()->params());
1529 FunctionType *NewFT =
1531 CB.getFunctionType()->isVarArg());
1532 PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
1533
1534 IRBuilder<> IRB(&CB);
1535 std::vector<Value *> Args;
1536 Args.push_back(VCallSite.VTable);
1537 llvm::append_range(Args, CB.args());
1538
1539 CallBase *NewCS = nullptr;
1540 if (isa<CallInst>(CB))
1541 NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
1542 else
1543 NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
1544 cast<InvokeInst>(CB).getNormalDest(),
1545 cast<InvokeInst>(CB).getUnwindDest(), Args);
1546 NewCS->setCallingConv(CB.getCallingConv());
1547
1549 std::vector<AttributeSet> NewArgAttrs;
1550 NewArgAttrs.push_back(AttributeSet::get(
1551 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1552 M.getContext(), Attribute::Nest)}));
1553 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1554 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1555 NewCS->setAttributes(
1556 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1557 Attrs.getRetAttrs(), NewArgAttrs));
1558
1559 CallBases[&CB] = NewCS;
1560
1561 // This use is no longer unsafe.
1562 if (VCallSite.NumUnsafeUses)
1563 --*VCallSite.NumUnsafeUses;
1564 }
1565 // Don't mark as devirtualized because there may be callers compiled without
1566 // retpoline mitigation, which would mean that they are lowered to
1567 // llvm.type.test and therefore require an llvm.type.test resolution for the
1568 // type identifier.
1569
1570 for (auto &[Old, New] : CallBases) {
1571 Old->replaceAllUsesWith(New);
1572 Old->eraseFromParent();
1573 }
1574 };
1575 Apply(SlotInfo.CSInfo);
1576 for (auto &P : SlotInfo.ConstCSInfo)
1577 Apply(P.second);
1578}
1579
1580bool DevirtModule::tryEvaluateFunctionsWithArgs(
1582 ArrayRef<uint64_t> Args) {
1583 // Evaluate each function and store the result in each target's RetVal
1584 // field.
1585 for (VirtualCallTarget &Target : TargetsForSlot) {
1586 // TODO: Skip for now if the vtable symbol was an alias to a function,
1587 // need to evaluate whether it would be correct to analyze the aliasee
1588 // function for this optimization.
1589 auto Fn = dyn_cast<Function>(Target.Fn);
1590 if (!Fn)
1591 return false;
1592
1593 if (Fn->arg_size() != Args.size() + 1)
1594 return false;
1595
1596 Evaluator Eval(M.getDataLayout(), nullptr);
1598 EvalArgs.push_back(
1600 for (unsigned I = 0; I != Args.size(); ++I) {
1601 auto *ArgTy =
1602 dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
1603 if (!ArgTy)
1604 return false;
1605 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1606 }
1607
1608 Constant *RetVal;
1609 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1610 !isa<ConstantInt>(RetVal))
1611 return false;
1612 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1613 }
1614 return true;
1615}
1616
1617void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1618 uint64_t TheRetVal) {
1619 for (auto Call : CSInfo.CallSites) {
1620 if (!OptimizedCalls.insert(&Call.CB).second)
1621 continue;
1622 NumUniformRetVal++;
1623 Call.replaceAndErase(
1624 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1625 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1626 }
1627 CSInfo.markDevirt();
1628}
1629
1630bool DevirtModule::tryUniformRetValOpt(
1631 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1633 // Uniform return value optimization. If all functions return the same
1634 // constant, replace all calls with that constant.
1635 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1636 for (const VirtualCallTarget &Target : TargetsForSlot)
1637 if (Target.RetVal != TheRetVal)
1638 return false;
1639
1640 if (CSInfo.isExported()) {
1642 Res->Info = TheRetVal;
1643 }
1644
1645 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1646 if (RemarksEnabled || AreStatisticsEnabled())
1647 for (auto &&Target : TargetsForSlot)
1648 Target.WasDevirt = true;
1649 return true;
1650}
1651
1652std::string DevirtModule::getGlobalName(VTableSlot Slot,
1653 ArrayRef<uint64_t> Args,
1654 StringRef Name) {
1655 std::string FullName = "__typeid_";
1656 raw_string_ostream OS(FullName);
1657 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1658 for (uint64_t Arg : Args)
1659 OS << '_' << Arg;
1660 OS << '_' << Name;
1661 return FullName;
1662}
1663
1664bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1665 Triple T(M.getTargetTriple());
1666 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1667}
1668
1669void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1672 getGlobalName(Slot, Args, Name), C, &M);
1674}
1675
1676void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1677 StringRef Name, uint32_t Const,
1678 uint32_t &Storage) {
1679 if (shouldExportConstantsAsAbsoluteSymbols()) {
1680 exportGlobal(
1681 Slot, Args, Name,
1682 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1683 return;
1684 }
1685
1686 Storage = Const;
1687}
1688
1689Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1690 StringRef Name) {
1691 Constant *C =
1692 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1693 auto *GV = dyn_cast<GlobalVariable>(C);
1694 if (GV)
1696 return C;
1697}
1698
1699Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1700 StringRef Name, IntegerType *IntTy,
1701 uint32_t Storage) {
1702 if (!shouldExportConstantsAsAbsoluteSymbols())
1703 return ConstantInt::get(IntTy, Storage);
1704
1705 Constant *C = importGlobal(Slot, Args, Name);
1706 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1707 C = ConstantExpr::getPtrToInt(C, IntTy);
1708
1709 // We only need to set metadata if the global is newly created, in which
1710 // case it would not have hidden visibility.
1711 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1712 return C;
1713
1714 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1715 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1716 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1717 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1718 MDNode::get(M.getContext(), {MinC, MaxC}));
1719 };
1720 unsigned AbsWidth = IntTy->getBitWidth();
1721 if (AbsWidth == IntPtrTy->getBitWidth())
1722 SetAbsRange(~0ull, ~0ull); // Full set.
1723 else
1724 SetAbsRange(0, 1ull << AbsWidth);
1725 return C;
1726}
1727
1728void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1729 bool IsOne,
1730 Constant *UniqueMemberAddr) {
1731 for (auto &&Call : CSInfo.CallSites) {
1732 if (!OptimizedCalls.insert(&Call.CB).second)
1733 continue;
1734 IRBuilder<> B(&Call.CB);
1735 Value *Cmp =
1736 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1737 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1738 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1739 NumUniqueRetVal++;
1740 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1741 Cmp);
1742 }
1743 CSInfo.markDevirt();
1744}
1745
1746Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1747 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1748 ConstantInt::get(Int64Ty, M->Offset));
1749}
1750
1751bool DevirtModule::tryUniqueRetValOpt(
1752 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1753 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1754 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1755 // IsOne controls whether we look for a 0 or a 1.
1756 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1757 const TypeMemberInfo *UniqueMember = nullptr;
1758 for (const VirtualCallTarget &Target : TargetsForSlot) {
1759 if (Target.RetVal == (IsOne ? 1 : 0)) {
1760 if (UniqueMember)
1761 return false;
1762 UniqueMember = Target.TM;
1763 }
1764 }
1765
1766 // We should have found a unique member or bailed out by now. We already
1767 // checked for a uniform return value in tryUniformRetValOpt.
1768 assert(UniqueMember);
1769
1770 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1771 if (CSInfo.isExported()) {
1773 Res->Info = IsOne;
1774
1775 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1776 }
1777
1778 // Replace each call with the comparison.
1779 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1780 UniqueMemberAddr);
1781
1782 // Update devirtualization statistics for targets.
1783 if (RemarksEnabled || AreStatisticsEnabled())
1784 for (auto &&Target : TargetsForSlot)
1785 Target.WasDevirt = true;
1786
1787 return true;
1788 };
1789
1790 if (BitWidth == 1) {
1791 if (tryUniqueRetValOptFor(true))
1792 return true;
1793 if (tryUniqueRetValOptFor(false))
1794 return true;
1795 }
1796 return false;
1797}
1798
1799void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1800 Constant *Byte, Constant *Bit) {
1801 for (auto Call : CSInfo.CallSites) {
1802 if (!OptimizedCalls.insert(&Call.CB).second)
1803 continue;
1804 auto *RetType = cast<IntegerType>(Call.CB.getType());
1805 IRBuilder<> B(&Call.CB);
1806 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1807 if (RetType->getBitWidth() == 1) {
1808 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1809 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1810 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1811 NumVirtConstProp1Bit++;
1812 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1813 OREGetter, IsBitSet);
1814 } else {
1815 Value *Val = B.CreateLoad(RetType, Addr);
1816 NumVirtConstProp++;
1817 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1818 OREGetter, Val);
1819 }
1820 }
1821 CSInfo.markDevirt();
1822}
1823
1824bool DevirtModule::tryVirtualConstProp(
1825 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1826 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1827 // TODO: Skip for now if the vtable symbol was an alias to a function,
1828 // need to evaluate whether it would be correct to analyze the aliasee
1829 // function for this optimization.
1830 auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1831 if (!Fn)
1832 return false;
1833 // This only works if the function returns an integer.
1834 auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1835 if (!RetType)
1836 return false;
1837 unsigned BitWidth = RetType->getBitWidth();
1838 if (BitWidth > 64)
1839 return false;
1840
1841 // Make sure that each function is defined, does not access memory, takes at
1842 // least one argument, does not use its first argument (which we assume is
1843 // 'this'), and has the same return type.
1844 //
1845 // Note that we test whether this copy of the function is readnone, rather
1846 // than testing function attributes, which must hold for any copy of the
1847 // function, even a less optimized version substituted at link time. This is
1848 // sound because the virtual constant propagation optimizations effectively
1849 // inline all implementations of the virtual function into each call site,
1850 // rather than using function attributes to perform local optimization.
1851 for (VirtualCallTarget &Target : TargetsForSlot) {
1852 // TODO: Skip for now if the vtable symbol was an alias to a function,
1853 // need to evaluate whether it would be correct to analyze the aliasee
1854 // function for this optimization.
1855 auto Fn = dyn_cast<Function>(Target.Fn);
1856 if (!Fn)
1857 return false;
1858
1859 if (Fn->isDeclaration() ||
1860 !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
1861 .doesNotAccessMemory() ||
1862 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1863 Fn->getReturnType() != RetType)
1864 return false;
1865 }
1866
1867 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1868 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1869 continue;
1870
1871 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1872 if (Res)
1873 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1874
1875 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1876 continue;
1877
1878 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1879 ResByArg, Slot, CSByConstantArg.first))
1880 continue;
1881
1882 // Find an allocation offset in bits in all vtables associated with the
1883 // type.
1884 uint64_t AllocBefore =
1885 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1886 uint64_t AllocAfter =
1887 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1888
1889 // Calculate the total amount of padding needed to store a value at both
1890 // ends of the object.
1891 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1892 for (auto &&Target : TargetsForSlot) {
1893 TotalPaddingBefore += std::max<int64_t>(
1894 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1895 TotalPaddingAfter += std::max<int64_t>(
1896 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1897 }
1898
1899 // If the amount of padding is too large, give up.
1900 // FIXME: do something smarter here.
1901 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1902 continue;
1903
1904 // Calculate the offset to the value as a (possibly negative) byte offset
1905 // and (if applicable) a bit offset, and store the values in the targets.
1906 int64_t OffsetByte;
1907 uint64_t OffsetBit;
1908 if (TotalPaddingBefore <= TotalPaddingAfter)
1909 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1910 OffsetBit);
1911 else
1912 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1913 OffsetBit);
1914
1915 if (RemarksEnabled || AreStatisticsEnabled())
1916 for (auto &&Target : TargetsForSlot)
1917 Target.WasDevirt = true;
1918
1919
1920 if (CSByConstantArg.second.isExported()) {
1922 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
1923 ResByArg->Byte);
1924 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
1925 ResByArg->Bit);
1926 }
1927
1928 // Rewrite each call to a load from OffsetByte/OffsetBit.
1929 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
1930 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
1931 applyVirtualConstProp(CSByConstantArg.second,
1932 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
1933 }
1934 return true;
1935}
1936
1937void DevirtModule::rebuildGlobal(VTableBits &B) {
1938 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
1939 return;
1940
1941 // Align the before byte array to the global's minimum alignment so that we
1942 // don't break any alignment requirements on the global.
1943 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
1944 B.GV->getAlign(), B.GV->getValueType());
1945 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
1946
1947 // Before was stored in reverse order; flip it now.
1948 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
1949 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
1950
1951 // Build an anonymous global containing the before bytes, followed by the
1952 // original initializer, followed by the after bytes.
1953 auto NewInit = ConstantStruct::getAnon(
1954 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
1955 B.GV->getInitializer(),
1956 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
1957 auto NewGV =
1958 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
1959 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
1960 NewGV->setSection(B.GV->getSection());
1961 NewGV->setComdat(B.GV->getComdat());
1962 NewGV->setAlignment(B.GV->getAlign());
1963
1964 // Copy the original vtable's metadata to the anonymous global, adjusting
1965 // offsets as required.
1966 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
1967
1968 // Build an alias named after the original global, pointing at the second
1969 // element (the original initializer).
1970 auto Alias = GlobalAlias::create(
1971 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
1973 NewInit->getType(), NewGV,
1974 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
1975 ConstantInt::get(Int32Ty, 1)}),
1976 &M);
1977 Alias->setVisibility(B.GV->getVisibility());
1978 Alias->takeName(B.GV);
1979
1980 B.GV->replaceAllUsesWith(Alias);
1981 B.GV->eraseFromParent();
1982}
1983
1984bool DevirtModule::areRemarksEnabled() {
1985 const auto &FL = M.getFunctionList();
1986 for (const Function &Fn : FL) {
1987 if (Fn.empty())
1988 continue;
1989 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
1990 return DI.isEnabled();
1991 }
1992 return false;
1993}
1994
1995void DevirtModule::scanTypeTestUsers(
1996 Function *TypeTestFunc,
1997 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1998 // Find all virtual calls via a virtual table pointer %p under an assumption
1999 // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
2000 // points to a member of the type identifier %md. Group calls by (type ID,
2001 // offset) pair (effectively the identity of the virtual function) and store
2002 // to CallSlots.
2003 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
2004 auto *CI = dyn_cast<CallInst>(U.getUser());
2005 if (!CI)
2006 continue;
2007
2008 // Search for virtual calls based on %p and add them to DevirtCalls.
2011 auto &DT = LookupDomTree(*CI->getFunction());
2012 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
2013
2014 Metadata *TypeId =
2015 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
2016 // If we found any, add them to CallSlots.
2017 if (!Assumes.empty()) {
2018 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
2019 for (DevirtCallSite Call : DevirtCalls)
2020 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
2021 }
2022
2023 auto RemoveTypeTestAssumes = [&]() {
2024 // We no longer need the assumes or the type test.
2025 for (auto *Assume : Assumes)
2026 Assume->eraseFromParent();
2027 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
2028 // may use the vtable argument later.
2029 if (CI->use_empty())
2030 CI->eraseFromParent();
2031 };
2032
2033 // At this point we could remove all type test assume sequences, as they
2034 // were originally inserted for WPD. However, we can keep these in the
2035 // code stream for later analysis (e.g. to help drive more efficient ICP
2036 // sequences). They will eventually be removed by a second LowerTypeTests
2037 // invocation that cleans them up. In order to do this correctly, the first
2038 // LowerTypeTests invocation needs to know that they have "Unknown" type
2039 // test resolution, so that they aren't treated as Unsat and lowered to
2040 // False, which will break any uses on assumes. Below we remove any type
2041 // test assumes that will not be treated as Unknown by LTT.
2042
2043 // The type test assumes will be treated by LTT as Unsat if the type id is
2044 // not used on a global (in which case it has no entry in the TypeIdMap).
2045 if (!TypeIdMap.count(TypeId))
2046 RemoveTypeTestAssumes();
2047
2048 // For ThinLTO importing, we need to remove the type test assumes if this is
2049 // an MDString type id without a corresponding TypeIdSummary. Any
2050 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2051 // type test assumes can be kept. If the MDString type id is missing a
2052 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2053 // exporting phase of WPD from analyzing it), then it would be treated as
2054 // Unsat by LTT and we need to remove its type test assumes here. If not
2055 // used on a vcall we don't need them for later optimization use in any
2056 // case.
2057 else if (ImportSummary && isa<MDString>(TypeId)) {
2058 const TypeIdSummary *TidSummary =
2059 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2060 if (!TidSummary)
2061 RemoveTypeTestAssumes();
2062 else
2063 // If one was created it should not be Unsat, because if we reached here
2064 // the type id was used on a global.
2066 }
2067 }
2068}
2069
2070void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2071 Function *TypeTestFunc =
2072 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
2073
2074 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2075 auto *CI = dyn_cast<CallInst>(U.getUser());
2076 if (!CI)
2077 continue;
2078
2079 Value *Ptr = CI->getArgOperand(0);
2080 Value *Offset = CI->getArgOperand(1);
2081 Value *TypeIdValue = CI->getArgOperand(2);
2082 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2083
2087 bool HasNonCallUses = false;
2088 auto &DT = LookupDomTree(*CI->getFunction());
2089 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2090 HasNonCallUses, CI, DT);
2091
2092 // Start by generating "pessimistic" code that explicitly loads the function
2093 // pointer from the vtable and performs the type check. If possible, we will
2094 // eliminate the load and the type check later.
2095
2096 // If possible, only generate the load at the point where it is used.
2097 // This helps avoid unnecessary spills.
2098 IRBuilder<> LoadB(
2099 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2100
2101 Value *LoadedValue = nullptr;
2102 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2103 Intrinsic::type_checked_load_relative) {
2104 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2105 LoadedValue = LoadB.CreateLoad(Int32Ty, GEP);
2106 LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
2107 GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
2108 LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
2109 LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
2110 } else {
2111 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2112 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2113 }
2114
2115 for (Instruction *LoadedPtr : LoadedPtrs) {
2116 LoadedPtr->replaceAllUsesWith(LoadedValue);
2117 LoadedPtr->eraseFromParent();
2118 }
2119
2120 // Likewise for the type test.
2121 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2122 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2123
2124 for (Instruction *Pred : Preds) {
2125 Pred->replaceAllUsesWith(TypeTestCall);
2126 Pred->eraseFromParent();
2127 }
2128
2129 // We have already erased any extractvalue instructions that refer to the
2130 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2131 // (although this is unlikely). In that case, explicitly build a pair and
2132 // RAUW it.
2133 if (!CI->use_empty()) {
2134 Value *Pair = PoisonValue::get(CI->getType());
2135 IRBuilder<> B(CI);
2136 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2137 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2138 CI->replaceAllUsesWith(Pair);
2139 }
2140
2141 // The number of unsafe uses is initially the number of uses.
2142 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2143 NumUnsafeUses = DevirtCalls.size();
2144
2145 // If the function pointer has a non-call user, we cannot eliminate the type
2146 // check, as one of those users may eventually call the pointer. Increment
2147 // the unsafe use count to make sure it cannot reach zero.
2148 if (HasNonCallUses)
2149 ++NumUnsafeUses;
2150 for (DevirtCallSite Call : DevirtCalls) {
2151 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2152 &NumUnsafeUses);
2153 }
2154
2155 CI->eraseFromParent();
2156 }
2157}
2158
2159void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2160 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2161 if (!TypeId)
2162 return;
2163 const TypeIdSummary *TidSummary =
2164 ImportSummary->getTypeIdSummary(TypeId->getString());
2165 if (!TidSummary)
2166 return;
2167 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2168 if (ResI == TidSummary->WPDRes.end())
2169 return;
2170 const WholeProgramDevirtResolution &Res = ResI->second;
2171
2173 assert(!Res.SingleImplName.empty());
2174 // The type of the function in the declaration is irrelevant because every
2175 // call site will cast it to the correct type.
2176 Constant *SingleImpl =
2177 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2178 Type::getVoidTy(M.getContext()))
2179 .getCallee());
2180
2181 // This is the import phase so we should not be exporting anything.
2182 bool IsExported = false;
2183 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2184 assert(!IsExported);
2185 }
2186
2187 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2188 auto I = Res.ResByArg.find(CSByConstantArg.first);
2189 if (I == Res.ResByArg.end())
2190 continue;
2191 auto &ResByArg = I->second;
2192 // FIXME: We should figure out what to do about the "function name" argument
2193 // to the apply* functions, as the function names are unavailable during the
2194 // importing phase. For now we just pass the empty string. This does not
2195 // impact correctness because the function names are just used for remarks.
2196 switch (ResByArg.TheKind) {
2198 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2199 break;
2201 Constant *UniqueMemberAddr =
2202 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2203 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2204 UniqueMemberAddr);
2205 break;
2206 }
2208 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2209 Int32Ty, ResByArg.Byte);
2210 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2211 ResByArg.Bit);
2212 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2213 break;
2214 }
2215 default:
2216 break;
2217 }
2218 }
2219
2221 // The type of the function is irrelevant, because it's bitcast at calls
2222 // anyhow.
2223 Constant *JT = cast<Constant>(
2224 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2225 Type::getVoidTy(M.getContext()))
2226 .getCallee());
2227 bool IsExported = false;
2228 applyICallBranchFunnel(SlotInfo, JT, IsExported);
2229 assert(!IsExported);
2230 }
2231}
2232
2233void DevirtModule::removeRedundantTypeTests() {
2234 auto True = ConstantInt::getTrue(M.getContext());
2235 for (auto &&U : NumUnsafeUsesForTypeTest) {
2236 if (U.second == 0) {
2237 U.first->replaceAllUsesWith(True);
2238 U.first->eraseFromParent();
2239 }
2240 }
2241}
2242
2244DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2245 ModuleSummaryIndex *ExportSummary) {
2246 assert((ExportSummary != nullptr) &&
2247 "Caller guarantees ExportSummary is not nullptr");
2248
2249 const auto TheFnGUID = TheFn->getGUID();
2250 const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
2251 // Look up ValueInfo with the GUID in the current linkage.
2252 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2253 // If no entry is found and GUID is different from GUID computed using
2254 // exported name, look up ValueInfo with the exported name unconditionally.
2255 // This is a fallback.
2256 //
2257 // The reason to have a fallback:
2258 // 1. LTO could enable global value internalization via
2259 // `enable-lto-internalization`.
2260 // 2. The GUID in ExportedSummary is computed using exported name.
2261 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2262 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2263 }
2264 return TheFnVI;
2265}
2266
2267bool DevirtModule::mustBeUnreachableFunction(
2268 Function *const F, ModuleSummaryIndex *ExportSummary) {
2270 return false;
2271 // First, learn unreachability by analyzing function IR.
2272 if (!F->isDeclaration()) {
2273 // A function must be unreachable if its entry block ends with an
2274 // 'unreachable'.
2275 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2276 }
2277 // Learn unreachability from ExportSummary if ExportSummary is present.
2278 return ExportSummary &&
2280 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2281}
2282
2283bool DevirtModule::run() {
2284 // If only some of the modules were split, we cannot correctly perform
2285 // this transformation. We already checked for the presense of type tests
2286 // with partially split modules during the thin link, and would have emitted
2287 // an error if any were found, so here we can simply return.
2288 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2289 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2290 return false;
2291
2292 Function *TypeTestFunc =
2293 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2294 Function *TypeCheckedLoadFunc =
2295 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
2296 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
2297 &M, Intrinsic::type_checked_load_relative);
2298 Function *AssumeFunc =
2299 Intrinsic::getDeclarationIfExists(&M, Intrinsic::assume);
2300
2301 // Normally if there are no users of the devirtualization intrinsics in the
2302 // module, this pass has nothing to do. But if we are exporting, we also need
2303 // to handle any users that appear only in the function summaries.
2304 if (!ExportSummary &&
2305 (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
2306 AssumeFunc->use_empty()) &&
2307 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2308 (!TypeCheckedLoadRelativeFunc ||
2309 TypeCheckedLoadRelativeFunc->use_empty()))
2310 return false;
2311
2312 // Rebuild type metadata into a map for easy lookup.
2313 std::vector<VTableBits> Bits;
2315 buildTypeIdentifierMap(Bits, TypeIdMap);
2316
2317 if (TypeTestFunc && AssumeFunc)
2318 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2319
2320 if (TypeCheckedLoadFunc)
2321 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2322
2323 if (TypeCheckedLoadRelativeFunc)
2324 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2325
2326 if (ImportSummary) {
2327 for (auto &S : CallSlots)
2328 importResolution(S.first, S.second);
2329
2330 removeRedundantTypeTests();
2331
2332 // We have lowered or deleted the type intrinsics, so we will no longer have
2333 // enough information to reason about the liveness of virtual function
2334 // pointers in GlobalDCE.
2335 for (GlobalVariable &GV : M.globals())
2336 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2337
2338 // The rest of the code is only necessary when exporting or during regular
2339 // LTO, so we are done.
2340 return true;
2341 }
2342
2343 if (TypeIdMap.empty())
2344 return true;
2345
2346 // Collect information from summary about which calls to try to devirtualize.
2347 if (ExportSummary) {
2349 for (auto &P : TypeIdMap) {
2350 if (auto *TypeId = dyn_cast<MDString>(P.first))
2351 MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
2352 TypeId);
2353 }
2354
2355 for (auto &P : *ExportSummary) {
2356 for (auto &S : P.second.SummaryList) {
2357 auto *FS = dyn_cast<FunctionSummary>(S.get());
2358 if (!FS)
2359 continue;
2360 // FIXME: Only add live functions.
2361 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2362 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2363 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2364 }
2365 }
2366 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2367 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2368 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2369 }
2370 }
2371 for (const FunctionSummary::ConstVCall &VC :
2372 FS->type_test_assume_const_vcalls()) {
2373 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2374 CallSlots[{MD, VC.VFunc.Offset}]
2375 .ConstCSInfo[VC.Args]
2376 .addSummaryTypeTestAssumeUser(FS);
2377 }
2378 }
2379 for (const FunctionSummary::ConstVCall &VC :
2380 FS->type_checked_load_const_vcalls()) {
2381 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2382 CallSlots[{MD, VC.VFunc.Offset}]
2383 .ConstCSInfo[VC.Args]
2384 .addSummaryTypeCheckedLoadUser(FS);
2385 }
2386 }
2387 }
2388 }
2389 }
2390
2391 // For each (type, offset) pair:
2392 bool DidVirtualConstProp = false;
2393 std::map<std::string, GlobalValue *> DevirtTargets;
2394 for (auto &S : CallSlots) {
2395 // Search each of the members of the type identifier for the virtual
2396 // function implementation at offset S.first.ByteOffset, and add to
2397 // TargetsForSlot.
2398 std::vector<VirtualCallTarget> TargetsForSlot;
2399 WholeProgramDevirtResolution *Res = nullptr;
2400 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2401 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2402 TypeMemberInfos.size())
2403 // For any type id used on a global's type metadata, create the type id
2404 // summary resolution regardless of whether we can devirtualize, so that
2405 // lower type tests knows the type id is not Unsat. If it was not used on
2406 // a global's type metadata, the TypeIdMap entry set will be empty, and
2407 // we don't want to create an entry (with the default Unknown type
2408 // resolution), which can prevent detection of the Unsat.
2409 Res = &ExportSummary
2410 ->getOrInsertTypeIdSummary(
2411 cast<MDString>(S.first.TypeID)->getString())
2412 .WPDRes[S.first.ByteOffset];
2413 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2414 S.first.ByteOffset, ExportSummary)) {
2415
2416 if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2417 DidVirtualConstProp |=
2418 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2419
2420 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2421 }
2422
2423 // Collect functions devirtualized at least for one call site for stats.
2424 if (RemarksEnabled || AreStatisticsEnabled())
2425 for (const auto &T : TargetsForSlot)
2426 if (T.WasDevirt)
2427 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2428 }
2429
2430 // CFI-specific: if we are exporting and any llvm.type.checked.load
2431 // intrinsics were *not* devirtualized, we need to add the resulting
2432 // llvm.type.test intrinsics to the function summaries so that the
2433 // LowerTypeTests pass will export them.
2434 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2435 auto GUID =
2436 GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
2437 for (auto *FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
2438 FS->addTypeTest(GUID);
2439 for (auto &CCS : S.second.ConstCSInfo)
2440 for (auto *FS : CCS.second.SummaryTypeCheckedLoadUsers)
2441 FS->addTypeTest(GUID);
2442 }
2443 }
2444
2445 if (RemarksEnabled) {
2446 // Generate remarks for each devirtualized function.
2447 for (const auto &DT : DevirtTargets) {
2448 GlobalValue *GV = DT.second;
2449 auto F = dyn_cast<Function>(GV);
2450 if (!F) {
2451 auto A = dyn_cast<GlobalAlias>(GV);
2452 assert(A && isa<Function>(A->getAliasee()));
2453 F = dyn_cast<Function>(A->getAliasee());
2454 assert(F);
2455 }
2456
2457 using namespace ore;
2458 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2459 << "devirtualized "
2460 << NV("FunctionName", DT.first));
2461 }
2462 }
2463
2464 NumDevirtTargets += DevirtTargets.size();
2465
2466 removeRedundantTypeTests();
2467
2468 // Rebuild each global we touched as part of virtual constant propagation to
2469 // include the before and after bytes.
2470 if (DidVirtualConstProp)
2471 for (VTableBits &B : Bits)
2472 rebuildGlobal(B);
2473
2474 // We have lowered or deleted the type intrinsics, so we will no longer have
2475 // enough information to reason about the liveness of virtual function
2476 // pointers in GlobalDCE.
2477 for (GlobalVariable &GV : M.globals())
2478 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2479
2480 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2481 CI->eraseFromParent();
2482
2483 return true;
2484}
2485
2486void DevirtIndex::run() {
2487 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2488 return;
2489
2491 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2492 NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
2493 // Create the type id summary resolution regardlness of whether we can
2494 // devirtualize, so that lower type tests knows the type id is used on
2495 // a global and not Unsat. We do this here rather than in the loop over the
2496 // CallSlots, since that handling will only see type tests that directly
2497 // feed assumes, and we would miss any that aren't currently handled by WPD
2498 // (such as type tests that feed assumes via phis).
2499 ExportSummary.getOrInsertTypeIdSummary(P.first);
2500 }
2501
2502 // Collect information from summary about which calls to try to devirtualize.
2503 for (auto &P : ExportSummary) {
2504 for (auto &S : P.second.SummaryList) {
2505 auto *FS = dyn_cast<FunctionSummary>(S.get());
2506 if (!FS)
2507 continue;
2508 // FIXME: Only add live functions.
2509 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2510 for (StringRef Name : NameByGUID[VF.GUID]) {
2511 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2512 }
2513 }
2514 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2515 for (StringRef Name : NameByGUID[VF.GUID]) {
2516 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2517 }
2518 }
2519 for (const FunctionSummary::ConstVCall &VC :
2520 FS->type_test_assume_const_vcalls()) {
2521 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2522 CallSlots[{Name, VC.VFunc.Offset}]
2523 .ConstCSInfo[VC.Args]
2524 .addSummaryTypeTestAssumeUser(FS);
2525 }
2526 }
2527 for (const FunctionSummary::ConstVCall &VC :
2528 FS->type_checked_load_const_vcalls()) {
2529 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2530 CallSlots[{Name, VC.VFunc.Offset}]
2531 .ConstCSInfo[VC.Args]
2532 .addSummaryTypeCheckedLoadUser(FS);
2533 }
2534 }
2535 }
2536 }
2537
2538 std::set<ValueInfo> DevirtTargets;
2539 // For each (type, offset) pair:
2540 for (auto &S : CallSlots) {
2541 // Search each of the members of the type identifier for the virtual
2542 // function implementation at offset S.first.ByteOffset, and add to
2543 // TargetsForSlot.
2544 std::vector<ValueInfo> TargetsForSlot;
2545 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2546 assert(TidSummary);
2547 // The type id summary would have been created while building the NameByGUID
2548 // map earlier.
2550 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2551 ->WPDRes[S.first.ByteOffset];
2552 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2553 S.first.ByteOffset)) {
2554
2555 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2556 DevirtTargets))
2557 continue;
2558 }
2559 }
2560
2561 // Optionally have the thin link print message for each devirtualized
2562 // function.
2564 for (const auto &DT : DevirtTargets)
2565 errs() << "Devirtualized call to " << DT << "\n";
2566
2567 NumDevirtTargets += DevirtTargets.size();
2568}
unsigned Intr
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Addr
std::string Name
uint64_t Size
Provides passes for computing function attributes based on interprocedural analyses.
@ CallSiteInfo
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
static cl::opt< std::string > ClReadSummary("lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden)
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< std::string > ClWriteSummary("lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
static bool mustBeUnreachableFunction(const Function &F)
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< bool > WholeProgramDevirtKeepUnreachableFunction("wholeprogramdevirt-keep-unreachable-function", cl::desc("Regard unreachable functions as possible devirtualize targets."), cl::Hidden, cl::init(true))
With Clang, a pure virtual class's deleting destructor is emitted as a llvm.trap intrinsic followed b...
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< unsigned > WholeProgramDevirtCutoff("wholeprogramdevirt-cutoff", cl::desc("Max number of devirtualizations for devirt module pass"), cl::init(0))
If explicitly specified, the devirt module pass will stop transformation once the total number of dev...
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:897
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:208
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1411
bool arg_empty() const
Definition: InstrTypes.h:1291
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Definition: InstrTypes.h:2051
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1407
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1428
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1207
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1285
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1385
static CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2307
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1294
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2293
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1267
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition: Constants.h:480
This is an important base class in LLVM.
Definition: Constant.h:42
const Constant * stripPointerCasts() const
Definition: Constant.h:218
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:335
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:37
Helper for check-and-exit error handling.
Definition: Error.h:1413
Tagged union holding either a T or a Error.
Definition: Error.h:481
Function summary information to aid decisions and implementation of importing.
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
bool isVarArg() const
Definition: DerivedTypes.h:125
Type * getReturnType() const
Definition: DerivedTypes.h:126
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
bool empty() const
Definition: Function.h:859
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool arg_empty() const
Definition: Function.h:902
const BasicBlock & front() const
Definition: Function.h:860
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:251
arg_iterator arg_begin()
Definition: Function.h:868
size_t arg_size() const
Definition: Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition: GlobPattern.h:50
static Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:557
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:589
void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
Definition: Metadata.cpp:1531
VCallVisibility getVCallVisibility() const
Definition: Metadata.cpp:1859
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1576
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:565
void setVCallVisibilityMetadata(VCallVisibility Visibility)
Definition: Metadata.cpp:1849
static bool isLocalLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:409
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:296
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:379
static GUID getGUID(StringRef GlobalName)
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: Globals.cpp:75
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:595
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Global variable summary information to aid decisions and implementation of importing.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
Definition: MDBuilder.cpp:42
Metadata node.
Definition: Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Root of the metadata hierarchy.
Definition: Metadata.h:62
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:424
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
static Type * getVoidTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1576
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:95
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition: Intrinsics.cpp:746
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
@ FS
Definition: X86.h:211
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ CommaSeparated
Definition: CommandLine.h:163
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
DiagnosticInfoOptimizationBase::Argument NV
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:767
uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
@ Export
Export information to summary.
@ Import
Import information from summary.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1291
bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const ModuleToSummariesForIndexTy *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition: Error.h:1231
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:111
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1069
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
A call site that could be devirtualized.
A specification for a virtual function call with all constant integer arguments.
An "identifier" for a virtual function.
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)