LLVM 22.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular/thin and non-LTO
28// pipelines:
29//
30// During regular LTO, the pass determines the best optimization for each
31// virtual call and applies the resolutions directly to virtual calls that are
32// eligible for virtual call optimization (i.e. calls that use either of the
33// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
34//
35// During hybrid Regular/ThinLTO, the pass operates in two phases:
36// - Export phase: this is run during the thin link over a single merged module
37// that contains all vtables with !type metadata that participate in the link.
38// The pass computes a resolution for each virtual call and stores it in the
39// type identifier summary.
40// - Import phase: this is run during the thin backends over the individual
41// modules. The pass applies the resolutions previously computed during the
42// import phase to each eligible virtual call.
43//
44// During ThinLTO, the pass operates in two phases:
45// - Export phase: this is run during the thin link over the index which
46// contains a summary of all vtables with !type metadata that participate in
47// the link. It computes a resolution for each virtual call and stores it in
48// the type identifier summary. Only single implementation devirtualization
49// is supported.
50// - Import phase: (same as with hybrid case above).
51//
52// During Speculative devirtualization mode -not restricted to LTO-:
53// - The pass applies speculative devirtualization without requiring any type of
54// visibility.
55// - Skips other features like virtual constant propagation, uniform return
56// value optimization, unique return value optimization and branch funnels as
57// they need LTO.
58// - This mode is enabled via 'devirtualize-speculatively' flag.
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/ArrayRef.h"
64#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/DenseSet.h"
67#include "llvm/ADT/MapVector.h"
69#include "llvm/ADT/Statistic.h"
79#include "llvm/IR/Constants.h"
80#include "llvm/IR/DataLayout.h"
81#include "llvm/IR/DebugLoc.h"
84#include "llvm/IR/Dominators.h"
85#include "llvm/IR/Function.h"
86#include "llvm/IR/GlobalAlias.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstrTypes.h"
90#include "llvm/IR/Instruction.h"
92#include "llvm/IR/Intrinsics.h"
93#include "llvm/IR/LLVMContext.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Metadata.h"
96#include "llvm/IR/Module.h"
98#include "llvm/IR/PassManager.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Errc.h"
104#include "llvm/Support/Error.h"
109#include "llvm/Transforms/IPO.h"
114#include <algorithm>
115#include <cmath>
116#include <cstddef>
117#include <map>
118#include <set>
119#include <string>
120
121using namespace llvm;
122using namespace wholeprogramdevirt;
123
124#define DEBUG_TYPE "wholeprogramdevirt"
125
126STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
127STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
128STATISTIC(NumBranchFunnel, "Number of branch funnels");
129STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
130STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
131STATISTIC(NumVirtConstProp1Bit,
132 "Number of 1 bit virtual constant propagations");
133STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
134DEBUG_COUNTER(CallsToDevirt, "calls-to-devirt",
135 "Controls how many calls should be devirtualized.");
136
137namespace llvm {
138
140 "wholeprogramdevirt-summary-action",
141 cl::desc("What to do with the summary when running this pass"),
142 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
144 "Import typeid resolutions from summary and globals"),
146 "Export typeid resolutions to summary and globals")),
147 cl::Hidden);
148
150 "wholeprogramdevirt-read-summary",
151 cl::desc(
152 "Read summary from given bitcode or YAML file before running pass"),
153 cl::Hidden);
154
156 "wholeprogramdevirt-write-summary",
157 cl::desc("Write summary to given bitcode or YAML file after running pass. "
158 "Output file format is deduced from extension: *.bc means writing "
159 "bitcode, otherwise YAML"),
160 cl::Hidden);
161
162// TODO: This option eventually should support any public visibility vtables
163// with/out LTO.
165 "devirtualize-speculatively",
166 cl::desc("Enable speculative devirtualization optimization"),
167 cl::init(false));
168
170 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
171 cl::init(10),
172 cl::desc("Maximum number of call targets per "
173 "call site to enable branch funnels"));
174
175static cl::opt<bool>
176 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
177 cl::desc("Print index-based devirtualization messages"));
178
179/// Provide a way to force enable whole program visibility in tests.
180/// This is needed to support legacy tests that don't contain
181/// !vcall_visibility metadata (the mere presense of type tests
182/// previously implied hidden visibility).
183static cl::opt<bool>
184 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
185 cl::desc("Enable whole program visibility"));
186
187/// Provide a way to force disable whole program for debugging or workarounds,
188/// when enabled via the linker.
190 "disable-whole-program-visibility", cl::Hidden,
191 cl::desc("Disable whole program visibility (overrides enabling options)"));
192
193/// Provide way to prevent certain function from being devirtualized
195 SkipFunctionNames("wholeprogramdevirt-skip",
196 cl::desc("Prevent function(s) from being devirtualized"),
198
200
201} // end namespace llvm
202
203/// With Clang, a pure virtual class's deleting destructor is emitted as a
204/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
205/// context of whole program devirtualization, the deleting destructor of a pure
206/// virtual class won't be invoked by the source code so safe to skip as a
207/// devirtualize target.
208///
209/// However, not all unreachable functions are safe to skip. In some cases, the
210/// program intends to run such functions and terminate, for instance, a unit
211/// test may run a death test. A non-test program might (or allowed to) invoke
212/// such functions to report failures (whether/when it's a good practice or not
213/// is a different topic).
214///
215/// This option is enabled to keep an unreachable function as a possible
216/// devirtualize target to conservatively keep the program behavior.
217///
218/// TODO: Make a pure virtual class's deleting destructor precisely identifiable
219/// in Clang's codegen for more devirtualization in LLVM.
221 "wholeprogramdevirt-keep-unreachable-function",
222 cl::desc("Regard unreachable functions as possible devirtualize targets."),
223 cl::Hidden, cl::init(true));
224
225/// Mechanism to add runtime checking of devirtualization decisions, optionally
226/// trapping or falling back to indirect call on any that are not correct.
227/// Trapping mode is useful for debugging undefined behavior leading to failures
228/// with WPD. Fallback mode is useful for ensuring safety when whole program
229/// visibility may be compromised.
232 "wholeprogramdevirt-check", cl::Hidden,
233 cl::desc("Type of checking for incorrect devirtualizations"),
234 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
235 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
237 "Fallback to indirect when incorrect")));
238
239namespace {
240struct PatternList {
241 std::vector<GlobPattern> Patterns;
242 template <class T> void init(const T &StringList) {
243 for (const auto &S : StringList)
245 Patterns.push_back(std::move(*Pat));
246 }
247 bool match(StringRef S) {
248 for (const GlobPattern &P : Patterns)
249 if (P.match(S))
250 return true;
251 return false;
252 }
253};
254} // namespace
255
256// Find the minimum offset that we may store a value of size Size bits at. If
257// IsAfter is set, look for an offset before the object, otherwise look for an
258// offset after the object.
261 bool IsAfter, uint64_t Size) {
262 // Find a minimum offset taking into account only vtable sizes.
263 uint64_t MinByte = 0;
264 for (const VirtualCallTarget &Target : Targets) {
265 if (IsAfter)
266 MinByte = std::max(MinByte, Target.minAfterBytes());
267 else
268 MinByte = std::max(MinByte, Target.minBeforeBytes());
269 }
270
271 // Build a vector of arrays of bytes covering, for each target, a slice of the
272 // used region (see AccumBitVector::BytesUsed in
273 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
274 // this aligns the used regions to start at MinByte.
275 //
276 // In this example, A, B and C are vtables, # is a byte already allocated for
277 // a virtual function pointer, AAAA... (etc.) are the used regions for the
278 // vtables and Offset(X) is the value computed for the Offset variable below
279 // for X.
280 //
281 // Offset(A)
282 // | |
283 // |MinByte
284 // A: ################AAAAAAAA|AAAAAAAA
285 // B: ########BBBBBBBBBBBBBBBB|BBBB
286 // C: ########################|CCCCCCCCCCCCCCCC
287 // | Offset(B) |
288 //
289 // This code produces the slices of A, B and C that appear after the divider
290 // at MinByte.
291 std::vector<ArrayRef<uint8_t>> Used;
292 for (const VirtualCallTarget &Target : Targets) {
293 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
294 : Target.TM->Bits->Before.BytesUsed;
295 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
296 : MinByte - Target.minBeforeBytes();
297
298 // Disregard used regions that are smaller than Offset. These are
299 // effectively all-free regions that do not need to be checked.
300 if (VTUsed.size() > Offset)
301 Used.push_back(VTUsed.slice(Offset));
302 }
303
304 if (Size == 1) {
305 // Find a free bit in each member of Used.
306 for (unsigned I = 0;; ++I) {
307 uint8_t BitsUsed = 0;
308 for (auto &&B : Used)
309 if (I < B.size())
310 BitsUsed |= B[I];
311 if (BitsUsed != 0xff)
312 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
313 }
314 } else {
315 // Find a free (Size/8) byte region in each member of Used.
316 // FIXME: see if alignment helps.
317 for (unsigned I = 0;; ++I) {
318 for (auto &&B : Used) {
319 unsigned Byte = 0;
320 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
321 if (B[I + Byte])
322 goto NextI;
323 ++Byte;
324 }
325 }
326 // Rounding up ensures the constant is always stored at address we
327 // can directly load from without misalignment.
328 return alignTo((MinByte + I) * 8, Size);
329 NextI:;
330 }
331 }
332}
333
335 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
336 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
337 if (BitWidth == 1)
338 OffsetByte = -(AllocBefore / 8 + 1);
339 else
340 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
341 OffsetBit = AllocBefore % 8;
342
343 for (VirtualCallTarget &Target : Targets) {
344 if (BitWidth == 1)
345 Target.setBeforeBit(AllocBefore);
346 else
347 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
348 }
349}
350
353 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
354 if (BitWidth == 1)
355 OffsetByte = AllocAfter / 8;
356 else
357 OffsetByte = (AllocAfter + 7) / 8;
358 OffsetBit = AllocAfter % 8;
359
360 for (VirtualCallTarget &Target : Targets) {
361 if (BitWidth == 1)
362 Target.setAfterBit(AllocAfter);
363 else
364 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
365 }
366}
367
372
373namespace {
374
375// A slot in a set of virtual tables. The TypeID identifies the set of virtual
376// tables, and the ByteOffset is the offset in bytes from the address point to
377// the virtual function pointer.
378struct VTableSlot {
379 Metadata *TypeID;
380 uint64_t ByteOffset;
381};
382
383} // end anonymous namespace
384
385template <> struct llvm::DenseMapInfo<VTableSlot> {
394 static unsigned getHashValue(const VTableSlot &I) {
397 }
398 static bool isEqual(const VTableSlot &LHS,
399 const VTableSlot &RHS) {
400 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
401 }
402};
403
413 static unsigned getHashValue(const VTableSlotSummary &I) {
416 }
417 static bool isEqual(const VTableSlotSummary &LHS,
418 const VTableSlotSummary &RHS) {
419 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
420 }
421};
422
423// Returns true if the function must be unreachable based on ValueInfo.
424//
425// In particular, identifies a function as unreachable in the following
426// conditions
427// 1) All summaries are live.
428// 2) All function summaries indicate it's unreachable
429// 3) There is no non-function with the same GUID (which is rare)
432 return false;
433
434 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
435 // Returns false if ValueInfo is absent, or the summary list is empty
436 // (e.g., function declarations).
437 return false;
438 }
439
440 for (const auto &Summary : TheFnVI.getSummaryList()) {
441 // Conservatively returns false if any non-live functions are seen.
442 // In general either all summaries should be live or all should be dead.
443 if (!Summary->isLive())
444 return false;
445 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
446 if (!FS->fflags().MustBeUnreachable)
447 return false;
448 }
449 // Be conservative if a non-function has the same GUID (which is rare).
450 else
451 return false;
452 }
453 // All function summaries are live and all of them agree that the function is
454 // unreachble.
455 return true;
456}
457
458namespace {
459// A virtual call site. VTable is the loaded virtual table pointer, and CS is
460// the indirect virtual call.
461struct VirtualCallSite {
462 Value *VTable = nullptr;
463 CallBase &CB;
464
465 // If non-null, this field points to the associated unsafe use count stored in
466 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
467 // of that field for details.
468 unsigned *NumUnsafeUses = nullptr;
469
470 void
471 emitRemark(const StringRef OptName, const StringRef TargetName,
472 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter) {
473 Function *F = CB.getCaller();
474 DebugLoc DLoc = CB.getDebugLoc();
475 BasicBlock *Block = CB.getParent();
476
477 using namespace ore;
478 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
479 << NV("Optimization", OptName)
480 << ": devirtualized a call to "
481 << NV("FunctionName", TargetName));
482 }
483
484 void replaceAndErase(
485 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
486 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter,
487 Value *New) {
488 if (RemarksEnabled)
489 emitRemark(OptName, TargetName, OREGetter);
490 CB.replaceAllUsesWith(New);
491 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
492 BranchInst::Create(II->getNormalDest(), CB.getIterator());
493 II->getUnwindDest()->removePredecessor(II->getParent());
494 }
495 CB.eraseFromParent();
496 // This use is no longer unsafe.
497 if (NumUnsafeUses)
498 --*NumUnsafeUses;
499 }
500};
501
502// Call site information collected for a specific VTableSlot and possibly a list
503// of constant integer arguments. The grouping by arguments is handled by the
504// VTableSlotInfo class.
505struct CallSiteInfo {
506 /// The set of call sites for this slot. Used during regular LTO and the
507 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
508 /// call sites that appear in the merged module itself); in each of these
509 /// cases we are directly operating on the call sites at the IR level.
510 std::vector<VirtualCallSite> CallSites;
511
512 /// Whether all call sites represented by this CallSiteInfo, including those
513 /// in summaries, have been devirtualized. This starts off as true because a
514 /// default constructed CallSiteInfo represents no call sites.
515 ///
516 /// If at the end of the pass there are still undevirtualized calls, we will
517 /// need to add a use of llvm.type.test to each of the function summaries in
518 /// the vector.
519 bool AllCallSitesDevirted = true;
520
521 // These fields are used during the export phase of ThinLTO and reflect
522 // information collected from function summaries.
523
524 /// CFI-specific: a vector containing the list of function summaries that use
525 /// the llvm.type.checked.load intrinsic and therefore will require
526 /// resolutions for llvm.type.test in order to implement CFI checks if
527 /// devirtualization was unsuccessful.
528 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
529
530 /// A vector containing the list of function summaries that use
531 /// assume(llvm.type.test).
532 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
533
534 bool isExported() const {
535 return !SummaryTypeCheckedLoadUsers.empty() ||
536 !SummaryTypeTestAssumeUsers.empty();
537 }
538
539 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
540 SummaryTypeCheckedLoadUsers.push_back(FS);
541 AllCallSitesDevirted = false;
542 }
543
544 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
545 SummaryTypeTestAssumeUsers.push_back(FS);
546 AllCallSitesDevirted = false;
547 }
548
549 void markDevirt() { AllCallSitesDevirted = true; }
550};
551
552// Call site information collected for a specific VTableSlot.
553struct VTableSlotInfo {
554 // The set of call sites which do not have all constant integer arguments
555 // (excluding "this").
556 CallSiteInfo CSInfo;
557
558 // The set of call sites with all constant integer arguments (excluding
559 // "this"), grouped by argument list.
560 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
561
562 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
563
564private:
565 CallSiteInfo &findCallSiteInfo(CallBase &CB);
566};
567
568CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
569 std::vector<uint64_t> Args;
570 auto *CBType = dyn_cast<IntegerType>(CB.getType());
571 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
572 return CSInfo;
573 for (auto &&Arg : drop_begin(CB.args())) {
574 auto *CI = dyn_cast<ConstantInt>(Arg);
575 if (!CI || CI->getBitWidth() > 64)
576 return CSInfo;
577 Args.push_back(CI->getZExtValue());
578 }
579 return ConstCSInfo[Args];
580}
581
582void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
583 unsigned *NumUnsafeUses) {
584 auto &CSI = findCallSiteInfo(CB);
585 CSI.AllCallSitesDevirted = false;
586 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
587}
588
589struct DevirtModule {
590 Module &M;
593
594 ModuleSummaryIndex *const ExportSummary;
595 const ModuleSummaryIndex *const ImportSummary;
596
597 IntegerType *const Int8Ty;
598 PointerType *const Int8PtrTy;
599 IntegerType *const Int32Ty;
600 IntegerType *const Int64Ty;
601 IntegerType *const IntPtrTy;
602 /// Sizeless array type, used for imported vtables. This provides a signal
603 /// to analyzers that these imports may alias, as they do for example
604 /// when multiple unique return values occur in the same vtable.
605 ArrayType *const Int8Arr0Ty;
606
607 const bool RemarksEnabled;
608 std::function<OptimizationRemarkEmitter &(Function &)> OREGetter;
609 MapVector<VTableSlot, VTableSlotInfo> CallSlots;
610
611 // Calls that have already been optimized. We may add a call to multiple
612 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
613 // optimize a call more than once.
614 SmallPtrSet<CallBase *, 8> OptimizedCalls;
615
616 // Store calls that had their ptrauth bundle removed. They are to be deleted
617 // at the end of the optimization.
618 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
619
620 // This map keeps track of the number of "unsafe" uses of a loaded function
621 // pointer. The key is the associated llvm.type.test intrinsic call generated
622 // by this pass. An unsafe use is one that calls the loaded function pointer
623 // directly. Every time we eliminate an unsafe use (for example, by
624 // devirtualizing it or by applying virtual constant propagation), we
625 // decrement the value stored in this map. If a value reaches zero, we can
626 // eliminate the type check by RAUWing the associated llvm.type.test call with
627 // true.
628 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
629 PatternList FunctionsToSkip;
630
631 const bool DevirtSpeculatively;
632 DevirtModule(Module &M, ModuleAnalysisManager &MAM,
633 ModuleSummaryIndex *ExportSummary,
634 const ModuleSummaryIndex *ImportSummary,
635 bool DevirtSpeculatively)
636 : M(M), MAM(MAM),
637 FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
638 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
639 Int8Ty(Type::getInt8Ty(M.getContext())),
640 Int8PtrTy(PointerType::getUnqual(M.getContext())),
641 Int32Ty(Type::getInt32Ty(M.getContext())),
642 Int64Ty(Type::getInt64Ty(M.getContext())),
643 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
644 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
645 RemarksEnabled(areRemarksEnabled()),
646 OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
647 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
648 }),
649 DevirtSpeculatively(DevirtSpeculatively) {
650 assert(!(ExportSummary && ImportSummary));
651 FunctionsToSkip.init(SkipFunctionNames);
652 }
653
654 bool areRemarksEnabled();
655
656 void
657 scanTypeTestUsers(Function *TypeTestFunc,
658 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
659 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
660
661 void buildTypeIdentifierMap(
662 std::vector<VTableBits> &Bits,
663 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
664
665 bool
666 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
667 const std::set<TypeMemberInfo> &TypeMemberInfos,
668 uint64_t ByteOffset,
669 ModuleSummaryIndex *ExportSummary);
670
671 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
672 bool &IsExported);
673 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
675 VTableSlotInfo &SlotInfo,
676 WholeProgramDevirtResolution *Res);
677
678 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Function &JT,
679 bool &IsExported);
680 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
681 VTableSlotInfo &SlotInfo,
682 WholeProgramDevirtResolution *Res, VTableSlot Slot);
683
684 bool tryEvaluateFunctionsWithArgs(
686 ArrayRef<uint64_t> Args);
687
688 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
689 uint64_t TheRetVal);
690 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
691 CallSiteInfo &CSInfo,
692 WholeProgramDevirtResolution::ByArg *Res);
693
694 // Returns the global symbol name that is used to export information about the
695 // given vtable slot and list of arguments.
696 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
697 StringRef Name);
698
699 bool shouldExportConstantsAsAbsoluteSymbols();
700
701 // This function is called during the export phase to create a symbol
702 // definition containing information about the given vtable slot and list of
703 // arguments.
704 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
705 Constant *C);
706 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
707 uint32_t Const, uint32_t &Storage);
708
709 // This function is called during the import phase to create a reference to
710 // the symbol definition created during the export phase.
711 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
712 StringRef Name);
713 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
714 StringRef Name, IntegerType *IntTy,
715 uint32_t Storage);
716
717 Constant *getMemberAddr(const TypeMemberInfo *M);
718
719 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
720 Constant *UniqueMemberAddr);
721 bool tryUniqueRetValOpt(unsigned BitWidth,
723 CallSiteInfo &CSInfo,
724 WholeProgramDevirtResolution::ByArg *Res,
725 VTableSlot Slot, ArrayRef<uint64_t> Args);
726
727 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
728 Constant *Byte, Constant *Bit);
729 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
730 VTableSlotInfo &SlotInfo,
731 WholeProgramDevirtResolution *Res, VTableSlot Slot);
732
733 void rebuildGlobal(VTableBits &B);
734
735 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
736 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
737
738 // If we were able to eliminate all unsafe uses for a type checked load,
739 // eliminate the associated type tests by replacing them with true.
740 void removeRedundantTypeTests();
741
742 bool run();
743
744 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
745 //
746 // Caller guarantees that `ExportSummary` is not nullptr.
747 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
748 ModuleSummaryIndex *ExportSummary);
749
750 // Returns true if the function definition must be unreachable.
751 //
752 // Note if this helper function returns true, `F` is guaranteed
753 // to be unreachable; if it returns false, `F` might still
754 // be unreachable but not covered by this helper function.
755 //
756 // Implementation-wise, if function definition is present, IR is analyzed; if
757 // not, look up function flags from ExportSummary as a fallback.
758 static bool mustBeUnreachableFunction(Function *const F,
759 ModuleSummaryIndex *ExportSummary);
760
761 // Lower the module using the action and summary passed as command line
762 // arguments. For testing purposes only.
763 static bool runForTesting(Module &M, ModuleAnalysisManager &MAM,
764 bool DevirtSpeculatively);
765};
766
767struct DevirtIndex {
768 ModuleSummaryIndex &ExportSummary;
769 // The set in which to record GUIDs exported from their module by
770 // devirtualization, used by client to ensure they are not internalized.
771 std::set<GlobalValue::GUID> &ExportedGUIDs;
772 // A map in which to record the information necessary to locate the WPD
773 // resolution for local targets in case they are exported by cross module
774 // importing.
775 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
776
777 MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
778
779 PatternList FunctionsToSkip;
780
781 DevirtIndex(
782 ModuleSummaryIndex &ExportSummary,
783 std::set<GlobalValue::GUID> &ExportedGUIDs,
784 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
785 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
786 LocalWPDTargetsMap(LocalWPDTargetsMap) {
787 FunctionsToSkip.init(SkipFunctionNames);
788 }
789
790 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
791 const TypeIdCompatibleVtableInfo TIdInfo,
792 uint64_t ByteOffset);
793
794 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
795 VTableSlotSummary &SlotSummary,
796 VTableSlotInfo &SlotInfo,
797 WholeProgramDevirtResolution *Res,
798 std::set<ValueInfo> &DevirtTargets);
799
800 void run();
801};
802} // end anonymous namespace
803
806 if (UseCommandLine) {
807 if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively))
808 return PreservedAnalyses::all();
810 }
811
812 std::optional<ModuleSummaryIndex> Index;
814 // Build the ExportSummary from the module.
816 "ExportSummary is expected to be empty in non-LTO mode");
817 ProfileSummaryInfo PSI(M);
818 Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
819 ExportSummary = Index.has_value() ? &Index.value() : nullptr;
820 }
821 if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively)
822 .run())
823 return PreservedAnalyses::all();
825}
826
827// Enable whole program visibility if enabled by client (e.g. linker) or
828// internal option, and not force disabled.
829bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
830 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
832}
833
834static bool
836 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
837 // TypeID for member function pointer type is an internal construct
838 // and won't exist in IsVisibleToRegularObj. The full TypeID
839 // will be present and participate in invalidation.
840 if (TypeID.ends_with(".virtual"))
841 return false;
842
843 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
844 // non-externally visible types which cannot interact with
845 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
846 if (!TypeID.consume_front("_ZTS"))
847 return false;
848
849 // TypeID is keyed off the type name symbol (_ZTS). However, the native
850 // object may not contain this symbol if it does not contain a key
851 // function for the base type and thus only contains a reference to the
852 // type info (_ZTI). To catch this case we query using the type info
853 // symbol corresponding to the TypeID.
854 std::string TypeInfo = ("_ZTI" + TypeID).str();
855 return IsVisibleToRegularObj(TypeInfo);
856}
857
858static bool
860 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
862 GV.getMetadata(LLVMContext::MD_type, Types);
863
864 for (auto *Type : Types)
865 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
866 return typeIDVisibleToRegularObj(TypeID->getString(),
867 IsVisibleToRegularObj);
868
869 return false;
870}
871
872/// If whole program visibility asserted, then upgrade all public vcall
873/// visibility metadata on vtable definitions to linkage unit visibility in
874/// Module IR (for regular or hybrid LTO).
876 Module &M, bool WholeProgramVisibilityEnabledInLTO,
877 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
878 bool ValidateAllVtablesHaveTypeInfos,
879 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
880 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
881 return;
882 for (GlobalVariable &GV : M.globals()) {
883 // Add linkage unit visibility to any variable with type metadata, which are
884 // the vtable definitions. We won't have an existing vcall_visibility
885 // metadata on vtable definitions with public visibility.
886 if (GV.hasMetadata(LLVMContext::MD_type) &&
888 // Don't upgrade the visibility for symbols exported to the dynamic
889 // linker, as we have no information on their eventual use.
890 !DynamicExportSymbols.count(GV.getGUID()) &&
891 // With validation enabled, we want to exclude symbols visible to
892 // regular objects. Local symbols will be in this group due to the
893 // current implementation but those with VCallVisibilityTranslationUnit
894 // will have already been marked in clang so are unaffected.
895 !(ValidateAllVtablesHaveTypeInfos &&
896 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
898 }
899}
900
902 bool WholeProgramVisibilityEnabledInLTO) {
903 llvm::TimeTraceScope timeScope("Update public type test calls");
904 Function *PublicTypeTestFunc =
905 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
906 if (!PublicTypeTestFunc)
907 return;
908 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
909 Function *TypeTestFunc =
910 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
911 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
912 auto *CI = cast<CallInst>(U.getUser());
913 auto *NewCI = CallInst::Create(
914 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, {}, "",
915 CI->getIterator());
916 CI->replaceAllUsesWith(NewCI);
917 CI->eraseFromParent();
918 }
919 } else {
920 // TODO: Don't replace public type tests when speculative devirtualization
921 // gets enabled in LTO mode.
922 auto *True = ConstantInt::getTrue(M.getContext());
923 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
924 auto *CI = cast<CallInst>(U.getUser());
925 CI->replaceAllUsesWith(True);
926 CI->eraseFromParent();
927 }
928 }
929}
930
931/// Based on typeID string, get all associated vtable GUIDS that are
932/// visible to regular objects.
934 ModuleSummaryIndex &Index,
935 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
936 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
937 for (const auto &TypeID : Index.typeIdCompatibleVtableMap()) {
938 if (typeIDVisibleToRegularObj(TypeID.first, IsVisibleToRegularObj))
939 for (const TypeIdOffsetVtableInfo &P : TypeID.second)
940 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
941 }
942}
943
944/// If whole program visibility asserted, then upgrade all public vcall
945/// visibility metadata on vtable definition summaries to linkage unit
946/// visibility in Module summary index (for ThinLTO).
948 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
949 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
950 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
951 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
952 return;
953 for (auto &P : Index) {
954 // Don't upgrade the visibility for symbols exported to the dynamic
955 // linker, as we have no information on their eventual use.
956 if (DynamicExportSymbols.count(P.first))
957 continue;
958 // With validation enabled, we want to exclude symbols visible to regular
959 // objects. Local symbols will be in this group due to the current
960 // implementation but those with VCallVisibilityTranslationUnit will have
961 // already been marked in clang so are unaffected.
962 if (VisibleToRegularObjSymbols.count(P.first))
963 continue;
964 for (auto &S : P.second.getSummaryList()) {
965 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
966 if (!GVar ||
967 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
968 continue;
969 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
970 }
971 }
972}
973
975 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
976 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
977 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
978}
979
981 ModuleSummaryIndex &Summary,
982 function_ref<bool(StringRef, ValueInfo)> IsExported,
983 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
984 for (auto &T : LocalWPDTargetsMap) {
985 auto &VI = T.first;
986 // This was enforced earlier during trySingleImplDevirt.
987 assert(VI.getSummaryList().size() == 1 &&
988 "Devirt of local target has more than one copy");
989 auto &S = VI.getSummaryList()[0];
990 if (!IsExported(S->modulePath(), VI))
991 continue;
992
993 // It's been exported by a cross module import.
994 for (auto &SlotSummary : T.second) {
995 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
996 assert(TIdSum);
997 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
998 assert(WPDRes != TIdSum->WPDRes.end());
999 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
1000 WPDRes->second.SingleImplName,
1001 Summary.getModuleHash(S->modulePath()));
1002 }
1003 }
1004}
1005
1007 // Check that summary index contains regular LTO module when performing
1008 // export to prevent occasional use of index from pure ThinLTO compilation
1009 // (-fno-split-lto-module). This kind of summary index is passed to
1010 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
1011 const auto &ModPaths = Summary->modulePaths();
1013 !ModPaths.contains(ModuleSummaryIndex::getRegularLTOModuleName()))
1014 return createStringError(
1016 "combined summary should contain Regular LTO module");
1017 return ErrorSuccess();
1018}
1019
1020bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM,
1021 bool DevirtSpeculatively) {
1022 std::unique_ptr<ModuleSummaryIndex> Summary =
1023 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
1024
1025 // Handle the command-line summary arguments. This code is for testing
1026 // purposes only, so we handle errors directly.
1027 if (!ClReadSummary.empty()) {
1028 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
1029 ": ");
1030 auto ReadSummaryFile =
1032 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
1033 getModuleSummaryIndex(*ReadSummaryFile)) {
1034 Summary = std::move(*SummaryOrErr);
1035 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
1036 } else {
1037 // Try YAML if we've failed with bitcode.
1038 consumeError(SummaryOrErr.takeError());
1039 yaml::Input In(ReadSummaryFile->getBuffer());
1040 In >> *Summary;
1041 ExitOnErr(errorCodeToError(In.error()));
1042 }
1043 }
1044
1045 bool Changed =
1046 DevirtModule(M, MAM,
1048 : nullptr,
1050 : nullptr,
1051 DevirtSpeculatively)
1052 .run();
1053
1054 if (!ClWriteSummary.empty()) {
1055 ExitOnError ExitOnErr(
1056 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1057 std::error_code EC;
1058 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1059 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
1060 ExitOnErr(errorCodeToError(EC));
1061 writeIndexToFile(*Summary, OS);
1062 } else {
1063 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1064 ExitOnErr(errorCodeToError(EC));
1065 yaml::Output Out(OS);
1066 Out << *Summary;
1067 }
1068 }
1069
1070 return Changed;
1071}
1072
1073void DevirtModule::buildTypeIdentifierMap(
1074 std::vector<VTableBits> &Bits,
1075 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1076 DenseMap<GlobalVariable *, VTableBits *> GVToBits;
1077 Bits.reserve(M.global_size());
1079 for (GlobalVariable &GV : M.globals()) {
1080 Types.clear();
1081 GV.getMetadata(LLVMContext::MD_type, Types);
1082 if (GV.isDeclaration() || Types.empty())
1083 continue;
1084
1085 VTableBits *&BitsPtr = GVToBits[&GV];
1086 if (!BitsPtr) {
1087 Bits.emplace_back();
1088 Bits.back().GV = &GV;
1089 Bits.back().ObjectSize =
1090 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1091 BitsPtr = &Bits.back();
1092 }
1093
1094 for (MDNode *Type : Types) {
1095 auto *TypeID = Type->getOperand(1).get();
1096
1097 uint64_t Offset =
1099 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1100 ->getZExtValue();
1101
1102 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1103 }
1104 }
1105}
1106
1107bool DevirtModule::tryFindVirtualCallTargets(
1108 std::vector<VirtualCallTarget> &TargetsForSlot,
1109 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1110 ModuleSummaryIndex *ExportSummary) {
1111 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1112 if (!TM.Bits->GV->isConstant())
1113 return false;
1114
1115 // Without DevirtSpeculatively, we cannot perform whole program
1116 // devirtualization analysis on a vtable with public LTO visibility.
1117 if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() ==
1119 return false;
1120
1121 Function *Fn = nullptr;
1122 Constant *C = nullptr;
1123 std::tie(Fn, C) =
1124 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1125
1126 if (!Fn)
1127 return false;
1128
1129 if (FunctionsToSkip.match(Fn->getName()))
1130 return false;
1131
1132 // We can disregard __cxa_pure_virtual as a possible call target, as
1133 // calls to pure virtuals are UB.
1134 if (Fn->getName() == "__cxa_pure_virtual")
1135 continue;
1136
1137 // In most cases empty functions will be overridden by the
1138 // implementation of the derived class, so we can skip them.
1139 if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() &&
1140 Fn->getInstructionCount() <= 1)
1141 continue;
1142
1143 // We can disregard unreachable functions as possible call targets, as
1144 // unreachable functions shouldn't be called.
1145 if (mustBeUnreachableFunction(Fn, ExportSummary))
1146 continue;
1147
1148 // Save the symbol used in the vtable to use as the devirtualization
1149 // target.
1150 auto *GV = dyn_cast<GlobalValue>(C);
1151 assert(GV);
1152 TargetsForSlot.push_back({GV, &TM});
1153 }
1154
1155 // Give up if we couldn't find any targets.
1156 return !TargetsForSlot.empty();
1157}
1158
1159bool DevirtIndex::tryFindVirtualCallTargets(
1160 std::vector<ValueInfo> &TargetsForSlot,
1161 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1162 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1163 // Find a representative copy of the vtable initializer.
1164 // We can have multiple available_externally, linkonce_odr and weak_odr
1165 // vtable initializers. We can also have multiple external vtable
1166 // initializers in the case of comdats, which we cannot check here.
1167 // The linker should give an error in this case.
1168 //
1169 // Also, handle the case of same-named local Vtables with the same path
1170 // and therefore the same GUID. This can happen if there isn't enough
1171 // distinguishing path when compiling the source file. In that case we
1172 // conservatively return false early.
1173 if (P.VTableVI.hasLocal() && P.VTableVI.getSummaryList().size() > 1)
1174 return false;
1175 const GlobalVarSummary *VS = nullptr;
1176 for (const auto &S : P.VTableVI.getSummaryList()) {
1177 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1178 if (!CurVS->vTableFuncs().empty() ||
1179 // Previously clang did not attach the necessary type metadata to
1180 // available_externally vtables, in which case there would not
1181 // be any vtable functions listed in the summary and we need
1182 // to treat this case conservatively (in case the bitcode is old).
1183 // However, we will also not have any vtable functions in the
1184 // case of a pure virtual base class. In that case we do want
1185 // to set VS to avoid treating it conservatively.
1187 VS = CurVS;
1188 // We cannot perform whole program devirtualization analysis on a vtable
1189 // with public LTO visibility.
1190 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1191 return false;
1192 break;
1193 }
1194 }
1195 // There will be no VS if all copies are available_externally having no
1196 // type metadata. In that case we can't safely perform WPD.
1197 if (!VS)
1198 return false;
1199 if (!VS->isLive())
1200 continue;
1201 for (auto VTP : VS->vTableFuncs()) {
1202 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1203 continue;
1204
1205 if (mustBeUnreachableFunction(VTP.FuncVI))
1206 continue;
1207
1208 TargetsForSlot.push_back(VTP.FuncVI);
1209 }
1210 }
1211
1212 // Give up if we couldn't find any targets.
1213 return !TargetsForSlot.empty();
1214}
1215
1216void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1217 Constant *TheFn, bool &IsExported) {
1218 // Don't devirtualize function if we're told to skip it
1219 // in -wholeprogramdevirt-skip.
1220 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1221 return;
1222 auto Apply = [&](CallSiteInfo &CSInfo) {
1223 for (auto &&VCallSite : CSInfo.CallSites) {
1224 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1225 continue;
1226
1227 // Stop when the number of devirted calls reaches the cutoff.
1228 if (!DebugCounter::shouldExecute(CallsToDevirt))
1229 continue;
1230
1231 if (RemarksEnabled)
1232 VCallSite.emitRemark("single-impl",
1233 TheFn->stripPointerCasts()->getName(), OREGetter);
1234 NumSingleImpl++;
1235 auto &CB = VCallSite.CB;
1236 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1237 IRBuilder<> Builder(&CB);
1238 Value *Callee =
1239 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1240
1241 // If trap checking is enabled, add support to compare the virtual
1242 // function pointer to the devirtualized target. In case of a mismatch,
1243 // perform a debug trap.
1245 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1247 Cond, &CB, /*Unreachable=*/false,
1248 MDBuilder(M.getContext()).createUnlikelyBranchWeights());
1249 Builder.SetInsertPoint(ThenTerm);
1250 Function *TrapFn =
1251 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::debugtrap);
1252 auto *CallTrap = Builder.CreateCall(TrapFn);
1253 CallTrap->setDebugLoc(CB.getDebugLoc());
1254 }
1255
1256 // If fallback checking or speculative devirtualization are enabled,
1257 // add support to compare the virtual function pointer to the
1258 // devirtualized target. In case of a mismatch, fall back to indirect
1259 // call.
1260 if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) {
1261 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1262 // Version the indirect call site. If the called value is equal to the
1263 // given callee, 'NewInst' will be executed, otherwise the original call
1264 // site will be executed.
1265 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1266 NewInst.setCalledOperand(Callee);
1267 // Since the new call site is direct, we must clear metadata that
1268 // is only appropriate for indirect calls. This includes !prof and
1269 // !callees metadata.
1270 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1271 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1272 // Additionally, we should remove them from the fallback indirect call,
1273 // so that we don't attempt to perform indirect call promotion later.
1274 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1275 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1276 }
1277
1278 // In either trapping or non-checking mode, devirtualize original call.
1279 else {
1280 // Devirtualize unconditionally.
1281 CB.setCalledOperand(Callee);
1282 // Since the call site is now direct, we must clear metadata that
1283 // is only appropriate for indirect calls. This includes !prof and
1284 // !callees metadata.
1285 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1286 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1287 if (CB.getCalledOperand() &&
1289 auto *NewCS = CallBase::removeOperandBundle(
1291 CB.replaceAllUsesWith(NewCS);
1292 // Schedule for deletion at the end of pass run.
1293 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1294 }
1295 }
1296
1297 // This use is no longer unsafe.
1298 if (VCallSite.NumUnsafeUses)
1299 --*VCallSite.NumUnsafeUses;
1300 }
1301 if (CSInfo.isExported())
1302 IsExported = true;
1303 CSInfo.markDevirt();
1304 };
1305 Apply(SlotInfo.CSInfo);
1306 for (auto &P : SlotInfo.ConstCSInfo)
1307 Apply(P.second);
1308}
1309
1310static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1311 // We can't add calls if we haven't seen a definition
1312 if (Callee.getSummaryList().empty())
1313 return false;
1314
1315 // Insert calls into the summary index so that the devirtualized targets
1316 // are eligible for import.
1317 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1318 // to better ensure we have the opportunity to inline them.
1319 bool IsExported = false;
1320 auto &S = Callee.getSummaryList()[0];
1321 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
1322 /* RelBF = */ 0);
1323 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1324 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1325 FS->addCall({Callee, CI});
1326 IsExported |= S->modulePath() != FS->modulePath();
1327 }
1328 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1329 FS->addCall({Callee, CI});
1330 IsExported |= S->modulePath() != FS->modulePath();
1331 }
1332 };
1333 AddCalls(SlotInfo.CSInfo);
1334 for (auto &P : SlotInfo.ConstCSInfo)
1335 AddCalls(P.second);
1336 return IsExported;
1337}
1338
1339bool DevirtModule::trySingleImplDevirt(
1340 ModuleSummaryIndex *ExportSummary,
1341 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1342 WholeProgramDevirtResolution *Res) {
1343 // See if the program contains a single implementation of this virtual
1344 // function.
1345 auto *TheFn = TargetsForSlot[0].Fn;
1346 for (auto &&Target : TargetsForSlot)
1347 if (TheFn != Target.Fn)
1348 return false;
1349
1350 // If so, update each call site to call that implementation directly.
1351 if (RemarksEnabled || AreStatisticsEnabled())
1352 TargetsForSlot[0].WasDevirt = true;
1353
1354 bool IsExported = false;
1355 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1356 if (!IsExported)
1357 return false;
1358
1359 // If the only implementation has local linkage, we must promote to external
1360 // to make it visible to thin LTO objects. We can only get here during the
1361 // ThinLTO export phase.
1362 if (TheFn->hasLocalLinkage()) {
1363 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1364
1365 // Since we are renaming the function, any comdats with the same name must
1366 // also be renamed. This is required when targeting COFF, as the comdat name
1367 // must match one of the names of the symbols in the comdat.
1368 if (Comdat *C = TheFn->getComdat()) {
1369 if (C->getName() == TheFn->getName()) {
1370 Comdat *NewC = M.getOrInsertComdat(NewName);
1371 NewC->setSelectionKind(C->getSelectionKind());
1372 for (GlobalObject &GO : M.global_objects())
1373 if (GO.getComdat() == C)
1374 GO.setComdat(NewC);
1375 }
1376 }
1377
1378 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1379 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1380 TheFn->setName(NewName);
1381 }
1382 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1383 // Any needed promotion of 'TheFn' has already been done during
1384 // LTO unit split, so we can ignore return value of AddCalls.
1385 addCalls(SlotInfo, TheFnVI);
1386
1388 Res->SingleImplName = std::string(TheFn->getName());
1389
1390 return true;
1391}
1392
1393bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1394 VTableSlotSummary &SlotSummary,
1395 VTableSlotInfo &SlotInfo,
1396 WholeProgramDevirtResolution *Res,
1397 std::set<ValueInfo> &DevirtTargets) {
1398 // See if the program contains a single implementation of this virtual
1399 // function.
1400 auto TheFn = TargetsForSlot[0];
1401 for (auto &&Target : TargetsForSlot)
1402 if (TheFn != Target)
1403 return false;
1404
1405 // Don't devirtualize if we don't have target definition.
1406 auto Size = TheFn.getSummaryList().size();
1407 if (!Size)
1408 return false;
1409
1410 // Don't devirtualize function if we're told to skip it
1411 // in -wholeprogramdevirt-skip.
1412 if (FunctionsToSkip.match(TheFn.name()))
1413 return false;
1414
1415 // If the summary list contains multiple summaries where at least one is
1416 // a local, give up, as we won't know which (possibly promoted) name to use.
1417 if (TheFn.hasLocal() && Size > 1)
1418 return false;
1419
1420 // Collect functions devirtualized at least for one call site for stats.
1422 DevirtTargets.insert(TheFn);
1423
1424 auto &S = TheFn.getSummaryList()[0];
1425 bool IsExported = addCalls(SlotInfo, TheFn);
1426 if (IsExported)
1427 ExportedGUIDs.insert(TheFn.getGUID());
1428
1429 // Record in summary for use in devirtualization during the ThinLTO import
1430 // step.
1432 if (GlobalValue::isLocalLinkage(S->linkage())) {
1433 if (IsExported)
1434 // If target is a local function and we are exporting it by
1435 // devirtualizing a call in another module, we need to record the
1436 // promoted name.
1438 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1439 else {
1440 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1441 Res->SingleImplName = std::string(TheFn.name());
1442 }
1443 } else
1444 Res->SingleImplName = std::string(TheFn.name());
1445
1446 // Name will be empty if this thin link driven off of serialized combined
1447 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1448 // legacy LTO API anyway.
1449 assert(!Res->SingleImplName.empty());
1450
1451 return true;
1452}
1453
1454void DevirtModule::tryICallBranchFunnel(
1455 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1456 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1457 Triple T(M.getTargetTriple());
1458 if (T.getArch() != Triple::x86_64)
1459 return;
1460
1461 if (TargetsForSlot.size() > ClThreshold)
1462 return;
1463
1464 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1465 if (!HasNonDevirt)
1466 for (auto &P : SlotInfo.ConstCSInfo)
1467 if (!P.second.AllCallSitesDevirted) {
1468 HasNonDevirt = true;
1469 break;
1470 }
1471
1472 if (!HasNonDevirt)
1473 return;
1474
1475 // If any GV is AvailableExternally, not to generate branch.funnel.
1476 // NOTE: It is to avoid crash in LowerTypeTest.
1477 // If the branch.funnel is generated, because GV.isDeclarationForLinker(),
1478 // in LowerTypeTestsModule::lower(), its GlobalTypeMember would NOT
1479 // be saved in GlobalTypeMembers[&GV]. Then crash happens in
1480 // buildBitSetsFromDisjointSet due to GlobalTypeMembers[&GV] is NULL.
1481 // Even doing experiment to save it in GlobalTypeMembers[&GV] and
1482 // making GlobalTypeMembers[&GV] be not NULL, crash could avoid from
1483 // buildBitSetsFromDisjointSet. But still report_fatal_error in Verifier
1484 // or SelectionDAGBuilder later, because operands linkage type consistency
1485 // check of icall.branch.funnel can not pass.
1486 for (auto &T : TargetsForSlot) {
1487 if (T.TM->Bits->GV->hasAvailableExternallyLinkage())
1488 return;
1489 }
1490
1491 FunctionType *FT =
1492 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1493 Function *JT;
1494 if (isa<MDString>(Slot.TypeID)) {
1496 M.getDataLayout().getProgramAddressSpace(),
1497 getGlobalName(Slot, {}, "branch_funnel"), &M);
1498 JT->setVisibility(GlobalValue::HiddenVisibility);
1499 } else {
1501 M.getDataLayout().getProgramAddressSpace(),
1502 "branch_funnel", &M);
1503 }
1504 JT->addParamAttr(0, Attribute::Nest);
1505
1506 std::vector<Value *> JTArgs;
1507 JTArgs.push_back(JT->arg_begin());
1508 for (auto &T : TargetsForSlot) {
1509 JTArgs.push_back(getMemberAddr(T.TM));
1510 JTArgs.push_back(T.Fn);
1511 }
1512
1513 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1515 &M, llvm::Intrinsic::icall_branch_funnel, {});
1516
1517 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1518 CI->setTailCallKind(CallInst::TCK_MustTail);
1519 ReturnInst::Create(M.getContext(), nullptr, BB);
1520
1521 bool IsExported = false;
1522 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
1523 if (IsExported)
1525
1526 if (!JT->getEntryCount().has_value()) {
1527 // FIXME: we could pass through thinlto the necessary information.
1529 }
1530}
1531
1532void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1533 Function &JT, bool &IsExported) {
1534 DenseMap<Function *, double> FunctionEntryCounts;
1535 auto Apply = [&](CallSiteInfo &CSInfo) {
1536 if (CSInfo.isExported())
1537 IsExported = true;
1538 if (CSInfo.AllCallSitesDevirted)
1539 return;
1540
1541 std::map<CallBase *, CallBase *> CallBases;
1542 for (auto &&VCallSite : CSInfo.CallSites) {
1543 CallBase &CB = VCallSite.CB;
1544
1545 if (CallBases.find(&CB) != CallBases.end()) {
1546 // When finding devirtualizable calls, it's possible to find the same
1547 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1548 // calls, which can cause duplicate call sites to be recorded in
1549 // [Const]CallSites. If we've already found one of these
1550 // call instances, just ignore it. It will be replaced later.
1551 continue;
1552 }
1553
1554 // Jump tables are only profitable if the retpoline mitigation is enabled.
1555 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1556 if (!FSAttr.isValid() ||
1557 !FSAttr.getValueAsString().contains("+retpoline"))
1558 continue;
1559
1560 NumBranchFunnel++;
1561 if (RemarksEnabled)
1562 VCallSite.emitRemark("branch-funnel", JT.getName(), OREGetter);
1563
1564 // Pass the address of the vtable in the nest register, which is r10 on
1565 // x86_64.
1566 std::vector<Type *> NewArgs;
1567 NewArgs.push_back(Int8PtrTy);
1568 append_range(NewArgs, CB.getFunctionType()->params());
1569 FunctionType *NewFT =
1571 CB.getFunctionType()->isVarArg());
1572 IRBuilder<> IRB(&CB);
1573 std::vector<Value *> Args;
1574 Args.push_back(VCallSite.VTable);
1575 llvm::append_range(Args, CB.args());
1576
1577 CallBase *NewCS = nullptr;
1578 if (!JT.isDeclaration() && !ProfcheckDisableMetadataFixes) {
1579 // Accumulate the call frequencies of the original call site, and use
1580 // that as total entry count for the funnel function.
1581 auto &F = *CB.getCaller();
1582 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1583 auto EC = BFI.getBlockFreq(&F.getEntryBlock());
1584 auto CC = F.getEntryCount(/*AllowSynthetic=*/true);
1585 double CallCount = 0.0;
1586 if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) {
1587 double CallFreq =
1588 static_cast<double>(
1589 BFI.getBlockFreq(CB.getParent()).getFrequency()) /
1590 EC.getFrequency();
1591 CallCount = CallFreq * CC->getCount();
1592 }
1593 FunctionEntryCounts[&JT] += CallCount;
1594 }
1595 if (isa<CallInst>(CB))
1596 NewCS = IRB.CreateCall(NewFT, &JT, Args);
1597 else
1598 NewCS =
1599 IRB.CreateInvoke(NewFT, &JT, cast<InvokeInst>(CB).getNormalDest(),
1600 cast<InvokeInst>(CB).getUnwindDest(), Args);
1601 NewCS->setCallingConv(CB.getCallingConv());
1602
1603 AttributeList Attrs = CB.getAttributes();
1604 std::vector<AttributeSet> NewArgAttrs;
1605 NewArgAttrs.push_back(AttributeSet::get(
1606 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1607 M.getContext(), Attribute::Nest)}));
1608 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1609 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1610 NewCS->setAttributes(
1611 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1612 Attrs.getRetAttrs(), NewArgAttrs));
1613
1614 CallBases[&CB] = NewCS;
1615
1616 // This use is no longer unsafe.
1617 if (VCallSite.NumUnsafeUses)
1618 --*VCallSite.NumUnsafeUses;
1619 }
1620 // Don't mark as devirtualized because there may be callers compiled without
1621 // retpoline mitigation, which would mean that they are lowered to
1622 // llvm.type.test and therefore require an llvm.type.test resolution for the
1623 // type identifier.
1624
1625 for (auto &[Old, New] : CallBases) {
1626 Old->replaceAllUsesWith(New);
1627 Old->eraseFromParent();
1628 }
1629 };
1630 Apply(SlotInfo.CSInfo);
1631 for (auto &P : SlotInfo.ConstCSInfo)
1632 Apply(P.second);
1633 for (auto &[F, C] : FunctionEntryCounts) {
1634 assert(!F->getEntryCount(/*AllowSynthetic=*/true) &&
1635 "Unexpected entry count for funnel that was freshly synthesized");
1636 F->setEntryCount(static_cast<uint64_t>(std::round(C)));
1637 }
1638}
1639
1640bool DevirtModule::tryEvaluateFunctionsWithArgs(
1642 ArrayRef<uint64_t> Args) {
1643 // Evaluate each function and store the result in each target's RetVal
1644 // field.
1645 for (VirtualCallTarget &Target : TargetsForSlot) {
1646 // TODO: Skip for now if the vtable symbol was an alias to a function,
1647 // need to evaluate whether it would be correct to analyze the aliasee
1648 // function for this optimization.
1649 auto *Fn = dyn_cast<Function>(Target.Fn);
1650 if (!Fn)
1651 return false;
1652
1653 if (Fn->arg_size() != Args.size() + 1)
1654 return false;
1655
1656 Evaluator Eval(M.getDataLayout(), nullptr);
1658 EvalArgs.push_back(
1660 for (unsigned I = 0; I != Args.size(); ++I) {
1661 auto *ArgTy =
1663 if (!ArgTy)
1664 return false;
1665 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1666 }
1667
1668 Constant *RetVal;
1669 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1670 !isa<ConstantInt>(RetVal))
1671 return false;
1672 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1673 }
1674 return true;
1675}
1676
1677void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1678 uint64_t TheRetVal) {
1679 for (auto Call : CSInfo.CallSites) {
1680 if (!OptimizedCalls.insert(&Call.CB).second)
1681 continue;
1682 NumUniformRetVal++;
1683 Call.replaceAndErase(
1684 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1685 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1686 }
1687 CSInfo.markDevirt();
1688}
1689
1690bool DevirtModule::tryUniformRetValOpt(
1691 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1692 WholeProgramDevirtResolution::ByArg *Res) {
1693 // Uniform return value optimization. If all functions return the same
1694 // constant, replace all calls with that constant.
1695 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1696 for (const VirtualCallTarget &Target : TargetsForSlot)
1697 if (Target.RetVal != TheRetVal)
1698 return false;
1699
1700 if (CSInfo.isExported()) {
1702 Res->Info = TheRetVal;
1703 }
1704
1705 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1706 if (RemarksEnabled || AreStatisticsEnabled())
1707 for (auto &&Target : TargetsForSlot)
1708 Target.WasDevirt = true;
1709 return true;
1710}
1711
1712std::string DevirtModule::getGlobalName(VTableSlot Slot,
1713 ArrayRef<uint64_t> Args,
1714 StringRef Name) {
1715 std::string FullName = "__typeid_";
1716 raw_string_ostream OS(FullName);
1717 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1718 for (uint64_t Arg : Args)
1719 OS << '_' << Arg;
1720 OS << '_' << Name;
1721 return FullName;
1722}
1723
1724bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1725 Triple T(M.getTargetTriple());
1726 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1727}
1728
1729void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1730 StringRef Name, Constant *C) {
1731 GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
1732 getGlobalName(Slot, Args, Name), C, &M);
1734}
1735
1736void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1737 StringRef Name, uint32_t Const,
1738 uint32_t &Storage) {
1739 if (shouldExportConstantsAsAbsoluteSymbols()) {
1740 exportGlobal(
1741 Slot, Args, Name,
1742 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1743 return;
1744 }
1745
1746 Storage = Const;
1747}
1748
1749Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1750 StringRef Name) {
1751 GlobalVariable *GV =
1752 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1754 return GV;
1755}
1756
1757Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1758 StringRef Name, IntegerType *IntTy,
1759 uint32_t Storage) {
1760 if (!shouldExportConstantsAsAbsoluteSymbols())
1761 return ConstantInt::get(IntTy, Storage);
1762
1763 Constant *C = importGlobal(Slot, Args, Name);
1764 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1765 C = ConstantExpr::getPtrToInt(C, IntTy);
1766
1767 // We only need to set metadata if the global is newly created, in which
1768 // case it would not have hidden visibility.
1769 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1770 return C;
1771
1772 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1773 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1774 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1775 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1776 MDNode::get(M.getContext(), {MinC, MaxC}));
1777 };
1778 unsigned AbsWidth = IntTy->getBitWidth();
1779 if (AbsWidth == IntPtrTy->getBitWidth())
1780 SetAbsRange(~0ull, ~0ull); // Full set.
1781 else
1782 SetAbsRange(0, 1ull << AbsWidth);
1783 return C;
1784}
1785
1786void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1787 bool IsOne,
1788 Constant *UniqueMemberAddr) {
1789 for (auto &&Call : CSInfo.CallSites) {
1790 if (!OptimizedCalls.insert(&Call.CB).second)
1791 continue;
1792 IRBuilder<> B(&Call.CB);
1793 Value *Cmp =
1794 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1795 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1796 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1797 NumUniqueRetVal++;
1798 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1799 Cmp);
1800 }
1801 CSInfo.markDevirt();
1802}
1803
1804Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1805 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1806 ConstantInt::get(Int64Ty, M->Offset));
1807}
1808
1809bool DevirtModule::tryUniqueRetValOpt(
1810 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1811 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1812 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1813 // IsOne controls whether we look for a 0 or a 1.
1814 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1815 const TypeMemberInfo *UniqueMember = nullptr;
1816 for (const VirtualCallTarget &Target : TargetsForSlot) {
1817 if (Target.RetVal == (IsOne ? 1 : 0)) {
1818 if (UniqueMember)
1819 return false;
1820 UniqueMember = Target.TM;
1821 }
1822 }
1823
1824 // We should have found a unique member or bailed out by now. We already
1825 // checked for a uniform return value in tryUniformRetValOpt.
1826 assert(UniqueMember);
1827
1828 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1829 if (CSInfo.isExported()) {
1831 Res->Info = IsOne;
1832
1833 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1834 }
1835
1836 // Replace each call with the comparison.
1837 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1838 UniqueMemberAddr);
1839
1840 // Update devirtualization statistics for targets.
1841 if (RemarksEnabled || AreStatisticsEnabled())
1842 for (auto &&Target : TargetsForSlot)
1843 Target.WasDevirt = true;
1844
1845 return true;
1846 };
1847
1848 if (BitWidth == 1) {
1849 if (tryUniqueRetValOptFor(true))
1850 return true;
1851 if (tryUniqueRetValOptFor(false))
1852 return true;
1853 }
1854 return false;
1855}
1856
1857void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1858 Constant *Byte, Constant *Bit) {
1859 for (auto Call : CSInfo.CallSites) {
1860 if (!OptimizedCalls.insert(&Call.CB).second)
1861 continue;
1862 auto *RetType = cast<IntegerType>(Call.CB.getType());
1863 IRBuilder<> B(&Call.CB);
1864 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1865 if (RetType->getBitWidth() == 1) {
1866 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1867 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1868 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1869 NumVirtConstProp1Bit++;
1870 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1871 OREGetter, IsBitSet);
1872 } else {
1873 Value *Val = B.CreateLoad(RetType, Addr);
1874 NumVirtConstProp++;
1875 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1876 OREGetter, Val);
1877 }
1878 }
1879 CSInfo.markDevirt();
1880}
1881
1882bool DevirtModule::tryVirtualConstProp(
1883 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1884 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1885 // TODO: Skip for now if the vtable symbol was an alias to a function,
1886 // need to evaluate whether it would be correct to analyze the aliasee
1887 // function for this optimization.
1888 auto *Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1889 if (!Fn)
1890 return false;
1891 // This only works if the function returns an integer.
1892 auto *RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1893 if (!RetType)
1894 return false;
1895 unsigned BitWidth = RetType->getBitWidth();
1896
1897 // TODO: Since we can evaluated these constants at compile-time, we can save
1898 // some space by calculating the smallest range of values that all these
1899 // constants can fit in, then only allocate enough space to fit those values.
1900 // At each callsite, we can get the original type by doing a sign/zero
1901 // extension. For example, if we would store an i64, but we can see that all
1902 // the values fit into an i16, then we can store an i16 before/after the
1903 // vtable and at each callsite do a s/zext.
1904 if (BitWidth > 64)
1905 return false;
1906
1907 Align TypeAlignment = M.getDataLayout().getABIIntegerTypeAlignment(BitWidth);
1908
1909 // Make sure that each function is defined, does not access memory, takes at
1910 // least one argument, does not use its first argument (which we assume is
1911 // 'this'), and has the same return type.
1912 //
1913 // Note that we test whether this copy of the function is readnone, rather
1914 // than testing function attributes, which must hold for any copy of the
1915 // function, even a less optimized version substituted at link time. This is
1916 // sound because the virtual constant propagation optimizations effectively
1917 // inline all implementations of the virtual function into each call site,
1918 // rather than using function attributes to perform local optimization.
1919 for (VirtualCallTarget &Target : TargetsForSlot) {
1920 // TODO: Skip for now if the vtable symbol was an alias to a function,
1921 // need to evaluate whether it would be correct to analyze the aliasee
1922 // function for this optimization.
1923 auto *Fn = dyn_cast<Function>(Target.Fn);
1924 if (!Fn)
1925 return false;
1926
1927 if (Fn->isDeclaration() ||
1928 !computeFunctionBodyMemoryAccess(*Fn, FAM.getResult<AAManager>(*Fn))
1930 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1931 Fn->getReturnType() != RetType)
1932 return false;
1933
1934 // This only works if the integer size is at most the alignment of the
1935 // vtable. If the table is underaligned, then we can't guarantee that the
1936 // constant will always be aligned to the integer type alignment. For
1937 // example, if the table is `align 1`, we can never guarantee that an i32
1938 // stored before/after the vtable is 32-bit aligned without changing the
1939 // alignment of the new global.
1940 GlobalVariable *GV = Target.TM->Bits->GV;
1941 Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
1942 GV->getAlign(), GV->getValueType());
1943 if (TypeAlignment > TableAlignment)
1944 return false;
1945 }
1946
1947 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1948 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1949 continue;
1950
1951 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1952 if (Res)
1953 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1954
1955 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1956 continue;
1957
1958 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1959 ResByArg, Slot, CSByConstantArg.first))
1960 continue;
1961
1962 // Find an allocation offset in bits in all vtables associated with the
1963 // type.
1964 // TODO: If there would be "holes" in the vtable that were added by
1965 // padding, we could place i1s there to reduce any extra padding that
1966 // would be introduced by the i1s.
1967 uint64_t AllocBefore =
1968 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1969 uint64_t AllocAfter =
1970 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1971
1972 // Calculate the total amount of padding needed to store a value at both
1973 // ends of the object.
1974 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1975 for (auto &&Target : TargetsForSlot) {
1976 TotalPaddingBefore += std::max<int64_t>(
1977 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1978 TotalPaddingAfter += std::max<int64_t>(
1979 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1980 }
1981
1982 // If the amount of padding is too large, give up.
1983 // FIXME: do something smarter here.
1984 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1985 continue;
1986
1987 // Calculate the offset to the value as a (possibly negative) byte offset
1988 // and (if applicable) a bit offset, and store the values in the targets.
1989 int64_t OffsetByte;
1990 uint64_t OffsetBit;
1991 if (TotalPaddingBefore <= TotalPaddingAfter)
1992 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1993 OffsetBit);
1994 else
1995 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1996 OffsetBit);
1997
1998 // In an earlier check we forbade constant propagation from operating on
1999 // tables whose alignment is less than the alignment needed for loading
2000 // the constant. Thus, the address we take the offset from will always be
2001 // aligned to at least this integer alignment. Now, we need to ensure that
2002 // the offset is also aligned to this integer alignment to ensure we always
2003 // have an aligned load.
2004 assert(OffsetByte % TypeAlignment.value() == 0);
2005
2006 if (RemarksEnabled || AreStatisticsEnabled())
2007 for (auto &&Target : TargetsForSlot)
2008 Target.WasDevirt = true;
2009
2010
2011 if (CSByConstantArg.second.isExported()) {
2013 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
2014 ResByArg->Byte);
2015 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
2016 ResByArg->Bit);
2017 }
2018
2019 // Rewrite each call to a load from OffsetByte/OffsetBit.
2020 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
2021 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
2022 applyVirtualConstProp(CSByConstantArg.second,
2023 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
2024 }
2025 return true;
2026}
2027
2028void DevirtModule::rebuildGlobal(VTableBits &B) {
2029 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
2030 return;
2031
2032 // Align the before byte array to the global's minimum alignment so that we
2033 // don't break any alignment requirements on the global.
2034 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
2035 B.GV->getAlign(), B.GV->getValueType());
2036 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
2037
2038 // Before was stored in reverse order; flip it now.
2039 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
2040 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
2041
2042 // Build an anonymous global containing the before bytes, followed by the
2043 // original initializer, followed by the after bytes.
2044 auto *NewInit = ConstantStruct::getAnon(
2045 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
2046 B.GV->getInitializer(),
2047 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
2048 auto *NewGV =
2049 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
2050 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
2051 NewGV->setSection(B.GV->getSection());
2052 NewGV->setComdat(B.GV->getComdat());
2053 NewGV->setAlignment(B.GV->getAlign());
2054
2055 // Copy the original vtable's metadata to the anonymous global, adjusting
2056 // offsets as required.
2057 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
2058
2059 // Build an alias named after the original global, pointing at the second
2060 // element (the original initializer).
2061 auto *Alias = GlobalAlias::create(
2062 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
2064 NewInit->getType(), NewGV,
2065 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
2066 ConstantInt::get(Int32Ty, 1)}),
2067 &M);
2068 Alias->setVisibility(B.GV->getVisibility());
2069 Alias->takeName(B.GV);
2070
2071 B.GV->replaceAllUsesWith(Alias);
2072 B.GV->eraseFromParent();
2073}
2074
2075bool DevirtModule::areRemarksEnabled() {
2076 const auto &FL = M.getFunctionList();
2077 for (const Function &Fn : FL) {
2078 if (Fn.empty())
2079 continue;
2080 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
2081 return DI.isEnabled();
2082 }
2083 return false;
2084}
2085
2086void DevirtModule::scanTypeTestUsers(
2087 Function *TypeTestFunc,
2088 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
2089 // Find all virtual calls via a virtual table pointer %p under an assumption
2090 // of the form llvm.assume(llvm.type.test(%p, %md)) or
2091 // llvm.assume(llvm.public.type.test(%p, %md)).
2092 // This indicates that %p points to a member of the type identifier %md.
2093 // Group calls by (type ID, offset) pair (effectively the identity of the
2094 // virtual function) and store to CallSlots.
2095 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
2096 auto *CI = dyn_cast<CallInst>(U.getUser());
2097 if (!CI)
2098 continue;
2099 // Search for virtual calls based on %p and add them to DevirtCalls.
2102 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2103 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
2104
2105 Metadata *TypeId =
2106 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
2107 // If we found any, add them to CallSlots.
2108 if (!Assumes.empty()) {
2109 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
2110 for (DevirtCallSite Call : DevirtCalls)
2111 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
2112 }
2113
2114 auto RemoveTypeTestAssumes = [&]() {
2115 // We no longer need the assumes or the type test.
2116 for (auto *Assume : Assumes)
2117 Assume->eraseFromParent();
2118 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
2119 // may use the vtable argument later.
2120 if (CI->use_empty())
2121 CI->eraseFromParent();
2122 };
2123
2124 // At this point we could remove all type test assume sequences, as they
2125 // were originally inserted for WPD. However, we can keep these in the
2126 // code stream for later analysis (e.g. to help drive more efficient ICP
2127 // sequences). They will eventually be removed by a second LowerTypeTests
2128 // invocation that cleans them up. In order to do this correctly, the first
2129 // LowerTypeTests invocation needs to know that they have "Unknown" type
2130 // test resolution, so that they aren't treated as Unsat and lowered to
2131 // False, which will break any uses on assumes. Below we remove any type
2132 // test assumes that will not be treated as Unknown by LTT.
2133
2134 // The type test assumes will be treated by LTT as Unsat if the type id is
2135 // not used on a global (in which case it has no entry in the TypeIdMap).
2136 if (!TypeIdMap.count(TypeId))
2137 RemoveTypeTestAssumes();
2138
2139 // For ThinLTO importing, we need to remove the type test assumes if this is
2140 // an MDString type id without a corresponding TypeIdSummary. Any
2141 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2142 // type test assumes can be kept. If the MDString type id is missing a
2143 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2144 // exporting phase of WPD from analyzing it), then it would be treated as
2145 // Unsat by LTT and we need to remove its type test assumes here. If not
2146 // used on a vcall we don't need them for later optimization use in any
2147 // case.
2148 else if (ImportSummary && isa<MDString>(TypeId)) {
2149 const TypeIdSummary *TidSummary =
2150 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2151 if (!TidSummary)
2152 RemoveTypeTestAssumes();
2153 else
2154 // If one was created it should not be Unsat, because if we reached here
2155 // the type id was used on a global.
2157 }
2158 }
2159}
2160
2161void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2162 Function *TypeTestFunc =
2163 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
2164
2165 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2166 auto *CI = dyn_cast<CallInst>(U.getUser());
2167 if (!CI)
2168 continue;
2169
2170 Value *Ptr = CI->getArgOperand(0);
2171 Value *Offset = CI->getArgOperand(1);
2172 Value *TypeIdValue = CI->getArgOperand(2);
2173 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2174
2178 bool HasNonCallUses = false;
2179 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2180 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2181 HasNonCallUses, CI, DT);
2182
2183 // Start by generating "pessimistic" code that explicitly loads the function
2184 // pointer from the vtable and performs the type check. If possible, we will
2185 // eliminate the load and the type check later.
2186
2187 // If possible, only generate the load at the point where it is used.
2188 // This helps avoid unnecessary spills.
2189 IRBuilder<> LoadB(
2190 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2191
2192 Value *LoadedValue = nullptr;
2193 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2194 Intrinsic::type_checked_load_relative) {
2196 &M, Intrinsic::load_relative, {Int32Ty});
2197 LoadedValue = LoadB.CreateCall(LoadRelFunc, {Ptr, Offset});
2198 } else {
2199 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2200 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2201 }
2202
2203 for (Instruction *LoadedPtr : LoadedPtrs) {
2204 LoadedPtr->replaceAllUsesWith(LoadedValue);
2205 LoadedPtr->eraseFromParent();
2206 }
2207
2208 // Likewise for the type test.
2209 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2210 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2211
2212 for (Instruction *Pred : Preds) {
2213 Pred->replaceAllUsesWith(TypeTestCall);
2214 Pred->eraseFromParent();
2215 }
2216
2217 // We have already erased any extractvalue instructions that refer to the
2218 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2219 // (although this is unlikely). In that case, explicitly build a pair and
2220 // RAUW it.
2221 if (!CI->use_empty()) {
2222 Value *Pair = PoisonValue::get(CI->getType());
2223 IRBuilder<> B(CI);
2224 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2225 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2226 CI->replaceAllUsesWith(Pair);
2227 }
2228
2229 // The number of unsafe uses is initially the number of uses.
2230 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2231 NumUnsafeUses = DevirtCalls.size();
2232
2233 // If the function pointer has a non-call user, we cannot eliminate the type
2234 // check, as one of those users may eventually call the pointer. Increment
2235 // the unsafe use count to make sure it cannot reach zero.
2236 if (HasNonCallUses)
2237 ++NumUnsafeUses;
2238 for (DevirtCallSite Call : DevirtCalls) {
2239 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2240 &NumUnsafeUses);
2241 }
2242
2243 CI->eraseFromParent();
2244 }
2245}
2246
2247void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2248 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2249 if (!TypeId)
2250 return;
2251 const TypeIdSummary *TidSummary =
2252 ImportSummary->getTypeIdSummary(TypeId->getString());
2253 if (!TidSummary)
2254 return;
2255 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2256 if (ResI == TidSummary->WPDRes.end())
2257 return;
2258 const WholeProgramDevirtResolution &Res = ResI->second;
2259
2261 assert(!Res.SingleImplName.empty());
2262 // The type of the function in the declaration is irrelevant because every
2263 // call site will cast it to the correct type.
2264 Constant *SingleImpl =
2265 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2266 Type::getVoidTy(M.getContext()))
2267 .getCallee());
2268
2269 // This is the import phase so we should not be exporting anything.
2270 bool IsExported = false;
2271 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2272 assert(!IsExported);
2273 }
2274
2275 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2276 auto I = Res.ResByArg.find(CSByConstantArg.first);
2277 if (I == Res.ResByArg.end())
2278 continue;
2279 auto &ResByArg = I->second;
2280 // FIXME: We should figure out what to do about the "function name" argument
2281 // to the apply* functions, as the function names are unavailable during the
2282 // importing phase. For now we just pass the empty string. This does not
2283 // impact correctness because the function names are just used for remarks.
2284 switch (ResByArg.TheKind) {
2286 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2287 break;
2289 Constant *UniqueMemberAddr =
2290 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2291 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2292 UniqueMemberAddr);
2293 break;
2294 }
2296 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2297 Int32Ty, ResByArg.Byte);
2298 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2299 ResByArg.Bit);
2300 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2301 break;
2302 }
2303 default:
2304 break;
2305 }
2306 }
2307
2309 // The type of the function is irrelevant, because it's bitcast at calls
2310 // anyhow.
2311 auto *JT = cast<Function>(
2312 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2313 Type::getVoidTy(M.getContext()))
2314 .getCallee());
2315 bool IsExported = false;
2316 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
2317 assert(!IsExported);
2318 }
2319}
2320
2321void DevirtModule::removeRedundantTypeTests() {
2322 auto *True = ConstantInt::getTrue(M.getContext());
2323 for (auto &&U : NumUnsafeUsesForTypeTest) {
2324 if (U.second == 0) {
2325 U.first->replaceAllUsesWith(True);
2326 U.first->eraseFromParent();
2327 }
2328 }
2329}
2330
2331ValueInfo
2332DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2333 ModuleSummaryIndex *ExportSummary) {
2334 assert((ExportSummary != nullptr) &&
2335 "Caller guarantees ExportSummary is not nullptr");
2336
2337 const auto TheFnGUID = TheFn->getGUID();
2338 const auto TheFnGUIDWithExportedName =
2340 // Look up ValueInfo with the GUID in the current linkage.
2341 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2342 // If no entry is found and GUID is different from GUID computed using
2343 // exported name, look up ValueInfo with the exported name unconditionally.
2344 // This is a fallback.
2345 //
2346 // The reason to have a fallback:
2347 // 1. LTO could enable global value internalization via
2348 // `enable-lto-internalization`.
2349 // 2. The GUID in ExportedSummary is computed using exported name.
2350 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2351 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2352 }
2353 return TheFnVI;
2354}
2355
2356bool DevirtModule::mustBeUnreachableFunction(
2357 Function *const F, ModuleSummaryIndex *ExportSummary) {
2359 return false;
2360 // First, learn unreachability by analyzing function IR.
2361 if (!F->isDeclaration()) {
2362 // A function must be unreachable if its entry block ends with an
2363 // 'unreachable'.
2364 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2365 }
2366 // Learn unreachability from ExportSummary if ExportSummary is present.
2367 return ExportSummary &&
2369 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2370}
2371
2372bool DevirtModule::run() {
2373 // If only some of the modules were split, we cannot correctly perform
2374 // this transformation. We already checked for the presense of type tests
2375 // with partially split modules during the thin link, and would have emitted
2376 // an error if any were found, so here we can simply return.
2377 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2378 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2379 return false;
2380
2381 Function *PublicTypeTestFunc = nullptr;
2382 // If we are in speculative devirtualization mode, we can work on the public
2383 // type test intrinsics.
2384 if (DevirtSpeculatively)
2385 PublicTypeTestFunc =
2386 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
2387 Function *TypeTestFunc =
2388 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2389 Function *TypeCheckedLoadFunc =
2390 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
2391 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
2392 &M, Intrinsic::type_checked_load_relative);
2393 Function *AssumeFunc =
2394 Intrinsic::getDeclarationIfExists(&M, Intrinsic::assume);
2395
2396 // Normally if there are no users of the devirtualization intrinsics in the
2397 // module, this pass has nothing to do. But if we are exporting, we also need
2398 // to handle any users that appear only in the function summaries.
2399 if (!ExportSummary &&
2400 (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
2401 (!TypeTestFunc || TypeTestFunc->use_empty())) ||
2402 !AssumeFunc || AssumeFunc->use_empty()) &&
2403 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2404 (!TypeCheckedLoadRelativeFunc ||
2405 TypeCheckedLoadRelativeFunc->use_empty()))
2406 return false;
2407
2408 // Rebuild type metadata into a map for easy lookup.
2409 std::vector<VTableBits> Bits;
2410 DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
2411 buildTypeIdentifierMap(Bits, TypeIdMap);
2412
2413 if (PublicTypeTestFunc && AssumeFunc)
2414 scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
2415
2416 if (TypeTestFunc && AssumeFunc)
2417 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2418
2419 if (TypeCheckedLoadFunc)
2420 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2421
2422 if (TypeCheckedLoadRelativeFunc)
2423 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2424
2425 if (ImportSummary) {
2426 for (auto &S : CallSlots)
2427 importResolution(S.first, S.second);
2428
2429 removeRedundantTypeTests();
2430
2431 // We have lowered or deleted the type intrinsics, so we will no longer have
2432 // enough information to reason about the liveness of virtual function
2433 // pointers in GlobalDCE.
2434 for (GlobalVariable &GV : M.globals())
2435 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2436
2437 // The rest of the code is only necessary when exporting or during regular
2438 // LTO, so we are done.
2439 return true;
2440 }
2441
2442 if (TypeIdMap.empty())
2443 return true;
2444
2445 // Collect information from summary about which calls to try to devirtualize.
2446 if (ExportSummary) {
2447 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2448 for (auto &P : TypeIdMap) {
2449 if (auto *TypeId = dyn_cast<MDString>(P.first))
2451 TypeId->getString())]
2452 .push_back(TypeId);
2453 }
2454
2455 for (auto &P : *ExportSummary) {
2456 for (auto &S : P.second.getSummaryList()) {
2457 auto *FS = dyn_cast<FunctionSummary>(S.get());
2458 if (!FS)
2459 continue;
2460 // FIXME: Only add live functions.
2461 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2462 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2463 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2464 }
2465 }
2466 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2467 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2468 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2469 }
2470 }
2471 for (const FunctionSummary::ConstVCall &VC :
2472 FS->type_test_assume_const_vcalls()) {
2473 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2474 CallSlots[{MD, VC.VFunc.Offset}]
2475 .ConstCSInfo[VC.Args]
2476 .addSummaryTypeTestAssumeUser(FS);
2477 }
2478 }
2479 for (const FunctionSummary::ConstVCall &VC :
2480 FS->type_checked_load_const_vcalls()) {
2481 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2482 CallSlots[{MD, VC.VFunc.Offset}]
2483 .ConstCSInfo[VC.Args]
2484 .addSummaryTypeCheckedLoadUser(FS);
2485 }
2486 }
2487 }
2488 }
2489 }
2490
2491 // For each (type, offset) pair:
2492 bool DidVirtualConstProp = false;
2493 std::map<std::string, GlobalValue *> DevirtTargets;
2494 for (auto &S : CallSlots) {
2495 // Search each of the members of the type identifier for the virtual
2496 // function implementation at offset S.first.ByteOffset, and add to
2497 // TargetsForSlot.
2498 std::vector<VirtualCallTarget> TargetsForSlot;
2499 WholeProgramDevirtResolution *Res = nullptr;
2500 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2501 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2502 TypeMemberInfos.size())
2503 // For any type id used on a global's type metadata, create the type id
2504 // summary resolution regardless of whether we can devirtualize, so that
2505 // lower type tests knows the type id is not Unsat. If it was not used on
2506 // a global's type metadata, the TypeIdMap entry set will be empty, and
2507 // we don't want to create an entry (with the default Unknown type
2508 // resolution), which can prevent detection of the Unsat.
2509 Res = &ExportSummary
2510 ->getOrInsertTypeIdSummary(
2511 cast<MDString>(S.first.TypeID)->getString())
2512 .WPDRes[S.first.ByteOffset];
2513 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2514 S.first.ByteOffset, ExportSummary)) {
2515 bool SingleImplDevirt =
2516 trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
2517 // Out of speculative devirtualization mode, Try to apply virtual constant
2518 // propagation or branch funneling.
2519 // TODO: This should eventually be enabled for non-public type tests.
2520 if (!SingleImplDevirt && !DevirtSpeculatively) {
2521 DidVirtualConstProp |=
2522 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2523
2524 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2525 }
2526
2527 // Collect functions devirtualized at least for one call site for stats.
2528 if (RemarksEnabled || AreStatisticsEnabled())
2529 for (const auto &T : TargetsForSlot)
2530 if (T.WasDevirt)
2531 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2532 }
2533
2534 // CFI-specific: if we are exporting and any llvm.type.checked.load
2535 // intrinsics were *not* devirtualized, we need to add the resulting
2536 // llvm.type.test intrinsics to the function summaries so that the
2537 // LowerTypeTests pass will export them.
2538 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2540 cast<MDString>(S.first.TypeID)->getString());
2541 auto AddTypeTestsForTypeCheckedLoads = [&](CallSiteInfo &CSI) {
2542 if (!CSI.AllCallSitesDevirted)
2543 for (auto *FS : CSI.SummaryTypeCheckedLoadUsers)
2544 FS->addTypeTest(GUID);
2545 };
2546 AddTypeTestsForTypeCheckedLoads(S.second.CSInfo);
2547 for (auto &CCS : S.second.ConstCSInfo)
2548 AddTypeTestsForTypeCheckedLoads(CCS.second);
2549 }
2550 }
2551
2552 if (RemarksEnabled) {
2553 // Generate remarks for each devirtualized function.
2554 for (const auto &DT : DevirtTargets) {
2555 GlobalValue *GV = DT.second;
2556 auto *F = dyn_cast<Function>(GV);
2557 if (!F) {
2558 auto *A = dyn_cast<GlobalAlias>(GV);
2559 assert(A && isa<Function>(A->getAliasee()));
2560 F = dyn_cast<Function>(A->getAliasee());
2561 assert(F);
2562 }
2563
2564 using namespace ore;
2565 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2566 << "devirtualized " << NV("FunctionName", DT.first));
2567 }
2568 }
2569
2570 NumDevirtTargets += DevirtTargets.size();
2571
2572 removeRedundantTypeTests();
2573
2574 // Rebuild each global we touched as part of virtual constant propagation to
2575 // include the before and after bytes.
2576 if (DidVirtualConstProp)
2577 for (VTableBits &B : Bits)
2578 rebuildGlobal(B);
2579
2580 // We have lowered or deleted the type intrinsics, so we will no longer have
2581 // enough information to reason about the liveness of virtual function
2582 // pointers in GlobalDCE.
2583 for (GlobalVariable &GV : M.globals())
2584 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2585
2586 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2587 CI->eraseFromParent();
2588
2589 return true;
2590}
2591
2592void DevirtIndex::run() {
2593 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2594 return;
2595
2596 // Assert that we haven't made any changes that would affect the hasLocal()
2597 // flag on the GUID summary info.
2598 assert(!ExportSummary.withInternalizeAndPromote() &&
2599 "Expect index-based WPD to run before internalization and promotion");
2600
2601 DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
2602 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2603 NameByGUID[GlobalValue::getGUIDAssumingExternalLinkage(P.first)].push_back(
2604 P.first);
2605 // Create the type id summary resolution regardlness of whether we can
2606 // devirtualize, so that lower type tests knows the type id is used on
2607 // a global and not Unsat. We do this here rather than in the loop over the
2608 // CallSlots, since that handling will only see type tests that directly
2609 // feed assumes, and we would miss any that aren't currently handled by WPD
2610 // (such as type tests that feed assumes via phis).
2611 ExportSummary.getOrInsertTypeIdSummary(P.first);
2612 }
2613
2614 // Collect information from summary about which calls to try to devirtualize.
2615 for (auto &P : ExportSummary) {
2616 for (auto &S : P.second.getSummaryList()) {
2617 auto *FS = dyn_cast<FunctionSummary>(S.get());
2618 if (!FS)
2619 continue;
2620 // FIXME: Only add live functions.
2621 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2622 for (StringRef Name : NameByGUID[VF.GUID]) {
2623 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2624 }
2625 }
2626 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2627 for (StringRef Name : NameByGUID[VF.GUID]) {
2628 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2629 }
2630 }
2631 for (const FunctionSummary::ConstVCall &VC :
2632 FS->type_test_assume_const_vcalls()) {
2633 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2634 CallSlots[{Name, VC.VFunc.Offset}]
2635 .ConstCSInfo[VC.Args]
2636 .addSummaryTypeTestAssumeUser(FS);
2637 }
2638 }
2639 for (const FunctionSummary::ConstVCall &VC :
2640 FS->type_checked_load_const_vcalls()) {
2641 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2642 CallSlots[{Name, VC.VFunc.Offset}]
2643 .ConstCSInfo[VC.Args]
2644 .addSummaryTypeCheckedLoadUser(FS);
2645 }
2646 }
2647 }
2648 }
2649
2650 std::set<ValueInfo> DevirtTargets;
2651 // For each (type, offset) pair:
2652 for (auto &S : CallSlots) {
2653 // Search each of the members of the type identifier for the virtual
2654 // function implementation at offset S.first.ByteOffset, and add to
2655 // TargetsForSlot.
2656 std::vector<ValueInfo> TargetsForSlot;
2657 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2658 assert(TidSummary);
2659 // The type id summary would have been created while building the NameByGUID
2660 // map earlier.
2661 WholeProgramDevirtResolution *Res =
2662 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2663 ->WPDRes[S.first.ByteOffset];
2664 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2665 S.first.ByteOffset)) {
2666
2667 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2668 DevirtTargets))
2669 continue;
2670 }
2671 }
2672
2673 // Optionally have the thin link print message for each devirtualized
2674 // function.
2676 for (const auto &DT : DevirtTargets)
2677 errs() << "Devirtualized call to " << DT << "\n";
2678
2679 NumDevirtTargets += DevirtTargets.size();
2680}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Provides passes for computing function attributes based on interprocedural analyses.
@ CallSiteInfo
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
#define T
static bool mustBeUnreachableFunction(const Function &F)
This is the interface to build a ModuleSummaryIndex for a module.
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< bool > WholeProgramDevirtKeepUnreachableFunction("wholeprogramdevirt-keep-unreachable-function", cl::desc("Regard unreachable functions as possible devirtualize targets."), cl::Hidden, cl::init(true))
With Clang, a pure virtual class's deleting destructor is emitted as a llvm.trap intrinsic followed b...
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:223
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setCallingConv(CallingConv::ID CC)
bool arg_empty() const
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the attributes for this call.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
@ ICMP_NE
not equal
Definition InstrTypes.h:698
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:720
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1311
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1284
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition Constants.h:491
const Constant * stripPointerCasts() const
Definition Constant.h:222
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static bool shouldExecute(CounterInfo &Counter)
bool empty() const
Definition DenseMap.h:109
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition Error.h:334
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Type * getParamType(unsigned i) const
Parameter type accessors.
ArrayRef< Type * > params() const
bool isVarArg() const
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
bool empty() const
Definition Function.h:857
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
bool arg_empty() const
Definition Function.h:900
const BasicBlock & front() const
Definition Function.h:858
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:244
arg_iterator arg_begin()
Definition Function.h:866
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
unsigned getInstructionCount() const
Returns the number of non-debug IR instructions in this function.
Definition Function.cpp:367
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:598
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition Value.h:602
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI VCallVisibility getVCallVisibility() const
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI void setVCallVisibilityMetadata(VCallVisibility Visibility)
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
void setVisibility(VisibilityTypes V)
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition ModRef.h:220
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Analysis providing profile information.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
Target - Wrapper for Target specific information.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
bool use_empty() const
Definition Value.h:346
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition FileSystem.h:764
LLVM_ABI uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
LLVM_ABI void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
LLVM_ABI void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
@ Export
Export information to summary.
Definition IPO.h:57
@ None
Do nothing.
Definition IPO.h:55
@ Import
Import information from summary.
Definition IPO.h:56
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI ModuleSummaryIndex buildModuleSummaryIndex(const Module &M, std::function< BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI, std::function< const StackSafetyInfo *(const Function &F)> GetSSICallback=[](const Function &F) -> const StackSafetyInfo *{ return nullptr;})
Direct function to compute a ModuleSummaryIndex from a given module.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const ModuleToSummariesForIndexTy *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
@ invalid_argument
Definition Errc.h:56
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
LLVM_ABI void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
LLVM_ABI void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
LLVM_ABI CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName)
Analogous to setExplicitlyUnknownBranchWeights, but for functions and their entry counts.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
LLVM_ABI void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition Error.h:1245
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static cl::opt< bool > ClDevirtualizeSpeculatively("devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
LLVM_ABI void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
LLVM_ABI void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
const ModuleSummaryIndex * ImportSummary
ModuleSummaryIndex * ExportSummary
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
LLVM_ABI VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)