LLVM 22.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular/thin and non-LTO
28// pipelines:
29//
30// During regular LTO, the pass determines the best optimization for each
31// virtual call and applies the resolutions directly to virtual calls that are
32// eligible for virtual call optimization (i.e. calls that use either of the
33// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
34//
35// During hybrid Regular/ThinLTO, the pass operates in two phases:
36// - Export phase: this is run during the thin link over a single merged module
37// that contains all vtables with !type metadata that participate in the link.
38// The pass computes a resolution for each virtual call and stores it in the
39// type identifier summary.
40// - Import phase: this is run during the thin backends over the individual
41// modules. The pass applies the resolutions previously computed during the
42// import phase to each eligible virtual call.
43//
44// During ThinLTO, the pass operates in two phases:
45// - Export phase: this is run during the thin link over the index which
46// contains a summary of all vtables with !type metadata that participate in
47// the link. It computes a resolution for each virtual call and stores it in
48// the type identifier summary. Only single implementation devirtualization
49// is supported.
50// - Import phase: (same as with hybrid case above).
51//
52// During Speculative devirtualization mode -not restricted to LTO-:
53// - The pass applies speculative devirtualization without requiring any type of
54// visibility.
55// - Skips other features like virtual constant propagation, uniform return
56// value optimization, unique return value optimization and branch funnels as
57// they need LTO.
58// - This mode is enabled via 'devirtualize-speculatively' flag.
59//
60//===----------------------------------------------------------------------===//
61
63#include "llvm/ADT/ArrayRef.h"
64#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/DenseSet.h"
67#include "llvm/ADT/MapVector.h"
69#include "llvm/ADT/Statistic.h"
79#include "llvm/IR/Constants.h"
80#include "llvm/IR/DataLayout.h"
81#include "llvm/IR/DebugLoc.h"
84#include "llvm/IR/Dominators.h"
85#include "llvm/IR/Function.h"
86#include "llvm/IR/GlobalAlias.h"
88#include "llvm/IR/IRBuilder.h"
89#include "llvm/IR/InstrTypes.h"
90#include "llvm/IR/Instruction.h"
92#include "llvm/IR/Intrinsics.h"
93#include "llvm/IR/LLVMContext.h"
94#include "llvm/IR/MDBuilder.h"
95#include "llvm/IR/Metadata.h"
96#include "llvm/IR/Module.h"
98#include "llvm/IR/PassManager.h"
100#include "llvm/Support/Casting.h"
102#include "llvm/Support/Errc.h"
103#include "llvm/Support/Error.h"
108#include "llvm/Transforms/IPO.h"
113#include <algorithm>
114#include <cmath>
115#include <cstddef>
116#include <map>
117#include <set>
118#include <string>
119
120using namespace llvm;
121using namespace wholeprogramdevirt;
122
123#define DEBUG_TYPE "wholeprogramdevirt"
124
125STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
126STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
127STATISTIC(NumBranchFunnel, "Number of branch funnels");
128STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
129STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
130STATISTIC(NumVirtConstProp1Bit,
131 "Number of 1 bit virtual constant propagations");
132STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
133
134namespace llvm {
135
137 "wholeprogramdevirt-summary-action",
138 cl::desc("What to do with the summary when running this pass"),
139 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
141 "Import typeid resolutions from summary and globals"),
143 "Export typeid resolutions to summary and globals")),
144 cl::Hidden);
145
147 "wholeprogramdevirt-read-summary",
148 cl::desc(
149 "Read summary from given bitcode or YAML file before running pass"),
150 cl::Hidden);
151
153 "wholeprogramdevirt-write-summary",
154 cl::desc("Write summary to given bitcode or YAML file after running pass. "
155 "Output file format is deduced from extension: *.bc means writing "
156 "bitcode, otherwise YAML"),
157 cl::Hidden);
158
159// TODO: This option eventually should support any public visibility vtables
160// with/out LTO.
162 "devirtualize-speculatively",
163 cl::desc("Enable speculative devirtualization optimization"),
164 cl::init(false));
165
167 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
168 cl::init(10),
169 cl::desc("Maximum number of call targets per "
170 "call site to enable branch funnels"));
171
172static cl::opt<bool>
173 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
174 cl::desc("Print index-based devirtualization messages"));
175
176/// Provide a way to force enable whole program visibility in tests.
177/// This is needed to support legacy tests that don't contain
178/// !vcall_visibility metadata (the mere presense of type tests
179/// previously implied hidden visibility).
180static cl::opt<bool>
181 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
182 cl::desc("Enable whole program visibility"));
183
184/// Provide a way to force disable whole program for debugging or workarounds,
185/// when enabled via the linker.
187 "disable-whole-program-visibility", cl::Hidden,
188 cl::desc("Disable whole program visibility (overrides enabling options)"));
189
190/// Provide way to prevent certain function from being devirtualized
192 SkipFunctionNames("wholeprogramdevirt-skip",
193 cl::desc("Prevent function(s) from being devirtualized"),
195
197
198} // end namespace llvm
199
200/// With Clang, a pure virtual class's deleting destructor is emitted as a
201/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
202/// context of whole program devirtualization, the deleting destructor of a pure
203/// virtual class won't be invoked by the source code so safe to skip as a
204/// devirtualize target.
205///
206/// However, not all unreachable functions are safe to skip. In some cases, the
207/// program intends to run such functions and terminate, for instance, a unit
208/// test may run a death test. A non-test program might (or allowed to) invoke
209/// such functions to report failures (whether/when it's a good practice or not
210/// is a different topic).
211///
212/// This option is enabled to keep an unreachable function as a possible
213/// devirtualize target to conservatively keep the program behavior.
214///
215/// TODO: Make a pure virtual class's deleting destructor precisely identifiable
216/// in Clang's codegen for more devirtualization in LLVM.
218 "wholeprogramdevirt-keep-unreachable-function",
219 cl::desc("Regard unreachable functions as possible devirtualize targets."),
220 cl::Hidden, cl::init(true));
221
222/// If explicitly specified, the devirt module pass will stop transformation
223/// once the total number of devirtualizations reach the cutoff value. Setting
224/// this option to 0 explicitly will do 0 devirtualization.
226 "wholeprogramdevirt-cutoff",
227 cl::desc("Max number of devirtualizations for devirt module pass"),
228 cl::init(0));
229
230/// Mechanism to add runtime checking of devirtualization decisions, optionally
231/// trapping or falling back to indirect call on any that are not correct.
232/// Trapping mode is useful for debugging undefined behavior leading to failures
233/// with WPD. Fallback mode is useful for ensuring safety when whole program
234/// visibility may be compromised.
237 "wholeprogramdevirt-check", cl::Hidden,
238 cl::desc("Type of checking for incorrect devirtualizations"),
239 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
240 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
242 "Fallback to indirect when incorrect")));
243
244namespace {
245struct PatternList {
246 std::vector<GlobPattern> Patterns;
247 template <class T> void init(const T &StringList) {
248 for (const auto &S : StringList)
250 Patterns.push_back(std::move(*Pat));
251 }
252 bool match(StringRef S) {
253 for (const GlobPattern &P : Patterns)
254 if (P.match(S))
255 return true;
256 return false;
257 }
258};
259} // namespace
260
261// Find the minimum offset that we may store a value of size Size bits at. If
262// IsAfter is set, look for an offset before the object, otherwise look for an
263// offset after the object.
266 bool IsAfter, uint64_t Size) {
267 // Find a minimum offset taking into account only vtable sizes.
268 uint64_t MinByte = 0;
269 for (const VirtualCallTarget &Target : Targets) {
270 if (IsAfter)
271 MinByte = std::max(MinByte, Target.minAfterBytes());
272 else
273 MinByte = std::max(MinByte, Target.minBeforeBytes());
274 }
275
276 // Build a vector of arrays of bytes covering, for each target, a slice of the
277 // used region (see AccumBitVector::BytesUsed in
278 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
279 // this aligns the used regions to start at MinByte.
280 //
281 // In this example, A, B and C are vtables, # is a byte already allocated for
282 // a virtual function pointer, AAAA... (etc.) are the used regions for the
283 // vtables and Offset(X) is the value computed for the Offset variable below
284 // for X.
285 //
286 // Offset(A)
287 // | |
288 // |MinByte
289 // A: ################AAAAAAAA|AAAAAAAA
290 // B: ########BBBBBBBBBBBBBBBB|BBBB
291 // C: ########################|CCCCCCCCCCCCCCCC
292 // | Offset(B) |
293 //
294 // This code produces the slices of A, B and C that appear after the divider
295 // at MinByte.
296 std::vector<ArrayRef<uint8_t>> Used;
297 for (const VirtualCallTarget &Target : Targets) {
298 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
299 : Target.TM->Bits->Before.BytesUsed;
300 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
301 : MinByte - Target.minBeforeBytes();
302
303 // Disregard used regions that are smaller than Offset. These are
304 // effectively all-free regions that do not need to be checked.
305 if (VTUsed.size() > Offset)
306 Used.push_back(VTUsed.slice(Offset));
307 }
308
309 if (Size == 1) {
310 // Find a free bit in each member of Used.
311 for (unsigned I = 0;; ++I) {
312 uint8_t BitsUsed = 0;
313 for (auto &&B : Used)
314 if (I < B.size())
315 BitsUsed |= B[I];
316 if (BitsUsed != 0xff)
317 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
318 }
319 } else {
320 // Find a free (Size/8) byte region in each member of Used.
321 // FIXME: see if alignment helps.
322 for (unsigned I = 0;; ++I) {
323 for (auto &&B : Used) {
324 unsigned Byte = 0;
325 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
326 if (B[I + Byte])
327 goto NextI;
328 ++Byte;
329 }
330 }
331 // Rounding up ensures the constant is always stored at address we
332 // can directly load from without misalignment.
333 return alignTo((MinByte + I) * 8, Size);
334 NextI:;
335 }
336 }
337}
338
340 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
341 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
342 if (BitWidth == 1)
343 OffsetByte = -(AllocBefore / 8 + 1);
344 else
345 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
346 OffsetBit = AllocBefore % 8;
347
348 for (VirtualCallTarget &Target : Targets) {
349 if (BitWidth == 1)
350 Target.setBeforeBit(AllocBefore);
351 else
352 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
353 }
354}
355
358 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
359 if (BitWidth == 1)
360 OffsetByte = AllocAfter / 8;
361 else
362 OffsetByte = (AllocAfter + 7) / 8;
363 OffsetBit = AllocAfter % 8;
364
365 for (VirtualCallTarget &Target : Targets) {
366 if (BitWidth == 1)
367 Target.setAfterBit(AllocAfter);
368 else
369 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
370 }
371}
372
377
378namespace {
379
380// Tracks the number of devirted calls in the IR transformation.
381static unsigned NumDevirtCalls = 0;
382
383// A slot in a set of virtual tables. The TypeID identifies the set of virtual
384// tables, and the ByteOffset is the offset in bytes from the address point to
385// the virtual function pointer.
386struct VTableSlot {
387 Metadata *TypeID;
388 uint64_t ByteOffset;
389};
390
391} // end anonymous namespace
392
393template <> struct llvm::DenseMapInfo<VTableSlot> {
402 static unsigned getHashValue(const VTableSlot &I) {
405 }
406 static bool isEqual(const VTableSlot &LHS,
407 const VTableSlot &RHS) {
408 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
409 }
410};
411
421 static unsigned getHashValue(const VTableSlotSummary &I) {
424 }
425 static bool isEqual(const VTableSlotSummary &LHS,
426 const VTableSlotSummary &RHS) {
427 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
428 }
429};
430
431// Returns true if the function must be unreachable based on ValueInfo.
432//
433// In particular, identifies a function as unreachable in the following
434// conditions
435// 1) All summaries are live.
436// 2) All function summaries indicate it's unreachable
437// 3) There is no non-function with the same GUID (which is rare)
440 return false;
441
442 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
443 // Returns false if ValueInfo is absent, or the summary list is empty
444 // (e.g., function declarations).
445 return false;
446 }
447
448 for (const auto &Summary : TheFnVI.getSummaryList()) {
449 // Conservatively returns false if any non-live functions are seen.
450 // In general either all summaries should be live or all should be dead.
451 if (!Summary->isLive())
452 return false;
453 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
454 if (!FS->fflags().MustBeUnreachable)
455 return false;
456 }
457 // Be conservative if a non-function has the same GUID (which is rare).
458 else
459 return false;
460 }
461 // All function summaries are live and all of them agree that the function is
462 // unreachble.
463 return true;
464}
465
466namespace {
467// A virtual call site. VTable is the loaded virtual table pointer, and CS is
468// the indirect virtual call.
469struct VirtualCallSite {
470 Value *VTable = nullptr;
471 CallBase &CB;
472
473 // If non-null, this field points to the associated unsafe use count stored in
474 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
475 // of that field for details.
476 unsigned *NumUnsafeUses = nullptr;
477
478 void
479 emitRemark(const StringRef OptName, const StringRef TargetName,
480 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter) {
481 Function *F = CB.getCaller();
482 DebugLoc DLoc = CB.getDebugLoc();
483 BasicBlock *Block = CB.getParent();
484
485 using namespace ore;
486 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
487 << NV("Optimization", OptName)
488 << ": devirtualized a call to "
489 << NV("FunctionName", TargetName));
490 }
491
492 void replaceAndErase(
493 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
494 function_ref<OptimizationRemarkEmitter &(Function &)> OREGetter,
495 Value *New) {
496 if (RemarksEnabled)
497 emitRemark(OptName, TargetName, OREGetter);
498 CB.replaceAllUsesWith(New);
499 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
500 BranchInst::Create(II->getNormalDest(), CB.getIterator());
501 II->getUnwindDest()->removePredecessor(II->getParent());
502 }
503 CB.eraseFromParent();
504 // This use is no longer unsafe.
505 if (NumUnsafeUses)
506 --*NumUnsafeUses;
507 }
508};
509
510// Call site information collected for a specific VTableSlot and possibly a list
511// of constant integer arguments. The grouping by arguments is handled by the
512// VTableSlotInfo class.
513struct CallSiteInfo {
514 /// The set of call sites for this slot. Used during regular LTO and the
515 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
516 /// call sites that appear in the merged module itself); in each of these
517 /// cases we are directly operating on the call sites at the IR level.
518 std::vector<VirtualCallSite> CallSites;
519
520 /// Whether all call sites represented by this CallSiteInfo, including those
521 /// in summaries, have been devirtualized. This starts off as true because a
522 /// default constructed CallSiteInfo represents no call sites.
523 ///
524 /// If at the end of the pass there are still undevirtualized calls, we will
525 /// need to add a use of llvm.type.test to each of the function summaries in
526 /// the vector.
527 bool AllCallSitesDevirted = true;
528
529 // These fields are used during the export phase of ThinLTO and reflect
530 // information collected from function summaries.
531
532 /// CFI-specific: a vector containing the list of function summaries that use
533 /// the llvm.type.checked.load intrinsic and therefore will require
534 /// resolutions for llvm.type.test in order to implement CFI checks if
535 /// devirtualization was unsuccessful.
536 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
537
538 /// A vector containing the list of function summaries that use
539 /// assume(llvm.type.test).
540 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
541
542 bool isExported() const {
543 return !SummaryTypeCheckedLoadUsers.empty() ||
544 !SummaryTypeTestAssumeUsers.empty();
545 }
546
547 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
548 SummaryTypeCheckedLoadUsers.push_back(FS);
549 AllCallSitesDevirted = false;
550 }
551
552 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
553 SummaryTypeTestAssumeUsers.push_back(FS);
554 AllCallSitesDevirted = false;
555 }
556
557 void markDevirt() { AllCallSitesDevirted = true; }
558};
559
560// Call site information collected for a specific VTableSlot.
561struct VTableSlotInfo {
562 // The set of call sites which do not have all constant integer arguments
563 // (excluding "this").
564 CallSiteInfo CSInfo;
565
566 // The set of call sites with all constant integer arguments (excluding
567 // "this"), grouped by argument list.
568 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
569
570 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
571
572private:
573 CallSiteInfo &findCallSiteInfo(CallBase &CB);
574};
575
576CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
577 std::vector<uint64_t> Args;
578 auto *CBType = dyn_cast<IntegerType>(CB.getType());
579 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
580 return CSInfo;
581 for (auto &&Arg : drop_begin(CB.args())) {
582 auto *CI = dyn_cast<ConstantInt>(Arg);
583 if (!CI || CI->getBitWidth() > 64)
584 return CSInfo;
585 Args.push_back(CI->getZExtValue());
586 }
587 return ConstCSInfo[Args];
588}
589
590void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
591 unsigned *NumUnsafeUses) {
592 auto &CSI = findCallSiteInfo(CB);
593 CSI.AllCallSitesDevirted = false;
594 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
595}
596
597struct DevirtModule {
598 Module &M;
601
602 ModuleSummaryIndex *const ExportSummary;
603 const ModuleSummaryIndex *const ImportSummary;
604
605 IntegerType *const Int8Ty;
606 PointerType *const Int8PtrTy;
607 IntegerType *const Int32Ty;
608 IntegerType *const Int64Ty;
609 IntegerType *const IntPtrTy;
610 /// Sizeless array type, used for imported vtables. This provides a signal
611 /// to analyzers that these imports may alias, as they do for example
612 /// when multiple unique return values occur in the same vtable.
613 ArrayType *const Int8Arr0Ty;
614
615 const bool RemarksEnabled;
616 std::function<OptimizationRemarkEmitter &(Function &)> OREGetter;
617 MapVector<VTableSlot, VTableSlotInfo> CallSlots;
618
619 // Calls that have already been optimized. We may add a call to multiple
620 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
621 // optimize a call more than once.
622 SmallPtrSet<CallBase *, 8> OptimizedCalls;
623
624 // Store calls that had their ptrauth bundle removed. They are to be deleted
625 // at the end of the optimization.
626 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
627
628 // This map keeps track of the number of "unsafe" uses of a loaded function
629 // pointer. The key is the associated llvm.type.test intrinsic call generated
630 // by this pass. An unsafe use is one that calls the loaded function pointer
631 // directly. Every time we eliminate an unsafe use (for example, by
632 // devirtualizing it or by applying virtual constant propagation), we
633 // decrement the value stored in this map. If a value reaches zero, we can
634 // eliminate the type check by RAUWing the associated llvm.type.test call with
635 // true.
636 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
637 PatternList FunctionsToSkip;
638
639 DevirtModule(Module &M, ModuleAnalysisManager &MAM,
640 ModuleSummaryIndex *ExportSummary,
641 const ModuleSummaryIndex *ImportSummary)
642 : M(M), MAM(MAM),
643 FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
644 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
645 Int8Ty(Type::getInt8Ty(M.getContext())),
646 Int8PtrTy(PointerType::getUnqual(M.getContext())),
647 Int32Ty(Type::getInt32Ty(M.getContext())),
648 Int64Ty(Type::getInt64Ty(M.getContext())),
649 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
650 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
651 RemarksEnabled(areRemarksEnabled()),
652 OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
653 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
654 }) {
655 assert(!(ExportSummary && ImportSummary));
656 FunctionsToSkip.init(SkipFunctionNames);
657 }
658
659 bool areRemarksEnabled();
660
661 void
662 scanTypeTestUsers(Function *TypeTestFunc,
663 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
664 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
665
666 void buildTypeIdentifierMap(
667 std::vector<VTableBits> &Bits,
668 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
669
670 bool
671 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
672 const std::set<TypeMemberInfo> &TypeMemberInfos,
673 uint64_t ByteOffset,
674 ModuleSummaryIndex *ExportSummary);
675
676 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
677 bool &IsExported);
678 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
680 VTableSlotInfo &SlotInfo,
681 WholeProgramDevirtResolution *Res);
682
683 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Function &JT,
684 bool &IsExported);
685 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
686 VTableSlotInfo &SlotInfo,
687 WholeProgramDevirtResolution *Res, VTableSlot Slot);
688
689 bool tryEvaluateFunctionsWithArgs(
691 ArrayRef<uint64_t> Args);
692
693 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
694 uint64_t TheRetVal);
695 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
696 CallSiteInfo &CSInfo,
697 WholeProgramDevirtResolution::ByArg *Res);
698
699 // Returns the global symbol name that is used to export information about the
700 // given vtable slot and list of arguments.
701 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
702 StringRef Name);
703
704 bool shouldExportConstantsAsAbsoluteSymbols();
705
706 // This function is called during the export phase to create a symbol
707 // definition containing information about the given vtable slot and list of
708 // arguments.
709 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
710 Constant *C);
711 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
712 uint32_t Const, uint32_t &Storage);
713
714 // This function is called during the import phase to create a reference to
715 // the symbol definition created during the export phase.
716 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
717 StringRef Name);
718 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
719 StringRef Name, IntegerType *IntTy,
720 uint32_t Storage);
721
722 Constant *getMemberAddr(const TypeMemberInfo *M);
723
724 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
725 Constant *UniqueMemberAddr);
726 bool tryUniqueRetValOpt(unsigned BitWidth,
728 CallSiteInfo &CSInfo,
729 WholeProgramDevirtResolution::ByArg *Res,
730 VTableSlot Slot, ArrayRef<uint64_t> Args);
731
732 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
733 Constant *Byte, Constant *Bit);
734 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
735 VTableSlotInfo &SlotInfo,
736 WholeProgramDevirtResolution *Res, VTableSlot Slot);
737
738 void rebuildGlobal(VTableBits &B);
739
740 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
741 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
742
743 // If we were able to eliminate all unsafe uses for a type checked load,
744 // eliminate the associated type tests by replacing them with true.
745 void removeRedundantTypeTests();
746
747 bool run();
748
749 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
750 //
751 // Caller guarantees that `ExportSummary` is not nullptr.
752 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
753 ModuleSummaryIndex *ExportSummary);
754
755 // Returns true if the function definition must be unreachable.
756 //
757 // Note if this helper function returns true, `F` is guaranteed
758 // to be unreachable; if it returns false, `F` might still
759 // be unreachable but not covered by this helper function.
760 //
761 // Implementation-wise, if function definition is present, IR is analyzed; if
762 // not, look up function flags from ExportSummary as a fallback.
763 static bool mustBeUnreachableFunction(Function *const F,
764 ModuleSummaryIndex *ExportSummary);
765
766 // Lower the module using the action and summary passed as command line
767 // arguments. For testing purposes only.
768 static bool runForTesting(Module &M, ModuleAnalysisManager &MAM);
769};
770
771struct DevirtIndex {
772 ModuleSummaryIndex &ExportSummary;
773 // The set in which to record GUIDs exported from their module by
774 // devirtualization, used by client to ensure they are not internalized.
775 std::set<GlobalValue::GUID> &ExportedGUIDs;
776 // A map in which to record the information necessary to locate the WPD
777 // resolution for local targets in case they are exported by cross module
778 // importing.
779 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
780
781 MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
782
783 PatternList FunctionsToSkip;
784
785 DevirtIndex(
786 ModuleSummaryIndex &ExportSummary,
787 std::set<GlobalValue::GUID> &ExportedGUIDs,
788 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
789 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
790 LocalWPDTargetsMap(LocalWPDTargetsMap) {
791 FunctionsToSkip.init(SkipFunctionNames);
792 }
793
794 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
795 const TypeIdCompatibleVtableInfo TIdInfo,
796 uint64_t ByteOffset);
797
798 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
799 VTableSlotSummary &SlotSummary,
800 VTableSlotInfo &SlotInfo,
801 WholeProgramDevirtResolution *Res,
802 std::set<ValueInfo> &DevirtTargets);
803
804 void run();
805};
806} // end anonymous namespace
807
810 if (UseCommandLine) {
811 if (!DevirtModule::runForTesting(M, MAM))
812 return PreservedAnalyses::all();
814 }
815 if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run())
816 return PreservedAnalyses::all();
818}
819
820// Enable whole program visibility if enabled by client (e.g. linker) or
821// internal option, and not force disabled.
822bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
823 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
825}
826
827static bool
829 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
830 // TypeID for member function pointer type is an internal construct
831 // and won't exist in IsVisibleToRegularObj. The full TypeID
832 // will be present and participate in invalidation.
833 if (TypeID.ends_with(".virtual"))
834 return false;
835
836 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
837 // non-externally visible types which cannot interact with
838 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
839 if (!TypeID.consume_front("_ZTS"))
840 return false;
841
842 // TypeID is keyed off the type name symbol (_ZTS). However, the native
843 // object may not contain this symbol if it does not contain a key
844 // function for the base type and thus only contains a reference to the
845 // type info (_ZTI). To catch this case we query using the type info
846 // symbol corresponding to the TypeID.
847 std::string TypeInfo = ("_ZTI" + TypeID).str();
848 return IsVisibleToRegularObj(TypeInfo);
849}
850
851static bool
853 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
855 GV.getMetadata(LLVMContext::MD_type, Types);
856
857 for (auto *Type : Types)
858 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
859 return typeIDVisibleToRegularObj(TypeID->getString(),
860 IsVisibleToRegularObj);
861
862 return false;
863}
864
865/// If whole program visibility asserted, then upgrade all public vcall
866/// visibility metadata on vtable definitions to linkage unit visibility in
867/// Module IR (for regular or hybrid LTO).
869 Module &M, bool WholeProgramVisibilityEnabledInLTO,
870 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
871 bool ValidateAllVtablesHaveTypeInfos,
872 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
873 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
874 return;
875 for (GlobalVariable &GV : M.globals()) {
876 // Add linkage unit visibility to any variable with type metadata, which are
877 // the vtable definitions. We won't have an existing vcall_visibility
878 // metadata on vtable definitions with public visibility.
879 if (GV.hasMetadata(LLVMContext::MD_type) &&
881 // Don't upgrade the visibility for symbols exported to the dynamic
882 // linker, as we have no information on their eventual use.
883 !DynamicExportSymbols.count(GV.getGUID()) &&
884 // With validation enabled, we want to exclude symbols visible to
885 // regular objects. Local symbols will be in this group due to the
886 // current implementation but those with VCallVisibilityTranslationUnit
887 // will have already been marked in clang so are unaffected.
888 !(ValidateAllVtablesHaveTypeInfos &&
889 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
891 }
892}
893
895 bool WholeProgramVisibilityEnabledInLTO) {
896 llvm::TimeTraceScope timeScope("Update public type test calls");
897 Function *PublicTypeTestFunc =
898 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
899 if (!PublicTypeTestFunc)
900 return;
901 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
902 Function *TypeTestFunc =
903 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
904 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
905 auto *CI = cast<CallInst>(U.getUser());
906 auto *NewCI = CallInst::Create(
907 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, {}, "",
908 CI->getIterator());
909 CI->replaceAllUsesWith(NewCI);
910 CI->eraseFromParent();
911 }
912 } else {
913 // TODO: Don't replace public type tests when speculative devirtualization
914 // gets enabled in LTO mode.
915 auto *True = ConstantInt::getTrue(M.getContext());
916 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
917 auto *CI = cast<CallInst>(U.getUser());
918 CI->replaceAllUsesWith(True);
919 CI->eraseFromParent();
920 }
921 }
922}
923
924/// Based on typeID string, get all associated vtable GUIDS that are
925/// visible to regular objects.
927 ModuleSummaryIndex &Index,
928 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
929 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
930 for (const auto &TypeID : Index.typeIdCompatibleVtableMap()) {
931 if (typeIDVisibleToRegularObj(TypeID.first, IsVisibleToRegularObj))
932 for (const TypeIdOffsetVtableInfo &P : TypeID.second)
933 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
934 }
935}
936
937/// If whole program visibility asserted, then upgrade all public vcall
938/// visibility metadata on vtable definition summaries to linkage unit
939/// visibility in Module summary index (for ThinLTO).
941 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
942 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
943 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
944 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
945 return;
946 for (auto &P : Index) {
947 // Don't upgrade the visibility for symbols exported to the dynamic
948 // linker, as we have no information on their eventual use.
949 if (DynamicExportSymbols.count(P.first))
950 continue;
951 // With validation enabled, we want to exclude symbols visible to regular
952 // objects. Local symbols will be in this group due to the current
953 // implementation but those with VCallVisibilityTranslationUnit will have
954 // already been marked in clang so are unaffected.
955 if (VisibleToRegularObjSymbols.count(P.first))
956 continue;
957 for (auto &S : P.second.getSummaryList()) {
958 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
959 if (!GVar ||
960 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
961 continue;
962 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
963 }
964 }
965}
966
968 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
969 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
970 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
971}
972
974 ModuleSummaryIndex &Summary,
975 function_ref<bool(StringRef, ValueInfo)> IsExported,
976 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
977 for (auto &T : LocalWPDTargetsMap) {
978 auto &VI = T.first;
979 // This was enforced earlier during trySingleImplDevirt.
980 assert(VI.getSummaryList().size() == 1 &&
981 "Devirt of local target has more than one copy");
982 auto &S = VI.getSummaryList()[0];
983 if (!IsExported(S->modulePath(), VI))
984 continue;
985
986 // It's been exported by a cross module import.
987 for (auto &SlotSummary : T.second) {
988 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
989 assert(TIdSum);
990 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
991 assert(WPDRes != TIdSum->WPDRes.end());
992 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
993 WPDRes->second.SingleImplName,
994 Summary.getModuleHash(S->modulePath()));
995 }
996 }
997}
998
1000 // Check that summary index contains regular LTO module when performing
1001 // export to prevent occasional use of index from pure ThinLTO compilation
1002 // (-fno-split-lto-module). This kind of summary index is passed to
1003 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
1004 const auto &ModPaths = Summary->modulePaths();
1006 !ModPaths.contains(ModuleSummaryIndex::getRegularLTOModuleName()))
1007 return createStringError(
1009 "combined summary should contain Regular LTO module");
1010 return ErrorSuccess();
1011}
1012
1013bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) {
1014 std::unique_ptr<ModuleSummaryIndex> Summary =
1015 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
1016
1017 // Handle the command-line summary arguments. This code is for testing
1018 // purposes only, so we handle errors directly.
1019 if (!ClReadSummary.empty()) {
1020 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
1021 ": ");
1022 auto ReadSummaryFile =
1024 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
1025 getModuleSummaryIndex(*ReadSummaryFile)) {
1026 Summary = std::move(*SummaryOrErr);
1027 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
1028 } else {
1029 // Try YAML if we've failed with bitcode.
1030 consumeError(SummaryOrErr.takeError());
1031 yaml::Input In(ReadSummaryFile->getBuffer());
1032 In >> *Summary;
1033 ExitOnErr(errorCodeToError(In.error()));
1034 }
1035 }
1036
1037 bool Changed =
1038 DevirtModule(M, MAM,
1040 : nullptr,
1042 : nullptr)
1043 .run();
1044
1045 if (!ClWriteSummary.empty()) {
1046 ExitOnError ExitOnErr(
1047 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1048 std::error_code EC;
1049 if (StringRef(ClWriteSummary).ends_with(".bc")) {
1050 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
1051 ExitOnErr(errorCodeToError(EC));
1052 writeIndexToFile(*Summary, OS);
1053 } else {
1054 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1055 ExitOnErr(errorCodeToError(EC));
1056 yaml::Output Out(OS);
1057 Out << *Summary;
1058 }
1059 }
1060
1061 return Changed;
1062}
1063
1064void DevirtModule::buildTypeIdentifierMap(
1065 std::vector<VTableBits> &Bits,
1066 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1067 DenseMap<GlobalVariable *, VTableBits *> GVToBits;
1068 Bits.reserve(M.global_size());
1070 for (GlobalVariable &GV : M.globals()) {
1071 Types.clear();
1072 GV.getMetadata(LLVMContext::MD_type, Types);
1073 if (GV.isDeclaration() || Types.empty())
1074 continue;
1075
1076 VTableBits *&BitsPtr = GVToBits[&GV];
1077 if (!BitsPtr) {
1078 Bits.emplace_back();
1079 Bits.back().GV = &GV;
1080 Bits.back().ObjectSize =
1081 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1082 BitsPtr = &Bits.back();
1083 }
1084
1085 for (MDNode *Type : Types) {
1086 auto *TypeID = Type->getOperand(1).get();
1087
1088 uint64_t Offset =
1090 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1091 ->getZExtValue();
1092
1093 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1094 }
1095 }
1096}
1097
1098bool DevirtModule::tryFindVirtualCallTargets(
1099 std::vector<VirtualCallTarget> &TargetsForSlot,
1100 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1101 ModuleSummaryIndex *ExportSummary) {
1102 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1103 if (!TM.Bits->GV->isConstant())
1104 return false;
1105
1106 // Without ClDevirtualizeSpeculatively, we cannot perform whole program
1107 // devirtualization analysis on a vtable with public LTO visibility.
1108 if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
1110 return false;
1111
1112 Function *Fn = nullptr;
1113 Constant *C = nullptr;
1114 std::tie(Fn, C) =
1115 getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);
1116
1117 if (!Fn)
1118 return false;
1119
1120 if (FunctionsToSkip.match(Fn->getName()))
1121 return false;
1122
1123 // We can disregard __cxa_pure_virtual as a possible call target, as
1124 // calls to pure virtuals are UB.
1125 if (Fn->getName() == "__cxa_pure_virtual")
1126 continue;
1127
1128 // In most cases empty functions will be overridden by the
1129 // implementation of the derived class, so we can skip them.
1131 Fn->getInstructionCount() <= 1)
1132 continue;
1133
1134 // We can disregard unreachable functions as possible call targets, as
1135 // unreachable functions shouldn't be called.
1136 if (mustBeUnreachableFunction(Fn, ExportSummary))
1137 continue;
1138
1139 // Save the symbol used in the vtable to use as the devirtualization
1140 // target.
1141 auto *GV = dyn_cast<GlobalValue>(C);
1142 assert(GV);
1143 TargetsForSlot.push_back({GV, &TM});
1144 }
1145
1146 // Give up if we couldn't find any targets.
1147 return !TargetsForSlot.empty();
1148}
1149
1150bool DevirtIndex::tryFindVirtualCallTargets(
1151 std::vector<ValueInfo> &TargetsForSlot,
1152 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1153 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1154 // Find a representative copy of the vtable initializer.
1155 // We can have multiple available_externally, linkonce_odr and weak_odr
1156 // vtable initializers. We can also have multiple external vtable
1157 // initializers in the case of comdats, which we cannot check here.
1158 // The linker should give an error in this case.
1159 //
1160 // Also, handle the case of same-named local Vtables with the same path
1161 // and therefore the same GUID. This can happen if there isn't enough
1162 // distinguishing path when compiling the source file. In that case we
1163 // conservatively return false early.
1164 if (P.VTableVI.hasLocal() && P.VTableVI.getSummaryList().size() > 1)
1165 return false;
1166 const GlobalVarSummary *VS = nullptr;
1167 for (const auto &S : P.VTableVI.getSummaryList()) {
1168 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1169 if (!CurVS->vTableFuncs().empty() ||
1170 // Previously clang did not attach the necessary type metadata to
1171 // available_externally vtables, in which case there would not
1172 // be any vtable functions listed in the summary and we need
1173 // to treat this case conservatively (in case the bitcode is old).
1174 // However, we will also not have any vtable functions in the
1175 // case of a pure virtual base class. In that case we do want
1176 // to set VS to avoid treating it conservatively.
1178 VS = CurVS;
1179 // We cannot perform whole program devirtualization analysis on a vtable
1180 // with public LTO visibility.
1181 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1182 return false;
1183 break;
1184 }
1185 }
1186 // There will be no VS if all copies are available_externally having no
1187 // type metadata. In that case we can't safely perform WPD.
1188 if (!VS)
1189 return false;
1190 if (!VS->isLive())
1191 continue;
1192 for (auto VTP : VS->vTableFuncs()) {
1193 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1194 continue;
1195
1196 if (mustBeUnreachableFunction(VTP.FuncVI))
1197 continue;
1198
1199 TargetsForSlot.push_back(VTP.FuncVI);
1200 }
1201 }
1202
1203 // Give up if we couldn't find any targets.
1204 return !TargetsForSlot.empty();
1205}
1206
1207void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1208 Constant *TheFn, bool &IsExported) {
1209 // Don't devirtualize function if we're told to skip it
1210 // in -wholeprogramdevirt-skip.
1211 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1212 return;
1213 auto Apply = [&](CallSiteInfo &CSInfo) {
1214 for (auto &&VCallSite : CSInfo.CallSites) {
1215 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1216 continue;
1217
1218 // Stop when the number of devirted calls reaches the cutoff.
1219 if (WholeProgramDevirtCutoff.getNumOccurrences() > 0 &&
1220 NumDevirtCalls >= WholeProgramDevirtCutoff)
1221 return;
1222
1223 if (RemarksEnabled)
1224 VCallSite.emitRemark("single-impl",
1225 TheFn->stripPointerCasts()->getName(), OREGetter);
1226 NumSingleImpl++;
1227 NumDevirtCalls++;
1228 auto &CB = VCallSite.CB;
1229 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1230 IRBuilder<> Builder(&CB);
1231 Value *Callee =
1232 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1233
1234 // If trap checking is enabled, add support to compare the virtual
1235 // function pointer to the devirtualized target. In case of a mismatch,
1236 // perform a debug trap.
1238 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1240 Cond, &CB, /*Unreachable=*/false,
1241 MDBuilder(M.getContext()).createUnlikelyBranchWeights());
1242 Builder.SetInsertPoint(ThenTerm);
1243 Function *TrapFn =
1244 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::debugtrap);
1245 auto *CallTrap = Builder.CreateCall(TrapFn);
1246 CallTrap->setDebugLoc(CB.getDebugLoc());
1247 }
1248
1249 // If fallback checking or speculative devirtualization are enabled,
1250 // add support to compare the virtual function pointer to the
1251 // devirtualized target. In case of a mismatch, fall back to indirect
1252 // call.
1255 MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
1256 // Version the indirect call site. If the called value is equal to the
1257 // given callee, 'NewInst' will be executed, otherwise the original call
1258 // site will be executed.
1259 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1260 NewInst.setCalledOperand(Callee);
1261 // Since the new call site is direct, we must clear metadata that
1262 // is only appropriate for indirect calls. This includes !prof and
1263 // !callees metadata.
1264 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1265 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1266 // Additionally, we should remove them from the fallback indirect call,
1267 // so that we don't attempt to perform indirect call promotion later.
1268 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1269 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1270 }
1271
1272 // In either trapping or non-checking mode, devirtualize original call.
1273 else {
1274 // Devirtualize unconditionally.
1275 CB.setCalledOperand(Callee);
1276 // Since the call site is now direct, we must clear metadata that
1277 // is only appropriate for indirect calls. This includes !prof and
1278 // !callees metadata.
1279 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1280 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1281 if (CB.getCalledOperand() &&
1283 auto *NewCS = CallBase::removeOperandBundle(
1285 CB.replaceAllUsesWith(NewCS);
1286 // Schedule for deletion at the end of pass run.
1287 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1288 }
1289 }
1290
1291 // This use is no longer unsafe.
1292 if (VCallSite.NumUnsafeUses)
1293 --*VCallSite.NumUnsafeUses;
1294 }
1295 if (CSInfo.isExported())
1296 IsExported = true;
1297 CSInfo.markDevirt();
1298 };
1299 Apply(SlotInfo.CSInfo);
1300 for (auto &P : SlotInfo.ConstCSInfo)
1301 Apply(P.second);
1302}
1303
1304static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1305 // We can't add calls if we haven't seen a definition
1306 if (Callee.getSummaryList().empty())
1307 return false;
1308
1309 // Insert calls into the summary index so that the devirtualized targets
1310 // are eligible for import.
1311 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1312 // to better ensure we have the opportunity to inline them.
1313 bool IsExported = false;
1314 auto &S = Callee.getSummaryList()[0];
1315 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
1316 /* RelBF = */ 0);
1317 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1318 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1319 FS->addCall({Callee, CI});
1320 IsExported |= S->modulePath() != FS->modulePath();
1321 }
1322 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1323 FS->addCall({Callee, CI});
1324 IsExported |= S->modulePath() != FS->modulePath();
1325 }
1326 };
1327 AddCalls(SlotInfo.CSInfo);
1328 for (auto &P : SlotInfo.ConstCSInfo)
1329 AddCalls(P.second);
1330 return IsExported;
1331}
1332
1333bool DevirtModule::trySingleImplDevirt(
1334 ModuleSummaryIndex *ExportSummary,
1335 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1336 WholeProgramDevirtResolution *Res) {
1337 // See if the program contains a single implementation of this virtual
1338 // function.
1339 auto *TheFn = TargetsForSlot[0].Fn;
1340 for (auto &&Target : TargetsForSlot)
1341 if (TheFn != Target.Fn)
1342 return false;
1343
1344 // If so, update each call site to call that implementation directly.
1345 if (RemarksEnabled || AreStatisticsEnabled())
1346 TargetsForSlot[0].WasDevirt = true;
1347
1348 bool IsExported = false;
1349 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1350 if (!IsExported)
1351 return false;
1352
1353 // If the only implementation has local linkage, we must promote to external
1354 // to make it visible to thin LTO objects. We can only get here during the
1355 // ThinLTO export phase.
1356 if (TheFn->hasLocalLinkage()) {
1357 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1358
1359 // Since we are renaming the function, any comdats with the same name must
1360 // also be renamed. This is required when targeting COFF, as the comdat name
1361 // must match one of the names of the symbols in the comdat.
1362 if (Comdat *C = TheFn->getComdat()) {
1363 if (C->getName() == TheFn->getName()) {
1364 Comdat *NewC = M.getOrInsertComdat(NewName);
1365 NewC->setSelectionKind(C->getSelectionKind());
1366 for (GlobalObject &GO : M.global_objects())
1367 if (GO.getComdat() == C)
1368 GO.setComdat(NewC);
1369 }
1370 }
1371
1372 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1373 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1374 TheFn->setName(NewName);
1375 }
1376 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1377 // Any needed promotion of 'TheFn' has already been done during
1378 // LTO unit split, so we can ignore return value of AddCalls.
1379 addCalls(SlotInfo, TheFnVI);
1380
1382 Res->SingleImplName = std::string(TheFn->getName());
1383
1384 return true;
1385}
1386
1387bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1388 VTableSlotSummary &SlotSummary,
1389 VTableSlotInfo &SlotInfo,
1390 WholeProgramDevirtResolution *Res,
1391 std::set<ValueInfo> &DevirtTargets) {
1392 // See if the program contains a single implementation of this virtual
1393 // function.
1394 auto TheFn = TargetsForSlot[0];
1395 for (auto &&Target : TargetsForSlot)
1396 if (TheFn != Target)
1397 return false;
1398
1399 // Don't devirtualize if we don't have target definition.
1400 auto Size = TheFn.getSummaryList().size();
1401 if (!Size)
1402 return false;
1403
1404 // Don't devirtualize function if we're told to skip it
1405 // in -wholeprogramdevirt-skip.
1406 if (FunctionsToSkip.match(TheFn.name()))
1407 return false;
1408
1409 // If the summary list contains multiple summaries where at least one is
1410 // a local, give up, as we won't know which (possibly promoted) name to use.
1411 if (TheFn.hasLocal() && Size > 1)
1412 return false;
1413
1414 // Collect functions devirtualized at least for one call site for stats.
1416 DevirtTargets.insert(TheFn);
1417
1418 auto &S = TheFn.getSummaryList()[0];
1419 bool IsExported = addCalls(SlotInfo, TheFn);
1420 if (IsExported)
1421 ExportedGUIDs.insert(TheFn.getGUID());
1422
1423 // Record in summary for use in devirtualization during the ThinLTO import
1424 // step.
1426 if (GlobalValue::isLocalLinkage(S->linkage())) {
1427 if (IsExported)
1428 // If target is a local function and we are exporting it by
1429 // devirtualizing a call in another module, we need to record the
1430 // promoted name.
1432 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1433 else {
1434 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1435 Res->SingleImplName = std::string(TheFn.name());
1436 }
1437 } else
1438 Res->SingleImplName = std::string(TheFn.name());
1439
1440 // Name will be empty if this thin link driven off of serialized combined
1441 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1442 // legacy LTO API anyway.
1443 assert(!Res->SingleImplName.empty());
1444
1445 return true;
1446}
1447
1448void DevirtModule::tryICallBranchFunnel(
1449 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1450 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1451 Triple T(M.getTargetTriple());
1452 if (T.getArch() != Triple::x86_64)
1453 return;
1454
1455 if (TargetsForSlot.size() > ClThreshold)
1456 return;
1457
1458 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1459 if (!HasNonDevirt)
1460 for (auto &P : SlotInfo.ConstCSInfo)
1461 if (!P.second.AllCallSitesDevirted) {
1462 HasNonDevirt = true;
1463 break;
1464 }
1465
1466 if (!HasNonDevirt)
1467 return;
1468
1469 // If any GV is AvailableExternally, not to generate branch.funnel.
1470 // NOTE: It is to avoid crash in LowerTypeTest.
1471 // If the branch.funnel is generated, because GV.isDeclarationForLinker(),
1472 // in LowerTypeTestsModule::lower(), its GlobalTypeMember would NOT
1473 // be saved in GlobalTypeMembers[&GV]. Then crash happens in
1474 // buildBitSetsFromDisjointSet due to GlobalTypeMembers[&GV] is NULL.
1475 // Even doing experiment to save it in GlobalTypeMembers[&GV] and
1476 // making GlobalTypeMembers[&GV] be not NULL, crash could avoid from
1477 // buildBitSetsFromDisjointSet. But still report_fatal_error in Verifier
1478 // or SelectionDAGBuilder later, because operands linkage type consistency
1479 // check of icall.branch.funnel can not pass.
1480 for (auto &T : TargetsForSlot) {
1481 if (T.TM->Bits->GV->hasAvailableExternallyLinkage())
1482 return;
1483 }
1484
1485 FunctionType *FT =
1486 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1487 Function *JT;
1488 if (isa<MDString>(Slot.TypeID)) {
1490 M.getDataLayout().getProgramAddressSpace(),
1491 getGlobalName(Slot, {}, "branch_funnel"), &M);
1492 JT->setVisibility(GlobalValue::HiddenVisibility);
1493 } else {
1495 M.getDataLayout().getProgramAddressSpace(),
1496 "branch_funnel", &M);
1497 }
1498 JT->addParamAttr(0, Attribute::Nest);
1499
1500 std::vector<Value *> JTArgs;
1501 JTArgs.push_back(JT->arg_begin());
1502 for (auto &T : TargetsForSlot) {
1503 JTArgs.push_back(getMemberAddr(T.TM));
1504 JTArgs.push_back(T.Fn);
1505 }
1506
1507 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1509 &M, llvm::Intrinsic::icall_branch_funnel, {});
1510
1511 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1512 CI->setTailCallKind(CallInst::TCK_MustTail);
1513 ReturnInst::Create(M.getContext(), nullptr, BB);
1514
1515 bool IsExported = false;
1516 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
1517 if (IsExported)
1519
1520 if (!JT->getEntryCount().has_value()) {
1521 // FIXME: we could pass through thinlto the necessary information.
1523 }
1524}
1525
1526void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1527 Function &JT, bool &IsExported) {
1528 DenseMap<Function *, double> FunctionEntryCounts;
1529 auto Apply = [&](CallSiteInfo &CSInfo) {
1530 if (CSInfo.isExported())
1531 IsExported = true;
1532 if (CSInfo.AllCallSitesDevirted)
1533 return;
1534
1535 std::map<CallBase *, CallBase *> CallBases;
1536 for (auto &&VCallSite : CSInfo.CallSites) {
1537 CallBase &CB = VCallSite.CB;
1538
1539 if (CallBases.find(&CB) != CallBases.end()) {
1540 // When finding devirtualizable calls, it's possible to find the same
1541 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1542 // calls, which can cause duplicate call sites to be recorded in
1543 // [Const]CallSites. If we've already found one of these
1544 // call instances, just ignore it. It will be replaced later.
1545 continue;
1546 }
1547
1548 // Jump tables are only profitable if the retpoline mitigation is enabled.
1549 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1550 if (!FSAttr.isValid() ||
1551 !FSAttr.getValueAsString().contains("+retpoline"))
1552 continue;
1553
1554 NumBranchFunnel++;
1555 if (RemarksEnabled)
1556 VCallSite.emitRemark("branch-funnel", JT.getName(), OREGetter);
1557
1558 // Pass the address of the vtable in the nest register, which is r10 on
1559 // x86_64.
1560 std::vector<Type *> NewArgs;
1561 NewArgs.push_back(Int8PtrTy);
1562 append_range(NewArgs, CB.getFunctionType()->params());
1563 FunctionType *NewFT =
1565 CB.getFunctionType()->isVarArg());
1566 IRBuilder<> IRB(&CB);
1567 std::vector<Value *> Args;
1568 Args.push_back(VCallSite.VTable);
1569 llvm::append_range(Args, CB.args());
1570
1571 CallBase *NewCS = nullptr;
1572 if (!JT.isDeclaration() && !ProfcheckDisableMetadataFixes) {
1573 // Accumulate the call frequencies of the original call site, and use
1574 // that as total entry count for the funnel function.
1575 auto &F = *CB.getCaller();
1576 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1577 auto EC = BFI.getBlockFreq(&F.getEntryBlock());
1578 auto CC = F.getEntryCount(/*AllowSynthetic=*/true);
1579 double CallCount = 0.0;
1580 if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) {
1581 double CallFreq =
1582 static_cast<double>(
1583 BFI.getBlockFreq(CB.getParent()).getFrequency()) /
1584 EC.getFrequency();
1585 CallCount = CallFreq * CC->getCount();
1586 }
1587 FunctionEntryCounts[&JT] += CallCount;
1588 }
1589 if (isa<CallInst>(CB))
1590 NewCS = IRB.CreateCall(NewFT, &JT, Args);
1591 else
1592 NewCS =
1593 IRB.CreateInvoke(NewFT, &JT, cast<InvokeInst>(CB).getNormalDest(),
1594 cast<InvokeInst>(CB).getUnwindDest(), Args);
1595 NewCS->setCallingConv(CB.getCallingConv());
1596
1597 AttributeList Attrs = CB.getAttributes();
1598 std::vector<AttributeSet> NewArgAttrs;
1599 NewArgAttrs.push_back(AttributeSet::get(
1600 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1601 M.getContext(), Attribute::Nest)}));
1602 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1603 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1604 NewCS->setAttributes(
1605 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1606 Attrs.getRetAttrs(), NewArgAttrs));
1607
1608 CallBases[&CB] = NewCS;
1609
1610 // This use is no longer unsafe.
1611 if (VCallSite.NumUnsafeUses)
1612 --*VCallSite.NumUnsafeUses;
1613 }
1614 // Don't mark as devirtualized because there may be callers compiled without
1615 // retpoline mitigation, which would mean that they are lowered to
1616 // llvm.type.test and therefore require an llvm.type.test resolution for the
1617 // type identifier.
1618
1619 for (auto &[Old, New] : CallBases) {
1620 Old->replaceAllUsesWith(New);
1621 Old->eraseFromParent();
1622 }
1623 };
1624 Apply(SlotInfo.CSInfo);
1625 for (auto &P : SlotInfo.ConstCSInfo)
1626 Apply(P.second);
1627 for (auto &[F, C] : FunctionEntryCounts) {
1628 assert(!F->getEntryCount(/*AllowSynthetic=*/true) &&
1629 "Unexpected entry count for funnel that was freshly synthesized");
1630 F->setEntryCount(static_cast<uint64_t>(std::round(C)));
1631 }
1632}
1633
1634bool DevirtModule::tryEvaluateFunctionsWithArgs(
1636 ArrayRef<uint64_t> Args) {
1637 // Evaluate each function and store the result in each target's RetVal
1638 // field.
1639 for (VirtualCallTarget &Target : TargetsForSlot) {
1640 // TODO: Skip for now if the vtable symbol was an alias to a function,
1641 // need to evaluate whether it would be correct to analyze the aliasee
1642 // function for this optimization.
1643 auto *Fn = dyn_cast<Function>(Target.Fn);
1644 if (!Fn)
1645 return false;
1646
1647 if (Fn->arg_size() != Args.size() + 1)
1648 return false;
1649
1650 Evaluator Eval(M.getDataLayout(), nullptr);
1652 EvalArgs.push_back(
1654 for (unsigned I = 0; I != Args.size(); ++I) {
1655 auto *ArgTy =
1657 if (!ArgTy)
1658 return false;
1659 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1660 }
1661
1662 Constant *RetVal;
1663 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1664 !isa<ConstantInt>(RetVal))
1665 return false;
1666 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1667 }
1668 return true;
1669}
1670
1671void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1672 uint64_t TheRetVal) {
1673 for (auto Call : CSInfo.CallSites) {
1674 if (!OptimizedCalls.insert(&Call.CB).second)
1675 continue;
1676 NumUniformRetVal++;
1677 Call.replaceAndErase(
1678 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1679 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1680 }
1681 CSInfo.markDevirt();
1682}
1683
1684bool DevirtModule::tryUniformRetValOpt(
1685 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1686 WholeProgramDevirtResolution::ByArg *Res) {
1687 // Uniform return value optimization. If all functions return the same
1688 // constant, replace all calls with that constant.
1689 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1690 for (const VirtualCallTarget &Target : TargetsForSlot)
1691 if (Target.RetVal != TheRetVal)
1692 return false;
1693
1694 if (CSInfo.isExported()) {
1696 Res->Info = TheRetVal;
1697 }
1698
1699 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1700 if (RemarksEnabled || AreStatisticsEnabled())
1701 for (auto &&Target : TargetsForSlot)
1702 Target.WasDevirt = true;
1703 return true;
1704}
1705
1706std::string DevirtModule::getGlobalName(VTableSlot Slot,
1707 ArrayRef<uint64_t> Args,
1708 StringRef Name) {
1709 std::string FullName = "__typeid_";
1710 raw_string_ostream OS(FullName);
1711 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1712 for (uint64_t Arg : Args)
1713 OS << '_' << Arg;
1714 OS << '_' << Name;
1715 return FullName;
1716}
1717
1718bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1719 Triple T(M.getTargetTriple());
1720 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1721}
1722
1723void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1724 StringRef Name, Constant *C) {
1725 GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
1726 getGlobalName(Slot, Args, Name), C, &M);
1728}
1729
1730void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1731 StringRef Name, uint32_t Const,
1732 uint32_t &Storage) {
1733 if (shouldExportConstantsAsAbsoluteSymbols()) {
1734 exportGlobal(
1735 Slot, Args, Name,
1736 ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
1737 return;
1738 }
1739
1740 Storage = Const;
1741}
1742
1743Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1744 StringRef Name) {
1745 GlobalVariable *GV =
1746 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1748 return GV;
1749}
1750
1751Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1752 StringRef Name, IntegerType *IntTy,
1753 uint32_t Storage) {
1754 if (!shouldExportConstantsAsAbsoluteSymbols())
1755 return ConstantInt::get(IntTy, Storage);
1756
1757 Constant *C = importGlobal(Slot, Args, Name);
1758 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1759 C = ConstantExpr::getPtrToInt(C, IntTy);
1760
1761 // We only need to set metadata if the global is newly created, in which
1762 // case it would not have hidden visibility.
1763 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1764 return C;
1765
1766 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1767 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1768 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1769 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1770 MDNode::get(M.getContext(), {MinC, MaxC}));
1771 };
1772 unsigned AbsWidth = IntTy->getBitWidth();
1773 if (AbsWidth == IntPtrTy->getBitWidth())
1774 SetAbsRange(~0ull, ~0ull); // Full set.
1775 else
1776 SetAbsRange(0, 1ull << AbsWidth);
1777 return C;
1778}
1779
1780void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1781 bool IsOne,
1782 Constant *UniqueMemberAddr) {
1783 for (auto &&Call : CSInfo.CallSites) {
1784 if (!OptimizedCalls.insert(&Call.CB).second)
1785 continue;
1786 IRBuilder<> B(&Call.CB);
1787 Value *Cmp =
1788 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1789 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1790 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1791 NumUniqueRetVal++;
1792 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1793 Cmp);
1794 }
1795 CSInfo.markDevirt();
1796}
1797
1798Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1799 return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
1800 ConstantInt::get(Int64Ty, M->Offset));
1801}
1802
1803bool DevirtModule::tryUniqueRetValOpt(
1804 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1805 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1806 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1807 // IsOne controls whether we look for a 0 or a 1.
1808 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1809 const TypeMemberInfo *UniqueMember = nullptr;
1810 for (const VirtualCallTarget &Target : TargetsForSlot) {
1811 if (Target.RetVal == (IsOne ? 1 : 0)) {
1812 if (UniqueMember)
1813 return false;
1814 UniqueMember = Target.TM;
1815 }
1816 }
1817
1818 // We should have found a unique member or bailed out by now. We already
1819 // checked for a uniform return value in tryUniformRetValOpt.
1820 assert(UniqueMember);
1821
1822 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1823 if (CSInfo.isExported()) {
1825 Res->Info = IsOne;
1826
1827 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1828 }
1829
1830 // Replace each call with the comparison.
1831 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1832 UniqueMemberAddr);
1833
1834 // Update devirtualization statistics for targets.
1835 if (RemarksEnabled || AreStatisticsEnabled())
1836 for (auto &&Target : TargetsForSlot)
1837 Target.WasDevirt = true;
1838
1839 return true;
1840 };
1841
1842 if (BitWidth == 1) {
1843 if (tryUniqueRetValOptFor(true))
1844 return true;
1845 if (tryUniqueRetValOptFor(false))
1846 return true;
1847 }
1848 return false;
1849}
1850
1851void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1852 Constant *Byte, Constant *Bit) {
1853 for (auto Call : CSInfo.CallSites) {
1854 if (!OptimizedCalls.insert(&Call.CB).second)
1855 continue;
1856 auto *RetType = cast<IntegerType>(Call.CB.getType());
1857 IRBuilder<> B(&Call.CB);
1858 Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
1859 if (RetType->getBitWidth() == 1) {
1860 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1861 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1862 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1863 NumVirtConstProp1Bit++;
1864 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1865 OREGetter, IsBitSet);
1866 } else {
1867 Value *Val = B.CreateLoad(RetType, Addr);
1868 NumVirtConstProp++;
1869 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1870 OREGetter, Val);
1871 }
1872 }
1873 CSInfo.markDevirt();
1874}
1875
1876bool DevirtModule::tryVirtualConstProp(
1877 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1878 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1879 // TODO: Skip for now if the vtable symbol was an alias to a function,
1880 // need to evaluate whether it would be correct to analyze the aliasee
1881 // function for this optimization.
1882 auto *Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1883 if (!Fn)
1884 return false;
1885 // This only works if the function returns an integer.
1886 auto *RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1887 if (!RetType)
1888 return false;
1889 unsigned BitWidth = RetType->getBitWidth();
1890
1891 // TODO: Since we can evaluated these constants at compile-time, we can save
1892 // some space by calculating the smallest range of values that all these
1893 // constants can fit in, then only allocate enough space to fit those values.
1894 // At each callsite, we can get the original type by doing a sign/zero
1895 // extension. For example, if we would store an i64, but we can see that all
1896 // the values fit into an i16, then we can store an i16 before/after the
1897 // vtable and at each callsite do a s/zext.
1898 if (BitWidth > 64)
1899 return false;
1900
1901 Align TypeAlignment = M.getDataLayout().getABIIntegerTypeAlignment(BitWidth);
1902
1903 // Make sure that each function is defined, does not access memory, takes at
1904 // least one argument, does not use its first argument (which we assume is
1905 // 'this'), and has the same return type.
1906 //
1907 // Note that we test whether this copy of the function is readnone, rather
1908 // than testing function attributes, which must hold for any copy of the
1909 // function, even a less optimized version substituted at link time. This is
1910 // sound because the virtual constant propagation optimizations effectively
1911 // inline all implementations of the virtual function into each call site,
1912 // rather than using function attributes to perform local optimization.
1913 for (VirtualCallTarget &Target : TargetsForSlot) {
1914 // TODO: Skip for now if the vtable symbol was an alias to a function,
1915 // need to evaluate whether it would be correct to analyze the aliasee
1916 // function for this optimization.
1917 auto *Fn = dyn_cast<Function>(Target.Fn);
1918 if (!Fn)
1919 return false;
1920
1921 if (Fn->isDeclaration() ||
1922 !computeFunctionBodyMemoryAccess(*Fn, FAM.getResult<AAManager>(*Fn))
1924 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1925 Fn->getReturnType() != RetType)
1926 return false;
1927
1928 // This only works if the integer size is at most the alignment of the
1929 // vtable. If the table is underaligned, then we can't guarantee that the
1930 // constant will always be aligned to the integer type alignment. For
1931 // example, if the table is `align 1`, we can never guarantee that an i32
1932 // stored before/after the vtable is 32-bit aligned without changing the
1933 // alignment of the new global.
1934 GlobalVariable *GV = Target.TM->Bits->GV;
1935 Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
1936 GV->getAlign(), GV->getValueType());
1937 if (TypeAlignment > TableAlignment)
1938 return false;
1939 }
1940
1941 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1942 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1943 continue;
1944
1945 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1946 if (Res)
1947 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1948
1949 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1950 continue;
1951
1952 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1953 ResByArg, Slot, CSByConstantArg.first))
1954 continue;
1955
1956 // Find an allocation offset in bits in all vtables associated with the
1957 // type.
1958 // TODO: If there would be "holes" in the vtable that were added by
1959 // padding, we could place i1s there to reduce any extra padding that
1960 // would be introduced by the i1s.
1961 uint64_t AllocBefore =
1962 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1963 uint64_t AllocAfter =
1964 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1965
1966 // Calculate the total amount of padding needed to store a value at both
1967 // ends of the object.
1968 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1969 for (auto &&Target : TargetsForSlot) {
1970 TotalPaddingBefore += std::max<int64_t>(
1971 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1972 TotalPaddingAfter += std::max<int64_t>(
1973 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1974 }
1975
1976 // If the amount of padding is too large, give up.
1977 // FIXME: do something smarter here.
1978 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1979 continue;
1980
1981 // Calculate the offset to the value as a (possibly negative) byte offset
1982 // and (if applicable) a bit offset, and store the values in the targets.
1983 int64_t OffsetByte;
1984 uint64_t OffsetBit;
1985 if (TotalPaddingBefore <= TotalPaddingAfter)
1986 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1987 OffsetBit);
1988 else
1989 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1990 OffsetBit);
1991
1992 // In an earlier check we forbade constant propagation from operating on
1993 // tables whose alignment is less than the alignment needed for loading
1994 // the constant. Thus, the address we take the offset from will always be
1995 // aligned to at least this integer alignment. Now, we need to ensure that
1996 // the offset is also aligned to this integer alignment to ensure we always
1997 // have an aligned load.
1998 assert(OffsetByte % TypeAlignment.value() == 0);
1999
2000 if (RemarksEnabled || AreStatisticsEnabled())
2001 for (auto &&Target : TargetsForSlot)
2002 Target.WasDevirt = true;
2003
2004
2005 if (CSByConstantArg.second.isExported()) {
2007 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
2008 ResByArg->Byte);
2009 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
2010 ResByArg->Bit);
2011 }
2012
2013 // Rewrite each call to a load from OffsetByte/OffsetBit.
2014 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
2015 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
2016 applyVirtualConstProp(CSByConstantArg.second,
2017 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
2018 }
2019 return true;
2020}
2021
2022void DevirtModule::rebuildGlobal(VTableBits &B) {
2023 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
2024 return;
2025
2026 // Align the before byte array to the global's minimum alignment so that we
2027 // don't break any alignment requirements on the global.
2028 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
2029 B.GV->getAlign(), B.GV->getValueType());
2030 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
2031
2032 // Before was stored in reverse order; flip it now.
2033 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
2034 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
2035
2036 // Build an anonymous global containing the before bytes, followed by the
2037 // original initializer, followed by the after bytes.
2038 auto *NewInit = ConstantStruct::getAnon(
2039 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
2040 B.GV->getInitializer(),
2041 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
2042 auto *NewGV =
2043 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
2044 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
2045 NewGV->setSection(B.GV->getSection());
2046 NewGV->setComdat(B.GV->getComdat());
2047 NewGV->setAlignment(B.GV->getAlign());
2048
2049 // Copy the original vtable's metadata to the anonymous global, adjusting
2050 // offsets as required.
2051 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
2052
2053 // Build an alias named after the original global, pointing at the second
2054 // element (the original initializer).
2055 auto *Alias = GlobalAlias::create(
2056 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
2058 NewInit->getType(), NewGV,
2059 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
2060 ConstantInt::get(Int32Ty, 1)}),
2061 &M);
2062 Alias->setVisibility(B.GV->getVisibility());
2063 Alias->takeName(B.GV);
2064
2065 B.GV->replaceAllUsesWith(Alias);
2066 B.GV->eraseFromParent();
2067}
2068
2069bool DevirtModule::areRemarksEnabled() {
2070 const auto &FL = M.getFunctionList();
2071 for (const Function &Fn : FL) {
2072 if (Fn.empty())
2073 continue;
2074 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
2075 return DI.isEnabled();
2076 }
2077 return false;
2078}
2079
2080void DevirtModule::scanTypeTestUsers(
2081 Function *TypeTestFunc,
2082 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
2083 // Find all virtual calls via a virtual table pointer %p under an assumption
2084 // of the form llvm.assume(llvm.type.test(%p, %md)) or
2085 // llvm.assume(llvm.public.type.test(%p, %md)).
2086 // This indicates that %p points to a member of the type identifier %md.
2087 // Group calls by (type ID, offset) pair (effectively the identity of the
2088 // virtual function) and store to CallSlots.
2089 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
2090 auto *CI = dyn_cast<CallInst>(U.getUser());
2091 if (!CI)
2092 continue;
2093 // Search for virtual calls based on %p and add them to DevirtCalls.
2096 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2097 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
2098
2099 Metadata *TypeId =
2100 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
2101 // If we found any, add them to CallSlots.
2102 if (!Assumes.empty()) {
2103 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
2104 for (DevirtCallSite Call : DevirtCalls)
2105 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
2106 }
2107
2108 auto RemoveTypeTestAssumes = [&]() {
2109 // We no longer need the assumes or the type test.
2110 for (auto *Assume : Assumes)
2111 Assume->eraseFromParent();
2112 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
2113 // may use the vtable argument later.
2114 if (CI->use_empty())
2115 CI->eraseFromParent();
2116 };
2117
2118 // At this point we could remove all type test assume sequences, as they
2119 // were originally inserted for WPD. However, we can keep these in the
2120 // code stream for later analysis (e.g. to help drive more efficient ICP
2121 // sequences). They will eventually be removed by a second LowerTypeTests
2122 // invocation that cleans them up. In order to do this correctly, the first
2123 // LowerTypeTests invocation needs to know that they have "Unknown" type
2124 // test resolution, so that they aren't treated as Unsat and lowered to
2125 // False, which will break any uses on assumes. Below we remove any type
2126 // test assumes that will not be treated as Unknown by LTT.
2127
2128 // The type test assumes will be treated by LTT as Unsat if the type id is
2129 // not used on a global (in which case it has no entry in the TypeIdMap).
2130 if (!TypeIdMap.count(TypeId))
2131 RemoveTypeTestAssumes();
2132
2133 // For ThinLTO importing, we need to remove the type test assumes if this is
2134 // an MDString type id without a corresponding TypeIdSummary. Any
2135 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2136 // type test assumes can be kept. If the MDString type id is missing a
2137 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2138 // exporting phase of WPD from analyzing it), then it would be treated as
2139 // Unsat by LTT and we need to remove its type test assumes here. If not
2140 // used on a vcall we don't need them for later optimization use in any
2141 // case.
2142 else if (ImportSummary && isa<MDString>(TypeId)) {
2143 const TypeIdSummary *TidSummary =
2144 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2145 if (!TidSummary)
2146 RemoveTypeTestAssumes();
2147 else
2148 // If one was created it should not be Unsat, because if we reached here
2149 // the type id was used on a global.
2151 }
2152 }
2153}
2154
2155void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2156 Function *TypeTestFunc =
2157 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::type_test);
2158
2159 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2160 auto *CI = dyn_cast<CallInst>(U.getUser());
2161 if (!CI)
2162 continue;
2163
2164 Value *Ptr = CI->getArgOperand(0);
2165 Value *Offset = CI->getArgOperand(1);
2166 Value *TypeIdValue = CI->getArgOperand(2);
2167 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2168
2172 bool HasNonCallUses = false;
2173 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*CI->getFunction());
2174 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2175 HasNonCallUses, CI, DT);
2176
2177 // Start by generating "pessimistic" code that explicitly loads the function
2178 // pointer from the vtable and performs the type check. If possible, we will
2179 // eliminate the load and the type check later.
2180
2181 // If possible, only generate the load at the point where it is used.
2182 // This helps avoid unnecessary spills.
2183 IRBuilder<> LoadB(
2184 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2185
2186 Value *LoadedValue = nullptr;
2187 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2188 Intrinsic::type_checked_load_relative) {
2190 &M, Intrinsic::load_relative, {Int32Ty});
2191 LoadedValue = LoadB.CreateCall(LoadRelFunc, {Ptr, Offset});
2192 } else {
2193 Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
2194 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2195 }
2196
2197 for (Instruction *LoadedPtr : LoadedPtrs) {
2198 LoadedPtr->replaceAllUsesWith(LoadedValue);
2199 LoadedPtr->eraseFromParent();
2200 }
2201
2202 // Likewise for the type test.
2203 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2204 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2205
2206 for (Instruction *Pred : Preds) {
2207 Pred->replaceAllUsesWith(TypeTestCall);
2208 Pred->eraseFromParent();
2209 }
2210
2211 // We have already erased any extractvalue instructions that refer to the
2212 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2213 // (although this is unlikely). In that case, explicitly build a pair and
2214 // RAUW it.
2215 if (!CI->use_empty()) {
2216 Value *Pair = PoisonValue::get(CI->getType());
2217 IRBuilder<> B(CI);
2218 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2219 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2220 CI->replaceAllUsesWith(Pair);
2221 }
2222
2223 // The number of unsafe uses is initially the number of uses.
2224 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2225 NumUnsafeUses = DevirtCalls.size();
2226
2227 // If the function pointer has a non-call user, we cannot eliminate the type
2228 // check, as one of those users may eventually call the pointer. Increment
2229 // the unsafe use count to make sure it cannot reach zero.
2230 if (HasNonCallUses)
2231 ++NumUnsafeUses;
2232 for (DevirtCallSite Call : DevirtCalls) {
2233 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2234 &NumUnsafeUses);
2235 }
2236
2237 CI->eraseFromParent();
2238 }
2239}
2240
2241void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2242 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2243 if (!TypeId)
2244 return;
2245 const TypeIdSummary *TidSummary =
2246 ImportSummary->getTypeIdSummary(TypeId->getString());
2247 if (!TidSummary)
2248 return;
2249 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2250 if (ResI == TidSummary->WPDRes.end())
2251 return;
2252 const WholeProgramDevirtResolution &Res = ResI->second;
2253
2255 assert(!Res.SingleImplName.empty());
2256 // The type of the function in the declaration is irrelevant because every
2257 // call site will cast it to the correct type.
2258 Constant *SingleImpl =
2259 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2260 Type::getVoidTy(M.getContext()))
2261 .getCallee());
2262
2263 // This is the import phase so we should not be exporting anything.
2264 bool IsExported = false;
2265 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2266 assert(!IsExported);
2267 }
2268
2269 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2270 auto I = Res.ResByArg.find(CSByConstantArg.first);
2271 if (I == Res.ResByArg.end())
2272 continue;
2273 auto &ResByArg = I->second;
2274 // FIXME: We should figure out what to do about the "function name" argument
2275 // to the apply* functions, as the function names are unavailable during the
2276 // importing phase. For now we just pass the empty string. This does not
2277 // impact correctness because the function names are just used for remarks.
2278 switch (ResByArg.TheKind) {
2280 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2281 break;
2283 Constant *UniqueMemberAddr =
2284 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2285 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2286 UniqueMemberAddr);
2287 break;
2288 }
2290 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2291 Int32Ty, ResByArg.Byte);
2292 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2293 ResByArg.Bit);
2294 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2295 break;
2296 }
2297 default:
2298 break;
2299 }
2300 }
2301
2303 // The type of the function is irrelevant, because it's bitcast at calls
2304 // anyhow.
2305 auto *JT = cast<Function>(
2306 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2307 Type::getVoidTy(M.getContext()))
2308 .getCallee());
2309 bool IsExported = false;
2310 applyICallBranchFunnel(SlotInfo, *JT, IsExported);
2311 assert(!IsExported);
2312 }
2313}
2314
2315void DevirtModule::removeRedundantTypeTests() {
2316 auto *True = ConstantInt::getTrue(M.getContext());
2317 for (auto &&U : NumUnsafeUsesForTypeTest) {
2318 if (U.second == 0) {
2319 U.first->replaceAllUsesWith(True);
2320 U.first->eraseFromParent();
2321 }
2322 }
2323}
2324
2325ValueInfo
2326DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2327 ModuleSummaryIndex *ExportSummary) {
2328 assert((ExportSummary != nullptr) &&
2329 "Caller guarantees ExportSummary is not nullptr");
2330
2331 const auto TheFnGUID = TheFn->getGUID();
2332 const auto TheFnGUIDWithExportedName =
2334 // Look up ValueInfo with the GUID in the current linkage.
2335 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2336 // If no entry is found and GUID is different from GUID computed using
2337 // exported name, look up ValueInfo with the exported name unconditionally.
2338 // This is a fallback.
2339 //
2340 // The reason to have a fallback:
2341 // 1. LTO could enable global value internalization via
2342 // `enable-lto-internalization`.
2343 // 2. The GUID in ExportedSummary is computed using exported name.
2344 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2345 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2346 }
2347 return TheFnVI;
2348}
2349
2350bool DevirtModule::mustBeUnreachableFunction(
2351 Function *const F, ModuleSummaryIndex *ExportSummary) {
2353 return false;
2354 // First, learn unreachability by analyzing function IR.
2355 if (!F->isDeclaration()) {
2356 // A function must be unreachable if its entry block ends with an
2357 // 'unreachable'.
2358 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2359 }
2360 // Learn unreachability from ExportSummary if ExportSummary is present.
2361 return ExportSummary &&
2363 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2364}
2365
2366bool DevirtModule::run() {
2367 // If only some of the modules were split, we cannot correctly perform
2368 // this transformation. We already checked for the presense of type tests
2369 // with partially split modules during the thin link, and would have emitted
2370 // an error if any were found, so here we can simply return.
2371 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2372 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2373 return false;
2374
2375 Function *PublicTypeTestFunc = nullptr;
2376 // If we are in speculative devirtualization mode, we can work on the public
2377 // type test intrinsics.
2379 PublicTypeTestFunc =
2380 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
2381 Function *TypeTestFunc =
2382 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
2383 Function *TypeCheckedLoadFunc =
2384 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
2385 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
2386 &M, Intrinsic::type_checked_load_relative);
2387 Function *AssumeFunc =
2388 Intrinsic::getDeclarationIfExists(&M, Intrinsic::assume);
2389
2390 // Normally if there are no users of the devirtualization intrinsics in the
2391 // module, this pass has nothing to do. But if we are exporting, we also need
2392 // to handle any users that appear only in the function summaries.
2393 if (!ExportSummary &&
2394 (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
2395 (!TypeTestFunc || TypeTestFunc->use_empty())) ||
2396 !AssumeFunc || AssumeFunc->use_empty()) &&
2397 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2398 (!TypeCheckedLoadRelativeFunc ||
2399 TypeCheckedLoadRelativeFunc->use_empty()))
2400 return false;
2401
2402 // Rebuild type metadata into a map for easy lookup.
2403 std::vector<VTableBits> Bits;
2404 DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
2405 buildTypeIdentifierMap(Bits, TypeIdMap);
2406
2407 if (PublicTypeTestFunc && AssumeFunc)
2408 scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
2409
2410 if (TypeTestFunc && AssumeFunc)
2411 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2412
2413 if (TypeCheckedLoadFunc)
2414 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2415
2416 if (TypeCheckedLoadRelativeFunc)
2417 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2418
2419 if (ImportSummary) {
2420 for (auto &S : CallSlots)
2421 importResolution(S.first, S.second);
2422
2423 removeRedundantTypeTests();
2424
2425 // We have lowered or deleted the type intrinsics, so we will no longer have
2426 // enough information to reason about the liveness of virtual function
2427 // pointers in GlobalDCE.
2428 for (GlobalVariable &GV : M.globals())
2429 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2430
2431 // The rest of the code is only necessary when exporting or during regular
2432 // LTO, so we are done.
2433 return true;
2434 }
2435
2436 if (TypeIdMap.empty())
2437 return true;
2438
2439 // Collect information from summary about which calls to try to devirtualize.
2440 if (ExportSummary) {
2441 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2442 for (auto &P : TypeIdMap) {
2443 if (auto *TypeId = dyn_cast<MDString>(P.first))
2445 TypeId->getString())]
2446 .push_back(TypeId);
2447 }
2448
2449 for (auto &P : *ExportSummary) {
2450 for (auto &S : P.second.getSummaryList()) {
2451 auto *FS = dyn_cast<FunctionSummary>(S.get());
2452 if (!FS)
2453 continue;
2454 // FIXME: Only add live functions.
2455 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2456 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2457 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2458 }
2459 }
2460 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2461 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2462 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2463 }
2464 }
2465 for (const FunctionSummary::ConstVCall &VC :
2466 FS->type_test_assume_const_vcalls()) {
2467 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2468 CallSlots[{MD, VC.VFunc.Offset}]
2469 .ConstCSInfo[VC.Args]
2470 .addSummaryTypeTestAssumeUser(FS);
2471 }
2472 }
2473 for (const FunctionSummary::ConstVCall &VC :
2474 FS->type_checked_load_const_vcalls()) {
2475 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2476 CallSlots[{MD, VC.VFunc.Offset}]
2477 .ConstCSInfo[VC.Args]
2478 .addSummaryTypeCheckedLoadUser(FS);
2479 }
2480 }
2481 }
2482 }
2483 }
2484
2485 // For each (type, offset) pair:
2486 bool DidVirtualConstProp = false;
2487 std::map<std::string, GlobalValue *> DevirtTargets;
2488 for (auto &S : CallSlots) {
2489 // Search each of the members of the type identifier for the virtual
2490 // function implementation at offset S.first.ByteOffset, and add to
2491 // TargetsForSlot.
2492 std::vector<VirtualCallTarget> TargetsForSlot;
2493 WholeProgramDevirtResolution *Res = nullptr;
2494 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2495 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2496 TypeMemberInfos.size())
2497 // For any type id used on a global's type metadata, create the type id
2498 // summary resolution regardless of whether we can devirtualize, so that
2499 // lower type tests knows the type id is not Unsat. If it was not used on
2500 // a global's type metadata, the TypeIdMap entry set will be empty, and
2501 // we don't want to create an entry (with the default Unknown type
2502 // resolution), which can prevent detection of the Unsat.
2503 Res = &ExportSummary
2504 ->getOrInsertTypeIdSummary(
2505 cast<MDString>(S.first.TypeID)->getString())
2506 .WPDRes[S.first.ByteOffset];
2507 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2508 S.first.ByteOffset, ExportSummary)) {
2509 bool SingleImplDevirt =
2510 trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
2511 // Out of speculative devirtualization mode, Try to apply virtual constant
2512 // propagation or branch funneling.
2513 // TODO: This should eventually be enabled for non-public type tests.
2514 if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
2515 DidVirtualConstProp |=
2516 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2517
2518 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2519 }
2520
2521 // Collect functions devirtualized at least for one call site for stats.
2522 if (RemarksEnabled || AreStatisticsEnabled())
2523 for (const auto &T : TargetsForSlot)
2524 if (T.WasDevirt)
2525 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2526 }
2527
2528 // CFI-specific: if we are exporting and any llvm.type.checked.load
2529 // intrinsics were *not* devirtualized, we need to add the resulting
2530 // llvm.type.test intrinsics to the function summaries so that the
2531 // LowerTypeTests pass will export them.
2532 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2534 cast<MDString>(S.first.TypeID)->getString());
2535 auto AddTypeTestsForTypeCheckedLoads = [&](CallSiteInfo &CSI) {
2536 if (!CSI.AllCallSitesDevirted)
2537 for (auto *FS : CSI.SummaryTypeCheckedLoadUsers)
2538 FS->addTypeTest(GUID);
2539 };
2540 AddTypeTestsForTypeCheckedLoads(S.second.CSInfo);
2541 for (auto &CCS : S.second.ConstCSInfo)
2542 AddTypeTestsForTypeCheckedLoads(CCS.second);
2543 }
2544 }
2545
2546 if (RemarksEnabled) {
2547 // Generate remarks for each devirtualized function.
2548 for (const auto &DT : DevirtTargets) {
2549 GlobalValue *GV = DT.second;
2550 auto *F = dyn_cast<Function>(GV);
2551 if (!F) {
2552 auto *A = dyn_cast<GlobalAlias>(GV);
2553 assert(A && isa<Function>(A->getAliasee()));
2554 F = dyn_cast<Function>(A->getAliasee());
2555 assert(F);
2556 }
2557
2558 using namespace ore;
2559 OREGetter(*F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2560 << "devirtualized " << NV("FunctionName", DT.first));
2561 }
2562 }
2563
2564 NumDevirtTargets += DevirtTargets.size();
2565
2566 removeRedundantTypeTests();
2567
2568 // Rebuild each global we touched as part of virtual constant propagation to
2569 // include the before and after bytes.
2570 if (DidVirtualConstProp)
2571 for (VTableBits &B : Bits)
2572 rebuildGlobal(B);
2573
2574 // We have lowered or deleted the type intrinsics, so we will no longer have
2575 // enough information to reason about the liveness of virtual function
2576 // pointers in GlobalDCE.
2577 for (GlobalVariable &GV : M.globals())
2578 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2579
2580 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2581 CI->eraseFromParent();
2582
2583 return true;
2584}
2585
2586void DevirtIndex::run() {
2587 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2588 return;
2589
2590 // Assert that we haven't made any changes that would affect the hasLocal()
2591 // flag on the GUID summary info.
2592 assert(!ExportSummary.withInternalizeAndPromote() &&
2593 "Expect index-based WPD to run before internalization and promotion");
2594
2595 DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
2596 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2597 NameByGUID[GlobalValue::getGUIDAssumingExternalLinkage(P.first)].push_back(
2598 P.first);
2599 // Create the type id summary resolution regardlness of whether we can
2600 // devirtualize, so that lower type tests knows the type id is used on
2601 // a global and not Unsat. We do this here rather than in the loop over the
2602 // CallSlots, since that handling will only see type tests that directly
2603 // feed assumes, and we would miss any that aren't currently handled by WPD
2604 // (such as type tests that feed assumes via phis).
2605 ExportSummary.getOrInsertTypeIdSummary(P.first);
2606 }
2607
2608 // Collect information from summary about which calls to try to devirtualize.
2609 for (auto &P : ExportSummary) {
2610 for (auto &S : P.second.getSummaryList()) {
2611 auto *FS = dyn_cast<FunctionSummary>(S.get());
2612 if (!FS)
2613 continue;
2614 // FIXME: Only add live functions.
2615 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2616 for (StringRef Name : NameByGUID[VF.GUID]) {
2617 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2618 }
2619 }
2620 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2621 for (StringRef Name : NameByGUID[VF.GUID]) {
2622 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2623 }
2624 }
2625 for (const FunctionSummary::ConstVCall &VC :
2626 FS->type_test_assume_const_vcalls()) {
2627 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2628 CallSlots[{Name, VC.VFunc.Offset}]
2629 .ConstCSInfo[VC.Args]
2630 .addSummaryTypeTestAssumeUser(FS);
2631 }
2632 }
2633 for (const FunctionSummary::ConstVCall &VC :
2634 FS->type_checked_load_const_vcalls()) {
2635 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2636 CallSlots[{Name, VC.VFunc.Offset}]
2637 .ConstCSInfo[VC.Args]
2638 .addSummaryTypeCheckedLoadUser(FS);
2639 }
2640 }
2641 }
2642 }
2643
2644 std::set<ValueInfo> DevirtTargets;
2645 // For each (type, offset) pair:
2646 for (auto &S : CallSlots) {
2647 // Search each of the members of the type identifier for the virtual
2648 // function implementation at offset S.first.ByteOffset, and add to
2649 // TargetsForSlot.
2650 std::vector<ValueInfo> TargetsForSlot;
2651 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2652 assert(TidSummary);
2653 // The type id summary would have been created while building the NameByGUID
2654 // map earlier.
2655 WholeProgramDevirtResolution *Res =
2656 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2657 ->WPDRes[S.first.ByteOffset];
2658 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2659 S.first.ByteOffset)) {
2660
2661 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2662 DevirtTargets))
2663 continue;
2664 }
2665 }
2666
2667 // Optionally have the thin link print message for each devirtualized
2668 // function.
2670 for (const auto &DT : DevirtTargets)
2671 errs() << "Devirtualized call to " << DT << "\n";
2672
2673 NumDevirtTargets += DevirtTargets.size();
2674}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
dxil translate DXIL Translate Metadata
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Provides passes for computing function attributes based on interprocedural analyses.
@ CallSiteInfo
#define DEBUG_TYPE
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, bool Skip)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
#define T
static bool mustBeUnreachableFunction(const Function &F)
This is the interface to build a ModuleSummaryIndex for a module.
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static bool addCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< bool > WholeProgramDevirtKeepUnreachableFunction("wholeprogramdevirt-keep-unreachable-function", cl::desc("Regard unreachable functions as possible devirtualize targets."), cl::Hidden, cl::init(true))
With Clang, a pure virtual class's deleting destructor is emitted as a llvm.trap intrinsic followed b...
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static cl::opt< unsigned > WholeProgramDevirtCutoff("wholeprogramdevirt-cutoff", cl::desc("Max number of devirtualizations for devirt module pass"), cl::init(0))
If explicitly specified, the devirt module pass will stop transformation once the total number of dev...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:223
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setCallingConv(CallingConv::ID CC)
bool arg_empty() const
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
AttributeList getAttributes() const
Return the attributes for this call.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
@ ICMP_NE
not equal
Definition InstrTypes.h:698
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1301
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition Constants.h:486
const Constant * stripPointerCasts() const
Definition Constant.h:219
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
bool empty() const
Definition DenseMap.h:109
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition Error.h:334
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Type * getParamType(unsigned i) const
Parameter type accessors.
ArrayRef< Type * > params() const
bool isVarArg() const
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
bool empty() const
Definition Function.h:857
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
bool arg_empty() const
Definition Function.h:900
const BasicBlock & front() const
Definition Function.h:858
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:244
arg_iterator arg_begin()
Definition Function.h:866
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
unsigned getInstructionCount() const
Returns the number of non-debug IR instructions in this function.
Definition Function.cpp:367
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:598
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition Value.h:602
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI VCallVisibility getVCallVisibility() const
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI void setVCallVisibilityMetadata(VCallVisibility Visibility)
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
void setVisibility(VisibilityTypes V)
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition ModRef.h:215
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
Target - Wrapper for Target specific information.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
bool use_empty() const
Definition Value.h:346
LLVM_ABI bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition FileSystem.h:764
LLVM_ABI uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
LLVM_ABI void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
LLVM_ABI void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
@ Export
Export information to summary.
Definition IPO.h:57
@ None
Do nothing.
Definition IPO.h:55
@ Import
Import information from summary.
Definition IPO.h:56
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const ModuleToSummariesForIndexTy *ModuleToSummariesForIndex=nullptr, const GVSummaryPtrSet *DecSummaries=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
@ invalid_argument
Definition Errc.h:56
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
LLVM_ABI void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
LLVM_ABI void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
LLVM_ABI CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName)
Analogous to setExplicitlyUnknownBranchWeights, but for functions and their entry counts.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
LLVM_ABI void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition Error.h:1245
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static cl::opt< bool > ClDevirtualizeSpeculatively("devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
LLVM_ABI void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
LLVM_ABI void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
const ModuleSummaryIndex * ImportSummary
ModuleSummaryIndex * ExportSummary
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
LLVM_ABI VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)