LLVM 18.0.0git
WholeProgramDevirt.cpp
Go to the documentation of this file.
1//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements whole program optimization of virtual calls in cases
10// where we know (via !type metadata) that the list of callees is fixed. This
11// includes the following:
12// - Single implementation devirtualization: if a virtual call has a single
13// possible callee, replace all calls with a direct call to that callee.
14// - Virtual constant propagation: if the virtual function's return type is an
15// integer <=64 bits and all possible callees are readnone, for each class and
16// each list of constant arguments: evaluate the function, store the return
17// value alongside the virtual table, and rewrite each virtual call as a load
18// from the virtual table.
19// - Uniform return value optimization: if the conditions for virtual constant
20// propagation hold and each function returns the same constant value, replace
21// each virtual call with that constant.
22// - Unique return value optimization for i1 return values: if the conditions
23// for virtual constant propagation hold and a single vtable's function
24// returns 0, or a single vtable's function returns 1, replace each virtual
25// call with a comparison of the vptr against that vtable's address.
26//
27// This pass is intended to be used during the regular and thin LTO pipelines:
28//
29// During regular LTO, the pass determines the best optimization for each
30// virtual call and applies the resolutions directly to virtual calls that are
31// eligible for virtual call optimization (i.e. calls that use either of the
32// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
33//
34// During hybrid Regular/ThinLTO, the pass operates in two phases:
35// - Export phase: this is run during the thin link over a single merged module
36// that contains all vtables with !type metadata that participate in the link.
37// The pass computes a resolution for each virtual call and stores it in the
38// type identifier summary.
39// - Import phase: this is run during the thin backends over the individual
40// modules. The pass applies the resolutions previously computed during the
41// import phase to each eligible virtual call.
42//
43// During ThinLTO, the pass operates in two phases:
44// - Export phase: this is run during the thin link over the index which
45// contains a summary of all vtables with !type metadata that participate in
46// the link. It computes a resolution for each virtual call and stores it in
47// the type identifier summary. Only single implementation devirtualization
48// is supported.
49// - Import phase: (same as with hybrid case above).
50//
51//===----------------------------------------------------------------------===//
52
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/DenseSet.h"
58#include "llvm/ADT/MapVector.h"
60#include "llvm/ADT/Statistic.h"
68#include "llvm/IR/Constants.h"
69#include "llvm/IR/DataLayout.h"
70#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/Dominators.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/IRBuilder.h"
77#include "llvm/IR/InstrTypes.h"
78#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Intrinsics.h"
81#include "llvm/IR/LLVMContext.h"
82#include "llvm/IR/MDBuilder.h"
83#include "llvm/IR/Metadata.h"
84#include "llvm/IR/Module.h"
88#include "llvm/Support/Errc.h"
89#include "llvm/Support/Error.h"
94#include "llvm/Transforms/IPO.h"
99#include <algorithm>
100#include <cstddef>
101#include <map>
102#include <set>
103#include <string>
104
105using namespace llvm;
106using namespace wholeprogramdevirt;
107
108#define DEBUG_TYPE "wholeprogramdevirt"
109
110STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
111STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
112STATISTIC(NumBranchFunnel, "Number of branch funnels");
113STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
114STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
115STATISTIC(NumVirtConstProp1Bit,
116 "Number of 1 bit virtual constant propagations");
117STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
118
120 "wholeprogramdevirt-summary-action",
121 cl::desc("What to do with the summary when running this pass"),
122 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
123 clEnumValN(PassSummaryAction::Import, "import",
124 "Import typeid resolutions from summary and globals"),
125 clEnumValN(PassSummaryAction::Export, "export",
126 "Export typeid resolutions to summary and globals")),
127 cl::Hidden);
128
130 "wholeprogramdevirt-read-summary",
131 cl::desc(
132 "Read summary from given bitcode or YAML file before running pass"),
133 cl::Hidden);
134
136 "wholeprogramdevirt-write-summary",
137 cl::desc("Write summary to given bitcode or YAML file after running pass. "
138 "Output file format is deduced from extension: *.bc means writing "
139 "bitcode, otherwise YAML"),
140 cl::Hidden);
141
143 ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
144 cl::init(10),
145 cl::desc("Maximum number of call targets per "
146 "call site to enable branch funnels"));
147
148static cl::opt<bool>
149 PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
150 cl::desc("Print index-based devirtualization messages"));
151
152/// Provide a way to force enable whole program visibility in tests.
153/// This is needed to support legacy tests that don't contain
154/// !vcall_visibility metadata (the mere presense of type tests
155/// previously implied hidden visibility).
156static cl::opt<bool>
157 WholeProgramVisibility("whole-program-visibility", cl::Hidden,
158 cl::desc("Enable whole program visibility"));
159
160/// Provide a way to force disable whole program for debugging or workarounds,
161/// when enabled via the linker.
163 "disable-whole-program-visibility", cl::Hidden,
164 cl::desc("Disable whole program visibility (overrides enabling options)"));
165
166/// Provide way to prevent certain function from being devirtualized
168 SkipFunctionNames("wholeprogramdevirt-skip",
169 cl::desc("Prevent function(s) from being devirtualized"),
171
172/// Mechanism to add runtime checking of devirtualization decisions, optionally
173/// trapping or falling back to indirect call on any that are not correct.
174/// Trapping mode is useful for debugging undefined behavior leading to failures
175/// with WPD. Fallback mode is useful for ensuring safety when whole program
176/// visibility may be compromised.
179 "wholeprogramdevirt-check", cl::Hidden,
180 cl::desc("Type of checking for incorrect devirtualizations"),
181 cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
182 clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
184 "Fallback to indirect when incorrect")));
185
186namespace {
187struct PatternList {
188 std::vector<GlobPattern> Patterns;
189 template <class T> void init(const T &StringList) {
190 for (const auto &S : StringList)
192 Patterns.push_back(std::move(*Pat));
193 }
194 bool match(StringRef S) {
195 for (const GlobPattern &P : Patterns)
196 if (P.match(S))
197 return true;
198 return false;
199 }
200};
201} // namespace
202
203// Find the minimum offset that we may store a value of size Size bits at. If
204// IsAfter is set, look for an offset before the object, otherwise look for an
205// offset after the object.
208 bool IsAfter, uint64_t Size) {
209 // Find a minimum offset taking into account only vtable sizes.
210 uint64_t MinByte = 0;
211 for (const VirtualCallTarget &Target : Targets) {
212 if (IsAfter)
213 MinByte = std::max(MinByte, Target.minAfterBytes());
214 else
215 MinByte = std::max(MinByte, Target.minBeforeBytes());
216 }
217
218 // Build a vector of arrays of bytes covering, for each target, a slice of the
219 // used region (see AccumBitVector::BytesUsed in
220 // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
221 // this aligns the used regions to start at MinByte.
222 //
223 // In this example, A, B and C are vtables, # is a byte already allocated for
224 // a virtual function pointer, AAAA... (etc.) are the used regions for the
225 // vtables and Offset(X) is the value computed for the Offset variable below
226 // for X.
227 //
228 // Offset(A)
229 // | |
230 // |MinByte
231 // A: ################AAAAAAAA|AAAAAAAA
232 // B: ########BBBBBBBBBBBBBBBB|BBBB
233 // C: ########################|CCCCCCCCCCCCCCCC
234 // | Offset(B) |
235 //
236 // This code produces the slices of A, B and C that appear after the divider
237 // at MinByte.
238 std::vector<ArrayRef<uint8_t>> Used;
239 for (const VirtualCallTarget &Target : Targets) {
240 ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
241 : Target.TM->Bits->Before.BytesUsed;
242 uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
243 : MinByte - Target.minBeforeBytes();
244
245 // Disregard used regions that are smaller than Offset. These are
246 // effectively all-free regions that do not need to be checked.
247 if (VTUsed.size() > Offset)
248 Used.push_back(VTUsed.slice(Offset));
249 }
250
251 if (Size == 1) {
252 // Find a free bit in each member of Used.
253 for (unsigned I = 0;; ++I) {
254 uint8_t BitsUsed = 0;
255 for (auto &&B : Used)
256 if (I < B.size())
257 BitsUsed |= B[I];
258 if (BitsUsed != 0xff)
259 return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
260 }
261 } else {
262 // Find a free (Size/8) byte region in each member of Used.
263 // FIXME: see if alignment helps.
264 for (unsigned I = 0;; ++I) {
265 for (auto &&B : Used) {
266 unsigned Byte = 0;
267 while ((I + Byte) < B.size() && Byte < (Size / 8)) {
268 if (B[I + Byte])
269 goto NextI;
270 ++Byte;
271 }
272 }
273 return (MinByte + I) * 8;
274 NextI:;
275 }
276 }
277}
278
280 MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
281 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
282 if (BitWidth == 1)
283 OffsetByte = -(AllocBefore / 8 + 1);
284 else
285 OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
286 OffsetBit = AllocBefore % 8;
287
288 for (VirtualCallTarget &Target : Targets) {
289 if (BitWidth == 1)
290 Target.setBeforeBit(AllocBefore);
291 else
292 Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
293 }
294}
295
298 unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
299 if (BitWidth == 1)
300 OffsetByte = AllocAfter / 8;
301 else
302 OffsetByte = (AllocAfter + 7) / 8;
303 OffsetBit = AllocAfter % 8;
304
305 for (VirtualCallTarget &Target : Targets) {
306 if (BitWidth == 1)
307 Target.setAfterBit(AllocAfter);
308 else
309 Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
310 }
311}
312
314 : Fn(Fn), TM(TM),
315 IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
316 WasDevirt(false) {}
317
318namespace {
319
320// A slot in a set of virtual tables. The TypeID identifies the set of virtual
321// tables, and the ByteOffset is the offset in bytes from the address point to
322// the virtual function pointer.
323struct VTableSlot {
325 uint64_t ByteOffset;
326};
327
328} // end anonymous namespace
329
330namespace llvm {
331
332template <> struct DenseMapInfo<VTableSlot> {
333 static VTableSlot getEmptyKey() {
336 }
337 static VTableSlot getTombstoneKey() {
340 }
341 static unsigned getHashValue(const VTableSlot &I) {
344 }
345 static bool isEqual(const VTableSlot &LHS,
346 const VTableSlot &RHS) {
347 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
348 }
349};
350
351template <> struct DenseMapInfo<VTableSlotSummary> {
355 }
359 }
360 static unsigned getHashValue(const VTableSlotSummary &I) {
363 }
364 static bool isEqual(const VTableSlotSummary &LHS,
365 const VTableSlotSummary &RHS) {
366 return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
367 }
368};
369
370} // end namespace llvm
371
372// Returns true if the function must be unreachable based on ValueInfo.
373//
374// In particular, identifies a function as unreachable in the following
375// conditions
376// 1) All summaries are live.
377// 2) All function summaries indicate it's unreachable
378// 3) There is no non-function with the same GUID (which is rare)
380 if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
381 // Returns false if ValueInfo is absent, or the summary list is empty
382 // (e.g., function declarations).
383 return false;
384 }
385
386 for (const auto &Summary : TheFnVI.getSummaryList()) {
387 // Conservatively returns false if any non-live functions are seen.
388 // In general either all summaries should be live or all should be dead.
389 if (!Summary->isLive())
390 return false;
391 if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
392 if (!FS->fflags().MustBeUnreachable)
393 return false;
394 }
395 // Be conservative if a non-function has the same GUID (which is rare).
396 else
397 return false;
398 }
399 // All function summaries are live and all of them agree that the function is
400 // unreachble.
401 return true;
402}
403
404namespace {
405// A virtual call site. VTable is the loaded virtual table pointer, and CS is
406// the indirect virtual call.
407struct VirtualCallSite {
408 Value *VTable = nullptr;
409 CallBase &CB;
410
411 // If non-null, this field points to the associated unsafe use count stored in
412 // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
413 // of that field for details.
414 unsigned *NumUnsafeUses = nullptr;
415
416 void
417 emitRemark(const StringRef OptName, const StringRef TargetName,
419 Function *F = CB.getCaller();
420 DebugLoc DLoc = CB.getDebugLoc();
421 BasicBlock *Block = CB.getParent();
422
423 using namespace ore;
424 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
425 << NV("Optimization", OptName)
426 << ": devirtualized a call to "
427 << NV("FunctionName", TargetName));
428 }
429
430 void replaceAndErase(
431 const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
433 Value *New) {
434 if (RemarksEnabled)
435 emitRemark(OptName, TargetName, OREGetter);
436 CB.replaceAllUsesWith(New);
437 if (auto *II = dyn_cast<InvokeInst>(&CB)) {
438 BranchInst::Create(II->getNormalDest(), &CB);
439 II->getUnwindDest()->removePredecessor(II->getParent());
440 }
441 CB.eraseFromParent();
442 // This use is no longer unsafe.
443 if (NumUnsafeUses)
444 --*NumUnsafeUses;
445 }
446};
447
448// Call site information collected for a specific VTableSlot and possibly a list
449// of constant integer arguments. The grouping by arguments is handled by the
450// VTableSlotInfo class.
451struct CallSiteInfo {
452 /// The set of call sites for this slot. Used during regular LTO and the
453 /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
454 /// call sites that appear in the merged module itself); in each of these
455 /// cases we are directly operating on the call sites at the IR level.
456 std::vector<VirtualCallSite> CallSites;
457
458 /// Whether all call sites represented by this CallSiteInfo, including those
459 /// in summaries, have been devirtualized. This starts off as true because a
460 /// default constructed CallSiteInfo represents no call sites.
461 bool AllCallSitesDevirted = true;
462
463 // These fields are used during the export phase of ThinLTO and reflect
464 // information collected from function summaries.
465
466 /// Whether any function summary contains an llvm.assume(llvm.type.test) for
467 /// this slot.
468 bool SummaryHasTypeTestAssumeUsers = false;
469
470 /// CFI-specific: a vector containing the list of function summaries that use
471 /// the llvm.type.checked.load intrinsic and therefore will require
472 /// resolutions for llvm.type.test in order to implement CFI checks if
473 /// devirtualization was unsuccessful. If devirtualization was successful, the
474 /// pass will clear this vector by calling markDevirt(). If at the end of the
475 /// pass the vector is non-empty, we will need to add a use of llvm.type.test
476 /// to each of the function summaries in the vector.
477 std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
478 std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
479
480 bool isExported() const {
481 return SummaryHasTypeTestAssumeUsers ||
482 !SummaryTypeCheckedLoadUsers.empty();
483 }
484
485 void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
486 SummaryTypeCheckedLoadUsers.push_back(FS);
487 AllCallSitesDevirted = false;
488 }
489
490 void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
491 SummaryTypeTestAssumeUsers.push_back(FS);
492 SummaryHasTypeTestAssumeUsers = true;
493 AllCallSitesDevirted = false;
494 }
495
496 void markDevirt() {
497 AllCallSitesDevirted = true;
498
499 // As explained in the comment for SummaryTypeCheckedLoadUsers.
500 SummaryTypeCheckedLoadUsers.clear();
501 }
502};
503
504// Call site information collected for a specific VTableSlot.
505struct VTableSlotInfo {
506 // The set of call sites which do not have all constant integer arguments
507 // (excluding "this").
508 CallSiteInfo CSInfo;
509
510 // The set of call sites with all constant integer arguments (excluding
511 // "this"), grouped by argument list.
512 std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
513
514 void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
515
516private:
517 CallSiteInfo &findCallSiteInfo(CallBase &CB);
518};
519
520CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
521 std::vector<uint64_t> Args;
522 auto *CBType = dyn_cast<IntegerType>(CB.getType());
523 if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
524 return CSInfo;
525 for (auto &&Arg : drop_begin(CB.args())) {
526 auto *CI = dyn_cast<ConstantInt>(Arg);
527 if (!CI || CI->getBitWidth() > 64)
528 return CSInfo;
529 Args.push_back(CI->getZExtValue());
530 }
531 return ConstCSInfo[Args];
532}
533
534void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
535 unsigned *NumUnsafeUses) {
536 auto &CSI = findCallSiteInfo(CB);
537 CSI.AllCallSitesDevirted = false;
538 CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
539}
540
541struct DevirtModule {
542 Module &M;
543 function_ref<AAResults &(Function &)> AARGetter;
544 function_ref<DominatorTree &(Function &)> LookupDomTree;
545
546 ModuleSummaryIndex *ExportSummary;
547 const ModuleSummaryIndex *ImportSummary;
548
549 IntegerType *Int8Ty;
550 PointerType *Int8PtrTy;
552 IntegerType *Int64Ty;
553 IntegerType *IntPtrTy;
554 /// Sizeless array type, used for imported vtables. This provides a signal
555 /// to analyzers that these imports may alias, as they do for example
556 /// when multiple unique return values occur in the same vtable.
557 ArrayType *Int8Arr0Ty;
558
559 bool RemarksEnabled;
561
563
564 // Calls that have already been optimized. We may add a call to multiple
565 // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
566 // optimize a call more than once.
567 SmallPtrSet<CallBase *, 8> OptimizedCalls;
568
569 // Store calls that had their ptrauth bundle removed. They are to be deleted
570 // at the end of the optimization.
571 SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
572
573 // This map keeps track of the number of "unsafe" uses of a loaded function
574 // pointer. The key is the associated llvm.type.test intrinsic call generated
575 // by this pass. An unsafe use is one that calls the loaded function pointer
576 // directly. Every time we eliminate an unsafe use (for example, by
577 // devirtualizing it or by applying virtual constant propagation), we
578 // decrement the value stored in this map. If a value reaches zero, we can
579 // eliminate the type check by RAUWing the associated llvm.type.test call with
580 // true.
581 std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
582 PatternList FunctionsToSkip;
583
584 DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
586 function_ref<DominatorTree &(Function &)> LookupDomTree,
587 ModuleSummaryIndex *ExportSummary,
588 const ModuleSummaryIndex *ImportSummary)
589 : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
590 ExportSummary(ExportSummary), ImportSummary(ImportSummary),
591 Int8Ty(Type::getInt8Ty(M.getContext())),
592 Int8PtrTy(PointerType::getUnqual(M.getContext())),
593 Int32Ty(Type::getInt32Ty(M.getContext())),
594 Int64Ty(Type::getInt64Ty(M.getContext())),
595 IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
596 Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
597 RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
598 assert(!(ExportSummary && ImportSummary));
599 FunctionsToSkip.init(SkipFunctionNames);
600 }
601
602 bool areRemarksEnabled();
603
604 void
605 scanTypeTestUsers(Function *TypeTestFunc,
606 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
607 void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
608
609 void buildTypeIdentifierMap(
610 std::vector<VTableBits> &Bits,
611 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
612
613 bool
614 tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
615 const std::set<TypeMemberInfo> &TypeMemberInfos,
616 uint64_t ByteOffset,
617 ModuleSummaryIndex *ExportSummary);
618
619 void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
620 bool &IsExported);
621 bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
623 VTableSlotInfo &SlotInfo,
625
626 void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
627 bool &IsExported);
628 void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
629 VTableSlotInfo &SlotInfo,
630 WholeProgramDevirtResolution *Res, VTableSlot Slot);
631
632 bool tryEvaluateFunctionsWithArgs(
634 ArrayRef<uint64_t> Args);
635
636 void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
637 uint64_t TheRetVal);
638 bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
639 CallSiteInfo &CSInfo,
641
642 // Returns the global symbol name that is used to export information about the
643 // given vtable slot and list of arguments.
644 std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
646
647 bool shouldExportConstantsAsAbsoluteSymbols();
648
649 // This function is called during the export phase to create a symbol
650 // definition containing information about the given vtable slot and list of
651 // arguments.
652 void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
653 Constant *C);
654 void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
655 uint32_t Const, uint32_t &Storage);
656
657 // This function is called during the import phase to create a reference to
658 // the symbol definition created during the export phase.
659 Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
661 Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
662 StringRef Name, IntegerType *IntTy,
663 uint32_t Storage);
664
665 Constant *getMemberAddr(const TypeMemberInfo *M);
666
667 void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
668 Constant *UniqueMemberAddr);
669 bool tryUniqueRetValOpt(unsigned BitWidth,
671 CallSiteInfo &CSInfo,
673 VTableSlot Slot, ArrayRef<uint64_t> Args);
674
675 void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
676 Constant *Byte, Constant *Bit);
677 bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
678 VTableSlotInfo &SlotInfo,
679 WholeProgramDevirtResolution *Res, VTableSlot Slot);
680
681 void rebuildGlobal(VTableBits &B);
682
683 // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
684 void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
685
686 // If we were able to eliminate all unsafe uses for a type checked load,
687 // eliminate the associated type tests by replacing them with true.
688 void removeRedundantTypeTests();
689
690 bool run();
691
692 // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
693 //
694 // Caller guarantees that `ExportSummary` is not nullptr.
695 static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
696 ModuleSummaryIndex *ExportSummary);
697
698 // Returns true if the function definition must be unreachable.
699 //
700 // Note if this helper function returns true, `F` is guaranteed
701 // to be unreachable; if it returns false, `F` might still
702 // be unreachable but not covered by this helper function.
703 //
704 // Implementation-wise, if function definition is present, IR is analyzed; if
705 // not, look up function flags from ExportSummary as a fallback.
706 static bool mustBeUnreachableFunction(Function *const F,
707 ModuleSummaryIndex *ExportSummary);
708
709 // Lower the module using the action and summary passed as command line
710 // arguments. For testing purposes only.
711 static bool
712 runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
714 function_ref<DominatorTree &(Function &)> LookupDomTree);
715};
716
717struct DevirtIndex {
718 ModuleSummaryIndex &ExportSummary;
719 // The set in which to record GUIDs exported from their module by
720 // devirtualization, used by client to ensure they are not internalized.
721 std::set<GlobalValue::GUID> &ExportedGUIDs;
722 // A map in which to record the information necessary to locate the WPD
723 // resolution for local targets in case they are exported by cross module
724 // importing.
725 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
726
728
729 PatternList FunctionsToSkip;
730
731 DevirtIndex(
732 ModuleSummaryIndex &ExportSummary,
733 std::set<GlobalValue::GUID> &ExportedGUIDs,
734 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
735 : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
736 LocalWPDTargetsMap(LocalWPDTargetsMap) {
737 FunctionsToSkip.init(SkipFunctionNames);
738 }
739
740 bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
741 const TypeIdCompatibleVtableInfo TIdInfo,
742 uint64_t ByteOffset);
743
744 bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
745 VTableSlotSummary &SlotSummary,
746 VTableSlotInfo &SlotInfo,
748 std::set<ValueInfo> &DevirtTargets);
749
750 void run();
751};
752} // end anonymous namespace
753
756 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
757 auto AARGetter = [&](Function &F) -> AAResults & {
758 return FAM.getResult<AAManager>(F);
759 };
760 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
762 };
763 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
765 };
766 if (UseCommandLine) {
767 if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
768 return PreservedAnalyses::all();
770 }
771 if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
772 ImportSummary)
773 .run())
774 return PreservedAnalyses::all();
776}
777
778// Enable whole program visibility if enabled by client (e.g. linker) or
779// internal option, and not force disabled.
780bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
781 return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
783}
784
785static bool
787 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
788 // TypeID for member function pointer type is an internal construct
789 // and won't exist in IsVisibleToRegularObj. The full TypeID
790 // will be present and participate in invalidation.
791 if (TypeID.ends_with(".virtual"))
792 return false;
793
794 // TypeID that doesn't start with Itanium mangling (_ZTS) will be
795 // non-externally visible types which cannot interact with
796 // external native files. See CodeGenModule::CreateMetadataIdentifierImpl.
797 if (!TypeID.consume_front("_ZTS"))
798 return false;
799
800 // TypeID is keyed off the type name symbol (_ZTS). However, the native
801 // object may not contain this symbol if it does not contain a key
802 // function for the base type and thus only contains a reference to the
803 // type info (_ZTI). To catch this case we query using the type info
804 // symbol corresponding to the TypeID.
805 std::string typeInfo = ("_ZTI" + TypeID).str();
806 return IsVisibleToRegularObj(typeInfo);
807}
808
809static bool
811 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
813 GV.getMetadata(LLVMContext::MD_type, Types);
814
815 for (auto Type : Types)
816 if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
817 return typeIDVisibleToRegularObj(TypeID->getString(),
818 IsVisibleToRegularObj);
819
820 return false;
821}
822
823/// If whole program visibility asserted, then upgrade all public vcall
824/// visibility metadata on vtable definitions to linkage unit visibility in
825/// Module IR (for regular or hybrid LTO).
827 Module &M, bool WholeProgramVisibilityEnabledInLTO,
828 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
829 bool ValidateAllVtablesHaveTypeInfos,
830 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
831 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
832 return;
833 for (GlobalVariable &GV : M.globals()) {
834 // Add linkage unit visibility to any variable with type metadata, which are
835 // the vtable definitions. We won't have an existing vcall_visibility
836 // metadata on vtable definitions with public visibility.
837 if (GV.hasMetadata(LLVMContext::MD_type) &&
839 // Don't upgrade the visibility for symbols exported to the dynamic
840 // linker, as we have no information on their eventual use.
841 !DynamicExportSymbols.count(GV.getGUID()) &&
842 // With validation enabled, we want to exclude symbols visible to
843 // regular objects. Local symbols will be in this group due to the
844 // current implementation but those with VCallVisibilityTranslationUnit
845 // will have already been marked in clang so are unaffected.
846 !(ValidateAllVtablesHaveTypeInfos &&
847 skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
849 }
850}
851
853 bool WholeProgramVisibilityEnabledInLTO) {
854 Function *PublicTypeTestFunc =
855 M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
856 if (!PublicTypeTestFunc)
857 return;
858 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {
859 Function *TypeTestFunc =
860 Intrinsic::getDeclaration(&M, Intrinsic::type_test);
861 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
862 auto *CI = cast<CallInst>(U.getUser());
863 auto *NewCI = CallInst::Create(
864 TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)},
865 std::nullopt, "", CI);
866 CI->replaceAllUsesWith(NewCI);
867 CI->eraseFromParent();
868 }
869 } else {
870 auto *True = ConstantInt::getTrue(M.getContext());
871 for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
872 auto *CI = cast<CallInst>(U.getUser());
873 CI->replaceAllUsesWith(True);
874 CI->eraseFromParent();
875 }
876 }
877}
878
879/// Based on typeID string, get all associated vtable GUIDS that are
880/// visible to regular objects.
883 DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
884 function_ref<bool(StringRef)> IsVisibleToRegularObj) {
885 for (const auto &typeID : Index.typeIdCompatibleVtableMap()) {
886 if (typeIDVisibleToRegularObj(typeID.first, IsVisibleToRegularObj))
887 for (const TypeIdOffsetVtableInfo &P : typeID.second)
888 VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
889 }
890}
891
892/// If whole program visibility asserted, then upgrade all public vcall
893/// visibility metadata on vtable definition summaries to linkage unit
894/// visibility in Module summary index (for ThinLTO).
896 ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
897 const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
898 const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
899 if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
900 return;
901 for (auto &P : Index) {
902 // Don't upgrade the visibility for symbols exported to the dynamic
903 // linker, as we have no information on their eventual use.
904 if (DynamicExportSymbols.count(P.first))
905 continue;
906 for (auto &S : P.second.SummaryList) {
907 auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
908 if (!GVar ||
909 GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
910 continue;
911 // With validation enabled, we want to exclude symbols visible to regular
912 // objects. Local symbols will be in this group due to the current
913 // implementation but those with VCallVisibilityTranslationUnit will have
914 // already been marked in clang so are unaffected.
915 if (VisibleToRegularObjSymbols.count(P.first))
916 continue;
917 GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
918 }
919 }
920}
921
923 ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
924 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
925 DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
926}
927
929 ModuleSummaryIndex &Summary,
930 function_ref<bool(StringRef, ValueInfo)> isExported,
931 std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
932 for (auto &T : LocalWPDTargetsMap) {
933 auto &VI = T.first;
934 // This was enforced earlier during trySingleImplDevirt.
935 assert(VI.getSummaryList().size() == 1 &&
936 "Devirt of local target has more than one copy");
937 auto &S = VI.getSummaryList()[0];
938 if (!isExported(S->modulePath(), VI))
939 continue;
940
941 // It's been exported by a cross module import.
942 for (auto &SlotSummary : T.second) {
943 auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
944 assert(TIdSum);
945 auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
946 assert(WPDRes != TIdSum->WPDRes.end());
947 WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
948 WPDRes->second.SingleImplName,
949 Summary.getModuleHash(S->modulePath()));
950 }
951 }
952}
953
955 // Check that summary index contains regular LTO module when performing
956 // export to prevent occasional use of index from pure ThinLTO compilation
957 // (-fno-split-lto-module). This kind of summary index is passed to
958 // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
959 const auto &ModPaths = Summary->modulePaths();
962 return createStringError(
964 "combined summary should contain Regular LTO module");
965 return ErrorSuccess();
966}
967
968bool DevirtModule::runForTesting(
969 Module &M, function_ref<AAResults &(Function &)> AARGetter,
971 function_ref<DominatorTree &(Function &)> LookupDomTree) {
972 std::unique_ptr<ModuleSummaryIndex> Summary =
973 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
974
975 // Handle the command-line summary arguments. This code is for testing
976 // purposes only, so we handle errors directly.
977 if (!ClReadSummary.empty()) {
978 ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
979 ": ");
980 auto ReadSummaryFile =
982 if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
983 getModuleSummaryIndex(*ReadSummaryFile)) {
984 Summary = std::move(*SummaryOrErr);
985 ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
986 } else {
987 // Try YAML if we've failed with bitcode.
988 consumeError(SummaryOrErr.takeError());
989 yaml::Input In(ReadSummaryFile->getBuffer());
990 In >> *Summary;
991 ExitOnErr(errorCodeToError(In.error()));
992 }
993 }
994
995 bool Changed =
996 DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
998 : nullptr,
1000 : nullptr)
1001 .run();
1002
1003 if (!ClWriteSummary.empty()) {
1004 ExitOnError ExitOnErr(
1005 "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
1006 std::error_code EC;
1007 if (StringRef(ClWriteSummary).endswith(".bc")) {
1009 ExitOnErr(errorCodeToError(EC));
1010 writeIndexToFile(*Summary, OS);
1011 } else {
1013 ExitOnErr(errorCodeToError(EC));
1014 yaml::Output Out(OS);
1015 Out << *Summary;
1016 }
1017 }
1018
1019 return Changed;
1020}
1021
1022void DevirtModule::buildTypeIdentifierMap(
1023 std::vector<VTableBits> &Bits,
1024 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1026 Bits.reserve(M.global_size());
1028 for (GlobalVariable &GV : M.globals()) {
1029 Types.clear();
1030 GV.getMetadata(LLVMContext::MD_type, Types);
1031 if (GV.isDeclaration() || Types.empty())
1032 continue;
1033
1034 VTableBits *&BitsPtr = GVToBits[&GV];
1035 if (!BitsPtr) {
1036 Bits.emplace_back();
1037 Bits.back().GV = &GV;
1038 Bits.back().ObjectSize =
1039 M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
1040 BitsPtr = &Bits.back();
1041 }
1042
1043 for (MDNode *Type : Types) {
1044 auto TypeID = Type->getOperand(1).get();
1045
1047 cast<ConstantInt>(
1048 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
1049 ->getZExtValue();
1050
1051 TypeIdMap[TypeID].insert({BitsPtr, Offset});
1052 }
1053 }
1054}
1055
1056bool DevirtModule::tryFindVirtualCallTargets(
1057 std::vector<VirtualCallTarget> &TargetsForSlot,
1058 const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
1059 ModuleSummaryIndex *ExportSummary) {
1060 for (const TypeMemberInfo &TM : TypeMemberInfos) {
1061 if (!TM.Bits->GV->isConstant())
1062 return false;
1063
1064 // We cannot perform whole program devirtualization analysis on a vtable
1065 // with public LTO visibility.
1066 if (TM.Bits->GV->getVCallVisibility() ==
1068 return false;
1069
1070 Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
1071 TM.Offset + ByteOffset, M, TM.Bits->GV);
1072 if (!Ptr)
1073 return false;
1074
1075 auto C = Ptr->stripPointerCasts();
1076 // Make sure this is a function or alias to a function.
1077 auto Fn = dyn_cast<Function>(C);
1078 auto A = dyn_cast<GlobalAlias>(C);
1079 if (!Fn && A)
1080 Fn = dyn_cast<Function>(A->getAliasee());
1081
1082 if (!Fn)
1083 return false;
1084
1085 if (FunctionsToSkip.match(Fn->getName()))
1086 return false;
1087
1088 // We can disregard __cxa_pure_virtual as a possible call target, as
1089 // calls to pure virtuals are UB.
1090 if (Fn->getName() == "__cxa_pure_virtual")
1091 continue;
1092
1093 // We can disregard unreachable functions as possible call targets, as
1094 // unreachable functions shouldn't be called.
1095 if (mustBeUnreachableFunction(Fn, ExportSummary))
1096 continue;
1097
1098 // Save the symbol used in the vtable to use as the devirtualization
1099 // target.
1100 auto GV = dyn_cast<GlobalValue>(C);
1101 assert(GV);
1102 TargetsForSlot.push_back({GV, &TM});
1103 }
1104
1105 // Give up if we couldn't find any targets.
1106 return !TargetsForSlot.empty();
1107}
1108
1109bool DevirtIndex::tryFindVirtualCallTargets(
1110 std::vector<ValueInfo> &TargetsForSlot,
1111 const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
1112 for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
1113 // Find a representative copy of the vtable initializer.
1114 // We can have multiple available_externally, linkonce_odr and weak_odr
1115 // vtable initializers. We can also have multiple external vtable
1116 // initializers in the case of comdats, which we cannot check here.
1117 // The linker should give an error in this case.
1118 //
1119 // Also, handle the case of same-named local Vtables with the same path
1120 // and therefore the same GUID. This can happen if there isn't enough
1121 // distinguishing path when compiling the source file. In that case we
1122 // conservatively return false early.
1123 const GlobalVarSummary *VS = nullptr;
1124 bool LocalFound = false;
1125 for (const auto &S : P.VTableVI.getSummaryList()) {
1126 if (GlobalValue::isLocalLinkage(S->linkage())) {
1127 if (LocalFound)
1128 return false;
1129 LocalFound = true;
1130 }
1131 auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
1132 if (!CurVS->vTableFuncs().empty() ||
1133 // Previously clang did not attach the necessary type metadata to
1134 // available_externally vtables, in which case there would not
1135 // be any vtable functions listed in the summary and we need
1136 // to treat this case conservatively (in case the bitcode is old).
1137 // However, we will also not have any vtable functions in the
1138 // case of a pure virtual base class. In that case we do want
1139 // to set VS to avoid treating it conservatively.
1141 VS = CurVS;
1142 // We cannot perform whole program devirtualization analysis on a vtable
1143 // with public LTO visibility.
1144 if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
1145 return false;
1146 }
1147 }
1148 // There will be no VS if all copies are available_externally having no
1149 // type metadata. In that case we can't safely perform WPD.
1150 if (!VS)
1151 return false;
1152 if (!VS->isLive())
1153 continue;
1154 for (auto VTP : VS->vTableFuncs()) {
1155 if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
1156 continue;
1157
1158 if (mustBeUnreachableFunction(VTP.FuncVI))
1159 continue;
1160
1161 TargetsForSlot.push_back(VTP.FuncVI);
1162 }
1163 }
1164
1165 // Give up if we couldn't find any targets.
1166 return !TargetsForSlot.empty();
1167}
1168
1169void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
1170 Constant *TheFn, bool &IsExported) {
1171 // Don't devirtualize function if we're told to skip it
1172 // in -wholeprogramdevirt-skip.
1173 if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
1174 return;
1175 auto Apply = [&](CallSiteInfo &CSInfo) {
1176 for (auto &&VCallSite : CSInfo.CallSites) {
1177 if (!OptimizedCalls.insert(&VCallSite.CB).second)
1178 continue;
1179
1180 if (RemarksEnabled)
1181 VCallSite.emitRemark("single-impl",
1182 TheFn->stripPointerCasts()->getName(), OREGetter);
1183 NumSingleImpl++;
1184 auto &CB = VCallSite.CB;
1185 assert(!CB.getCalledFunction() && "devirtualizing direct call?");
1186 IRBuilder<> Builder(&CB);
1187 Value *Callee =
1188 Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
1189
1190 // If trap checking is enabled, add support to compare the virtual
1191 // function pointer to the devirtualized target. In case of a mismatch,
1192 // perform a debug trap.
1193 if (DevirtCheckMode == WPDCheckMode::Trap) {
1194 auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
1195 Instruction *ThenTerm =
1196 SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
1197 Builder.SetInsertPoint(ThenTerm);
1198 Function *TrapFn = Intrinsic::getDeclaration(&M, Intrinsic::debugtrap);
1199 auto *CallTrap = Builder.CreateCall(TrapFn);
1200 CallTrap->setDebugLoc(CB.getDebugLoc());
1201 }
1202
1203 // If fallback checking is enabled, add support to compare the virtual
1204 // function pointer to the devirtualized target. In case of a mismatch,
1205 // fall back to indirect call.
1206 if (DevirtCheckMode == WPDCheckMode::Fallback) {
1207 MDNode *Weights =
1208 MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1);
1209 // Version the indirect call site. If the called value is equal to the
1210 // given callee, 'NewInst' will be executed, otherwise the original call
1211 // site will be executed.
1212 CallBase &NewInst = versionCallSite(CB, Callee, Weights);
1213 NewInst.setCalledOperand(Callee);
1214 // Since the new call site is direct, we must clear metadata that
1215 // is only appropriate for indirect calls. This includes !prof and
1216 // !callees metadata.
1217 NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
1218 NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
1219 // Additionally, we should remove them from the fallback indirect call,
1220 // so that we don't attempt to perform indirect call promotion later.
1221 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1222 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1223 }
1224
1225 // In either trapping or non-checking mode, devirtualize original call.
1226 else {
1227 // Devirtualize unconditionally.
1228 CB.setCalledOperand(Callee);
1229 // Since the call site is now direct, we must clear metadata that
1230 // is only appropriate for indirect calls. This includes !prof and
1231 // !callees metadata.
1232 CB.setMetadata(LLVMContext::MD_prof, nullptr);
1233 CB.setMetadata(LLVMContext::MD_callees, nullptr);
1234 if (CB.getCalledOperand() &&
1236 auto *NewCS =
1238 CB.replaceAllUsesWith(NewCS);
1239 // Schedule for deletion at the end of pass run.
1240 CallsWithPtrAuthBundleRemoved.push_back(&CB);
1241 }
1242 }
1243
1244 // This use is no longer unsafe.
1245 if (VCallSite.NumUnsafeUses)
1246 --*VCallSite.NumUnsafeUses;
1247 }
1248 if (CSInfo.isExported())
1249 IsExported = true;
1250 CSInfo.markDevirt();
1251 };
1252 Apply(SlotInfo.CSInfo);
1253 for (auto &P : SlotInfo.ConstCSInfo)
1254 Apply(P.second);
1255}
1256
1257static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
1258 // We can't add calls if we haven't seen a definition
1259 if (Callee.getSummaryList().empty())
1260 return false;
1261
1262 // Insert calls into the summary index so that the devirtualized targets
1263 // are eligible for import.
1264 // FIXME: Annotate type tests with hotness. For now, mark these as hot
1265 // to better ensure we have the opportunity to inline them.
1266 bool IsExported = false;
1267 auto &S = Callee.getSummaryList()[0];
1268 CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
1269 auto AddCalls = [&](CallSiteInfo &CSInfo) {
1270 for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
1271 FS->addCall({Callee, CI});
1272 IsExported |= S->modulePath() != FS->modulePath();
1273 }
1274 for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
1275 FS->addCall({Callee, CI});
1276 IsExported |= S->modulePath() != FS->modulePath();
1277 }
1278 };
1279 AddCalls(SlotInfo.CSInfo);
1280 for (auto &P : SlotInfo.ConstCSInfo)
1281 AddCalls(P.second);
1282 return IsExported;
1283}
1284
1285bool DevirtModule::trySingleImplDevirt(
1286 ModuleSummaryIndex *ExportSummary,
1287 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1289 // See if the program contains a single implementation of this virtual
1290 // function.
1291 auto *TheFn = TargetsForSlot[0].Fn;
1292 for (auto &&Target : TargetsForSlot)
1293 if (TheFn != Target.Fn)
1294 return false;
1295
1296 // If so, update each call site to call that implementation directly.
1297 if (RemarksEnabled || AreStatisticsEnabled())
1298 TargetsForSlot[0].WasDevirt = true;
1299
1300 bool IsExported = false;
1301 applySingleImplDevirt(SlotInfo, TheFn, IsExported);
1302 if (!IsExported)
1303 return false;
1304
1305 // If the only implementation has local linkage, we must promote to external
1306 // to make it visible to thin LTO objects. We can only get here during the
1307 // ThinLTO export phase.
1308 if (TheFn->hasLocalLinkage()) {
1309 std::string NewName = (TheFn->getName() + ".llvm.merged").str();
1310
1311 // Since we are renaming the function, any comdats with the same name must
1312 // also be renamed. This is required when targeting COFF, as the comdat name
1313 // must match one of the names of the symbols in the comdat.
1314 if (Comdat *C = TheFn->getComdat()) {
1315 if (C->getName() == TheFn->getName()) {
1316 Comdat *NewC = M.getOrInsertComdat(NewName);
1317 NewC->setSelectionKind(C->getSelectionKind());
1318 for (GlobalObject &GO : M.global_objects())
1319 if (GO.getComdat() == C)
1320 GO.setComdat(NewC);
1321 }
1322 }
1323
1324 TheFn->setLinkage(GlobalValue::ExternalLinkage);
1325 TheFn->setVisibility(GlobalValue::HiddenVisibility);
1326 TheFn->setName(NewName);
1327 }
1328 if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
1329 // Any needed promotion of 'TheFn' has already been done during
1330 // LTO unit split, so we can ignore return value of AddCalls.
1331 AddCalls(SlotInfo, TheFnVI);
1332
1334 Res->SingleImplName = std::string(TheFn->getName());
1335
1336 return true;
1337}
1338
1339bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
1340 VTableSlotSummary &SlotSummary,
1341 VTableSlotInfo &SlotInfo,
1343 std::set<ValueInfo> &DevirtTargets) {
1344 // See if the program contains a single implementation of this virtual
1345 // function.
1346 auto TheFn = TargetsForSlot[0];
1347 for (auto &&Target : TargetsForSlot)
1348 if (TheFn != Target)
1349 return false;
1350
1351 // Don't devirtualize if we don't have target definition.
1352 auto Size = TheFn.getSummaryList().size();
1353 if (!Size)
1354 return false;
1355
1356 // Don't devirtualize function if we're told to skip it
1357 // in -wholeprogramdevirt-skip.
1358 if (FunctionsToSkip.match(TheFn.name()))
1359 return false;
1360
1361 // If the summary list contains multiple summaries where at least one is
1362 // a local, give up, as we won't know which (possibly promoted) name to use.
1363 for (const auto &S : TheFn.getSummaryList())
1364 if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
1365 return false;
1366
1367 // Collect functions devirtualized at least for one call site for stats.
1369 DevirtTargets.insert(TheFn);
1370
1371 auto &S = TheFn.getSummaryList()[0];
1372 bool IsExported = AddCalls(SlotInfo, TheFn);
1373 if (IsExported)
1374 ExportedGUIDs.insert(TheFn.getGUID());
1375
1376 // Record in summary for use in devirtualization during the ThinLTO import
1377 // step.
1379 if (GlobalValue::isLocalLinkage(S->linkage())) {
1380 if (IsExported)
1381 // If target is a local function and we are exporting it by
1382 // devirtualizing a call in another module, we need to record the
1383 // promoted name.
1385 TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
1386 else {
1387 LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
1388 Res->SingleImplName = std::string(TheFn.name());
1389 }
1390 } else
1391 Res->SingleImplName = std::string(TheFn.name());
1392
1393 // Name will be empty if this thin link driven off of serialized combined
1394 // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
1395 // legacy LTO API anyway.
1396 assert(!Res->SingleImplName.empty());
1397
1398 return true;
1399}
1400
1401void DevirtModule::tryICallBranchFunnel(
1402 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1403 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1404 Triple T(M.getTargetTriple());
1405 if (T.getArch() != Triple::x86_64)
1406 return;
1407
1408 if (TargetsForSlot.size() > ClThreshold)
1409 return;
1410
1411 bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
1412 if (!HasNonDevirt)
1413 for (auto &P : SlotInfo.ConstCSInfo)
1414 if (!P.second.AllCallSitesDevirted) {
1415 HasNonDevirt = true;
1416 break;
1417 }
1418
1419 if (!HasNonDevirt)
1420 return;
1421
1422 FunctionType *FT =
1423 FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
1424 Function *JT;
1425 if (isa<MDString>(Slot.TypeID)) {
1427 M.getDataLayout().getProgramAddressSpace(),
1428 getGlobalName(Slot, {}, "branch_funnel"), &M);
1429 JT->setVisibility(GlobalValue::HiddenVisibility);
1430 } else {
1432 M.getDataLayout().getProgramAddressSpace(),
1433 "branch_funnel", &M);
1434 }
1435 JT->addParamAttr(0, Attribute::Nest);
1436
1437 std::vector<Value *> JTArgs;
1438 JTArgs.push_back(JT->arg_begin());
1439 for (auto &T : TargetsForSlot) {
1440 JTArgs.push_back(getMemberAddr(T.TM));
1441 JTArgs.push_back(T.Fn);
1442 }
1443
1444 BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
1445 Function *Intr =
1446 Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
1447
1448 auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
1449 CI->setTailCallKind(CallInst::TCK_MustTail);
1450 ReturnInst::Create(M.getContext(), nullptr, BB);
1451
1452 bool IsExported = false;
1453 applyICallBranchFunnel(SlotInfo, JT, IsExported);
1454 if (IsExported)
1456}
1457
1458void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
1459 Constant *JT, bool &IsExported) {
1460 auto Apply = [&](CallSiteInfo &CSInfo) {
1461 if (CSInfo.isExported())
1462 IsExported = true;
1463 if (CSInfo.AllCallSitesDevirted)
1464 return;
1465
1466 std::map<CallBase *, CallBase *> CallBases;
1467 for (auto &&VCallSite : CSInfo.CallSites) {
1468 CallBase &CB = VCallSite.CB;
1469
1470 if (CallBases.find(&CB) != CallBases.end()) {
1471 // When finding devirtualizable calls, it's possible to find the same
1472 // vtable passed to multiple llvm.type.test or llvm.type.checked.load
1473 // calls, which can cause duplicate call sites to be recorded in
1474 // [Const]CallSites. If we've already found one of these
1475 // call instances, just ignore it. It will be replaced later.
1476 continue;
1477 }
1478
1479 // Jump tables are only profitable if the retpoline mitigation is enabled.
1480 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
1481 if (!FSAttr.isValid() ||
1482 !FSAttr.getValueAsString().contains("+retpoline"))
1483 continue;
1484
1485 NumBranchFunnel++;
1486 if (RemarksEnabled)
1487 VCallSite.emitRemark("branch-funnel",
1488 JT->stripPointerCasts()->getName(), OREGetter);
1489
1490 // Pass the address of the vtable in the nest register, which is r10 on
1491 // x86_64.
1492 std::vector<Type *> NewArgs;
1493 NewArgs.push_back(Int8PtrTy);
1494 append_range(NewArgs, CB.getFunctionType()->params());
1495 FunctionType *NewFT =
1497 CB.getFunctionType()->isVarArg());
1498 PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
1499
1500 IRBuilder<> IRB(&CB);
1501 std::vector<Value *> Args;
1502 Args.push_back(VCallSite.VTable);
1503 llvm::append_range(Args, CB.args());
1504
1505 CallBase *NewCS = nullptr;
1506 if (isa<CallInst>(CB))
1507 NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
1508 else
1509 NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
1510 cast<InvokeInst>(CB).getNormalDest(),
1511 cast<InvokeInst>(CB).getUnwindDest(), Args);
1512 NewCS->setCallingConv(CB.getCallingConv());
1513
1515 std::vector<AttributeSet> NewArgAttrs;
1516 NewArgAttrs.push_back(AttributeSet::get(
1517 M.getContext(), ArrayRef<Attribute>{Attribute::get(
1518 M.getContext(), Attribute::Nest)}));
1519 for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
1520 NewArgAttrs.push_back(Attrs.getParamAttrs(I));
1521 NewCS->setAttributes(
1522 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
1523 Attrs.getRetAttrs(), NewArgAttrs));
1524
1525 CallBases[&CB] = NewCS;
1526
1527 // This use is no longer unsafe.
1528 if (VCallSite.NumUnsafeUses)
1529 --*VCallSite.NumUnsafeUses;
1530 }
1531 // Don't mark as devirtualized because there may be callers compiled without
1532 // retpoline mitigation, which would mean that they are lowered to
1533 // llvm.type.test and therefore require an llvm.type.test resolution for the
1534 // type identifier.
1535
1536 for (auto &[Old, New] : CallBases) {
1537 Old->replaceAllUsesWith(New);
1538 Old->eraseFromParent();
1539 }
1540 };
1541 Apply(SlotInfo.CSInfo);
1542 for (auto &P : SlotInfo.ConstCSInfo)
1543 Apply(P.second);
1544}
1545
1546bool DevirtModule::tryEvaluateFunctionsWithArgs(
1548 ArrayRef<uint64_t> Args) {
1549 // Evaluate each function and store the result in each target's RetVal
1550 // field.
1551 for (VirtualCallTarget &Target : TargetsForSlot) {
1552 // TODO: Skip for now if the vtable symbol was an alias to a function,
1553 // need to evaluate whether it would be correct to analyze the aliasee
1554 // function for this optimization.
1555 auto Fn = dyn_cast<Function>(Target.Fn);
1556 if (!Fn)
1557 return false;
1558
1559 if (Fn->arg_size() != Args.size() + 1)
1560 return false;
1561
1562 Evaluator Eval(M.getDataLayout(), nullptr);
1564 EvalArgs.push_back(
1565 Constant::getNullValue(Fn->getFunctionType()->getParamType(0)));
1566 for (unsigned I = 0; I != Args.size(); ++I) {
1567 auto *ArgTy =
1568 dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
1569 if (!ArgTy)
1570 return false;
1571 EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
1572 }
1573
1574 Constant *RetVal;
1575 if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
1576 !isa<ConstantInt>(RetVal))
1577 return false;
1578 Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
1579 }
1580 return true;
1581}
1582
1583void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1584 uint64_t TheRetVal) {
1585 for (auto Call : CSInfo.CallSites) {
1586 if (!OptimizedCalls.insert(&Call.CB).second)
1587 continue;
1588 NumUniformRetVal++;
1589 Call.replaceAndErase(
1590 "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
1591 ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
1592 }
1593 CSInfo.markDevirt();
1594}
1595
1596bool DevirtModule::tryUniformRetValOpt(
1597 MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
1599 // Uniform return value optimization. If all functions return the same
1600 // constant, replace all calls with that constant.
1601 uint64_t TheRetVal = TargetsForSlot[0].RetVal;
1602 for (const VirtualCallTarget &Target : TargetsForSlot)
1603 if (Target.RetVal != TheRetVal)
1604 return false;
1605
1606 if (CSInfo.isExported()) {
1608 Res->Info = TheRetVal;
1609 }
1610
1611 applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
1612 if (RemarksEnabled || AreStatisticsEnabled())
1613 for (auto &&Target : TargetsForSlot)
1614 Target.WasDevirt = true;
1615 return true;
1616}
1617
1618std::string DevirtModule::getGlobalName(VTableSlot Slot,
1619 ArrayRef<uint64_t> Args,
1620 StringRef Name) {
1621 std::string FullName = "__typeid_";
1622 raw_string_ostream OS(FullName);
1623 OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
1624 for (uint64_t Arg : Args)
1625 OS << '_' << Arg;
1626 OS << '_' << Name;
1627 return OS.str();
1628}
1629
1630bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
1631 Triple T(M.getTargetTriple());
1632 return T.isX86() && T.getObjectFormat() == Triple::ELF;
1633}
1634
1635void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1638 getGlobalName(Slot, Args, Name), C, &M);
1640}
1641
1642void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1643 StringRef Name, uint32_t Const,
1644 uint32_t &Storage) {
1645 if (shouldExportConstantsAsAbsoluteSymbols()) {
1646 exportGlobal(
1647 Slot, Args, Name,
1649 return;
1650 }
1651
1652 Storage = Const;
1653}
1654
1655Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
1656 StringRef Name) {
1657 Constant *C =
1658 M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
1659 auto *GV = dyn_cast<GlobalVariable>(C);
1660 if (GV)
1662 return C;
1663}
1664
1665Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
1666 StringRef Name, IntegerType *IntTy,
1667 uint32_t Storage) {
1668 if (!shouldExportConstantsAsAbsoluteSymbols())
1669 return ConstantInt::get(IntTy, Storage);
1670
1671 Constant *C = importGlobal(Slot, Args, Name);
1672 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
1673 C = ConstantExpr::getPtrToInt(C, IntTy);
1674
1675 // We only need to set metadata if the global is newly created, in which
1676 // case it would not have hidden visibility.
1677 if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
1678 return C;
1679
1680 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
1681 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
1682 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
1683 GV->setMetadata(LLVMContext::MD_absolute_symbol,
1684 MDNode::get(M.getContext(), {MinC, MaxC}));
1685 };
1686 unsigned AbsWidth = IntTy->getBitWidth();
1687 if (AbsWidth == IntPtrTy->getBitWidth())
1688 SetAbsRange(~0ull, ~0ull); // Full set.
1689 else
1690 SetAbsRange(0, 1ull << AbsWidth);
1691 return C;
1692}
1693
1694void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
1695 bool IsOne,
1696 Constant *UniqueMemberAddr) {
1697 for (auto &&Call : CSInfo.CallSites) {
1698 if (!OptimizedCalls.insert(&Call.CB).second)
1699 continue;
1700 IRBuilder<> B(&Call.CB);
1701 Value *Cmp =
1702 B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
1703 B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
1704 Cmp = B.CreateZExt(Cmp, Call.CB.getType());
1705 NumUniqueRetVal++;
1706 Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
1707 Cmp);
1708 }
1709 CSInfo.markDevirt();
1710}
1711
1712Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
1713 Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
1714 return ConstantExpr::getGetElementPtr(Int8Ty, C,
1715 ConstantInt::get(Int64Ty, M->Offset));
1716}
1717
1718bool DevirtModule::tryUniqueRetValOpt(
1719 unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
1720 CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
1721 VTableSlot Slot, ArrayRef<uint64_t> Args) {
1722 // IsOne controls whether we look for a 0 or a 1.
1723 auto tryUniqueRetValOptFor = [&](bool IsOne) {
1724 const TypeMemberInfo *UniqueMember = nullptr;
1725 for (const VirtualCallTarget &Target : TargetsForSlot) {
1726 if (Target.RetVal == (IsOne ? 1 : 0)) {
1727 if (UniqueMember)
1728 return false;
1729 UniqueMember = Target.TM;
1730 }
1731 }
1732
1733 // We should have found a unique member or bailed out by now. We already
1734 // checked for a uniform return value in tryUniformRetValOpt.
1735 assert(UniqueMember);
1736
1737 Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
1738 if (CSInfo.isExported()) {
1740 Res->Info = IsOne;
1741
1742 exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
1743 }
1744
1745 // Replace each call with the comparison.
1746 applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
1747 UniqueMemberAddr);
1748
1749 // Update devirtualization statistics for targets.
1750 if (RemarksEnabled || AreStatisticsEnabled())
1751 for (auto &&Target : TargetsForSlot)
1752 Target.WasDevirt = true;
1753
1754 return true;
1755 };
1756
1757 if (BitWidth == 1) {
1758 if (tryUniqueRetValOptFor(true))
1759 return true;
1760 if (tryUniqueRetValOptFor(false))
1761 return true;
1762 }
1763 return false;
1764}
1765
1766void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
1767 Constant *Byte, Constant *Bit) {
1768 for (auto Call : CSInfo.CallSites) {
1769 if (!OptimizedCalls.insert(&Call.CB).second)
1770 continue;
1771 auto *RetType = cast<IntegerType>(Call.CB.getType());
1772 IRBuilder<> B(&Call.CB);
1773 Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
1774 if (RetType->getBitWidth() == 1) {
1775 Value *Bits = B.CreateLoad(Int8Ty, Addr);
1776 Value *BitsAndBit = B.CreateAnd(Bits, Bit);
1777 auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
1778 NumVirtConstProp1Bit++;
1779 Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
1780 OREGetter, IsBitSet);
1781 } else {
1782 Value *Val = B.CreateLoad(RetType, Addr);
1783 NumVirtConstProp++;
1784 Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
1785 OREGetter, Val);
1786 }
1787 }
1788 CSInfo.markDevirt();
1789}
1790
1791bool DevirtModule::tryVirtualConstProp(
1792 MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
1793 WholeProgramDevirtResolution *Res, VTableSlot Slot) {
1794 // TODO: Skip for now if the vtable symbol was an alias to a function,
1795 // need to evaluate whether it would be correct to analyze the aliasee
1796 // function for this optimization.
1797 auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
1798 if (!Fn)
1799 return false;
1800 // This only works if the function returns an integer.
1801 auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
1802 if (!RetType)
1803 return false;
1804 unsigned BitWidth = RetType->getBitWidth();
1805 if (BitWidth > 64)
1806 return false;
1807
1808 // Make sure that each function is defined, does not access memory, takes at
1809 // least one argument, does not use its first argument (which we assume is
1810 // 'this'), and has the same return type.
1811 //
1812 // Note that we test whether this copy of the function is readnone, rather
1813 // than testing function attributes, which must hold for any copy of the
1814 // function, even a less optimized version substituted at link time. This is
1815 // sound because the virtual constant propagation optimizations effectively
1816 // inline all implementations of the virtual function into each call site,
1817 // rather than using function attributes to perform local optimization.
1818 for (VirtualCallTarget &Target : TargetsForSlot) {
1819 // TODO: Skip for now if the vtable symbol was an alias to a function,
1820 // need to evaluate whether it would be correct to analyze the aliasee
1821 // function for this optimization.
1822 auto Fn = dyn_cast<Function>(Target.Fn);
1823 if (!Fn)
1824 return false;
1825
1826 if (Fn->isDeclaration() ||
1827 !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
1828 .doesNotAccessMemory() ||
1829 Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
1830 Fn->getReturnType() != RetType)
1831 return false;
1832 }
1833
1834 for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
1835 if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
1836 continue;
1837
1838 WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
1839 if (Res)
1840 ResByArg = &Res->ResByArg[CSByConstantArg.first];
1841
1842 if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
1843 continue;
1844
1845 if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
1846 ResByArg, Slot, CSByConstantArg.first))
1847 continue;
1848
1849 // Find an allocation offset in bits in all vtables associated with the
1850 // type.
1851 uint64_t AllocBefore =
1852 findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
1853 uint64_t AllocAfter =
1854 findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
1855
1856 // Calculate the total amount of padding needed to store a value at both
1857 // ends of the object.
1858 uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
1859 for (auto &&Target : TargetsForSlot) {
1860 TotalPaddingBefore += std::max<int64_t>(
1861 (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
1862 TotalPaddingAfter += std::max<int64_t>(
1863 (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
1864 }
1865
1866 // If the amount of padding is too large, give up.
1867 // FIXME: do something smarter here.
1868 if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
1869 continue;
1870
1871 // Calculate the offset to the value as a (possibly negative) byte offset
1872 // and (if applicable) a bit offset, and store the values in the targets.
1873 int64_t OffsetByte;
1874 uint64_t OffsetBit;
1875 if (TotalPaddingBefore <= TotalPaddingAfter)
1876 setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
1877 OffsetBit);
1878 else
1879 setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
1880 OffsetBit);
1881
1882 if (RemarksEnabled || AreStatisticsEnabled())
1883 for (auto &&Target : TargetsForSlot)
1884 Target.WasDevirt = true;
1885
1886
1887 if (CSByConstantArg.second.isExported()) {
1889 exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
1890 ResByArg->Byte);
1891 exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
1892 ResByArg->Bit);
1893 }
1894
1895 // Rewrite each call to a load from OffsetByte/OffsetBit.
1896 Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
1897 Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
1898 applyVirtualConstProp(CSByConstantArg.second,
1899 TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
1900 }
1901 return true;
1902}
1903
1904void DevirtModule::rebuildGlobal(VTableBits &B) {
1905 if (B.Before.Bytes.empty() && B.After.Bytes.empty())
1906 return;
1907
1908 // Align the before byte array to the global's minimum alignment so that we
1909 // don't break any alignment requirements on the global.
1910 Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
1911 B.GV->getAlign(), B.GV->getValueType());
1912 B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
1913
1914 // Before was stored in reverse order; flip it now.
1915 for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
1916 std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
1917
1918 // Build an anonymous global containing the before bytes, followed by the
1919 // original initializer, followed by the after bytes.
1920 auto NewInit = ConstantStruct::getAnon(
1921 {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
1922 B.GV->getInitializer(),
1923 ConstantDataArray::get(M.getContext(), B.After.Bytes)});
1924 auto NewGV =
1925 new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
1926 GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
1927 NewGV->setSection(B.GV->getSection());
1928 NewGV->setComdat(B.GV->getComdat());
1929 NewGV->setAlignment(B.GV->getAlign());
1930
1931 // Copy the original vtable's metadata to the anonymous global, adjusting
1932 // offsets as required.
1933 NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
1934
1935 // Build an alias named after the original global, pointing at the second
1936 // element (the original initializer).
1937 auto Alias = GlobalAlias::create(
1938 B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
1940 NewInit->getType(), NewGV,
1941 ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
1942 ConstantInt::get(Int32Ty, 1)}),
1943 &M);
1944 Alias->setVisibility(B.GV->getVisibility());
1945 Alias->takeName(B.GV);
1946
1947 B.GV->replaceAllUsesWith(Alias);
1948 B.GV->eraseFromParent();
1949}
1950
1951bool DevirtModule::areRemarksEnabled() {
1952 const auto &FL = M.getFunctionList();
1953 for (const Function &Fn : FL) {
1954 if (Fn.empty())
1955 continue;
1956 auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
1957 return DI.isEnabled();
1958 }
1959 return false;
1960}
1961
1962void DevirtModule::scanTypeTestUsers(
1963 Function *TypeTestFunc,
1964 DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
1965 // Find all virtual calls via a virtual table pointer %p under an assumption
1966 // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
1967 // points to a member of the type identifier %md. Group calls by (type ID,
1968 // offset) pair (effectively the identity of the virtual function) and store
1969 // to CallSlots.
1970 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
1971 auto *CI = dyn_cast<CallInst>(U.getUser());
1972 if (!CI)
1973 continue;
1974
1975 // Search for virtual calls based on %p and add them to DevirtCalls.
1978 auto &DT = LookupDomTree(*CI->getFunction());
1979 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1980
1981 Metadata *TypeId =
1982 cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
1983 // If we found any, add them to CallSlots.
1984 if (!Assumes.empty()) {
1985 Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
1986 for (DevirtCallSite Call : DevirtCalls)
1987 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
1988 }
1989
1990 auto RemoveTypeTestAssumes = [&]() {
1991 // We no longer need the assumes or the type test.
1992 for (auto *Assume : Assumes)
1993 Assume->eraseFromParent();
1994 // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
1995 // may use the vtable argument later.
1996 if (CI->use_empty())
1997 CI->eraseFromParent();
1998 };
1999
2000 // At this point we could remove all type test assume sequences, as they
2001 // were originally inserted for WPD. However, we can keep these in the
2002 // code stream for later analysis (e.g. to help drive more efficient ICP
2003 // sequences). They will eventually be removed by a second LowerTypeTests
2004 // invocation that cleans them up. In order to do this correctly, the first
2005 // LowerTypeTests invocation needs to know that they have "Unknown" type
2006 // test resolution, so that they aren't treated as Unsat and lowered to
2007 // False, which will break any uses on assumes. Below we remove any type
2008 // test assumes that will not be treated as Unknown by LTT.
2009
2010 // The type test assumes will be treated by LTT as Unsat if the type id is
2011 // not used on a global (in which case it has no entry in the TypeIdMap).
2012 if (!TypeIdMap.count(TypeId))
2013 RemoveTypeTestAssumes();
2014
2015 // For ThinLTO importing, we need to remove the type test assumes if this is
2016 // an MDString type id without a corresponding TypeIdSummary. Any
2017 // non-MDString type ids are ignored and treated as Unknown by LTT, so their
2018 // type test assumes can be kept. If the MDString type id is missing a
2019 // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
2020 // exporting phase of WPD from analyzing it), then it would be treated as
2021 // Unsat by LTT and we need to remove its type test assumes here. If not
2022 // used on a vcall we don't need them for later optimization use in any
2023 // case.
2024 else if (ImportSummary && isa<MDString>(TypeId)) {
2025 const TypeIdSummary *TidSummary =
2026 ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
2027 if (!TidSummary)
2028 RemoveTypeTestAssumes();
2029 else
2030 // If one was created it should not be Unsat, because if we reached here
2031 // the type id was used on a global.
2033 }
2034 }
2035}
2036
2037void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
2038 Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
2039
2040 for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
2041 auto *CI = dyn_cast<CallInst>(U.getUser());
2042 if (!CI)
2043 continue;
2044
2045 Value *Ptr = CI->getArgOperand(0);
2046 Value *Offset = CI->getArgOperand(1);
2047 Value *TypeIdValue = CI->getArgOperand(2);
2048 Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
2049
2053 bool HasNonCallUses = false;
2054 auto &DT = LookupDomTree(*CI->getFunction());
2055 findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
2056 HasNonCallUses, CI, DT);
2057
2058 // Start by generating "pessimistic" code that explicitly loads the function
2059 // pointer from the vtable and performs the type check. If possible, we will
2060 // eliminate the load and the type check later.
2061
2062 // If possible, only generate the load at the point where it is used.
2063 // This helps avoid unnecessary spills.
2064 IRBuilder<> LoadB(
2065 (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
2066
2067 Value *LoadedValue = nullptr;
2068 if (TypeCheckedLoadFunc->getIntrinsicID() ==
2069 Intrinsic::type_checked_load_relative) {
2070 Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
2071 LoadedValue = LoadB.CreateLoad(Int32Ty, GEP);
2072 LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
2073 GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
2074 LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
2075 LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
2076 } else {
2077 Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
2078 LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
2079 }
2080
2081 for (Instruction *LoadedPtr : LoadedPtrs) {
2082 LoadedPtr->replaceAllUsesWith(LoadedValue);
2083 LoadedPtr->eraseFromParent();
2084 }
2085
2086 // Likewise for the type test.
2087 IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
2088 CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
2089
2090 for (Instruction *Pred : Preds) {
2091 Pred->replaceAllUsesWith(TypeTestCall);
2092 Pred->eraseFromParent();
2093 }
2094
2095 // We have already erased any extractvalue instructions that refer to the
2096 // intrinsic call, but the intrinsic may have other non-extractvalue uses
2097 // (although this is unlikely). In that case, explicitly build a pair and
2098 // RAUW it.
2099 if (!CI->use_empty()) {
2100 Value *Pair = PoisonValue::get(CI->getType());
2101 IRBuilder<> B(CI);
2102 Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
2103 Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
2104 CI->replaceAllUsesWith(Pair);
2105 }
2106
2107 // The number of unsafe uses is initially the number of uses.
2108 auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
2109 NumUnsafeUses = DevirtCalls.size();
2110
2111 // If the function pointer has a non-call user, we cannot eliminate the type
2112 // check, as one of those users may eventually call the pointer. Increment
2113 // the unsafe use count to make sure it cannot reach zero.
2114 if (HasNonCallUses)
2115 ++NumUnsafeUses;
2116 for (DevirtCallSite Call : DevirtCalls) {
2117 CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
2118 &NumUnsafeUses);
2119 }
2120
2121 CI->eraseFromParent();
2122 }
2123}
2124
2125void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
2126 auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
2127 if (!TypeId)
2128 return;
2129 const TypeIdSummary *TidSummary =
2130 ImportSummary->getTypeIdSummary(TypeId->getString());
2131 if (!TidSummary)
2132 return;
2133 auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
2134 if (ResI == TidSummary->WPDRes.end())
2135 return;
2136 const WholeProgramDevirtResolution &Res = ResI->second;
2137
2139 assert(!Res.SingleImplName.empty());
2140 // The type of the function in the declaration is irrelevant because every
2141 // call site will cast it to the correct type.
2142 Constant *SingleImpl =
2143 cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
2144 Type::getVoidTy(M.getContext()))
2145 .getCallee());
2146
2147 // This is the import phase so we should not be exporting anything.
2148 bool IsExported = false;
2149 applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
2150 assert(!IsExported);
2151 }
2152
2153 for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
2154 auto I = Res.ResByArg.find(CSByConstantArg.first);
2155 if (I == Res.ResByArg.end())
2156 continue;
2157 auto &ResByArg = I->second;
2158 // FIXME: We should figure out what to do about the "function name" argument
2159 // to the apply* functions, as the function names are unavailable during the
2160 // importing phase. For now we just pass the empty string. This does not
2161 // impact correctness because the function names are just used for remarks.
2162 switch (ResByArg.TheKind) {
2164 applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
2165 break;
2167 Constant *UniqueMemberAddr =
2168 importGlobal(Slot, CSByConstantArg.first, "unique_member");
2169 applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
2170 UniqueMemberAddr);
2171 break;
2172 }
2174 Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
2175 Int32Ty, ResByArg.Byte);
2176 Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
2177 ResByArg.Bit);
2178 applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
2179 break;
2180 }
2181 default:
2182 break;
2183 }
2184 }
2185
2187 // The type of the function is irrelevant, because it's bitcast at calls
2188 // anyhow.
2189 Constant *JT = cast<Constant>(
2190 M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
2191 Type::getVoidTy(M.getContext()))
2192 .getCallee());
2193 bool IsExported = false;
2194 applyICallBranchFunnel(SlotInfo, JT, IsExported);
2195 assert(!IsExported);
2196 }
2197}
2198
2199void DevirtModule::removeRedundantTypeTests() {
2200 auto True = ConstantInt::getTrue(M.getContext());
2201 for (auto &&U : NumUnsafeUsesForTypeTest) {
2202 if (U.second == 0) {
2203 U.first->replaceAllUsesWith(True);
2204 U.first->eraseFromParent();
2205 }
2206 }
2207}
2208
2210DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
2211 ModuleSummaryIndex *ExportSummary) {
2212 assert((ExportSummary != nullptr) &&
2213 "Caller guarantees ExportSummary is not nullptr");
2214
2215 const auto TheFnGUID = TheFn->getGUID();
2216 const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
2217 // Look up ValueInfo with the GUID in the current linkage.
2218 ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
2219 // If no entry is found and GUID is different from GUID computed using
2220 // exported name, look up ValueInfo with the exported name unconditionally.
2221 // This is a fallback.
2222 //
2223 // The reason to have a fallback:
2224 // 1. LTO could enable global value internalization via
2225 // `enable-lto-internalization`.
2226 // 2. The GUID in ExportedSummary is computed using exported name.
2227 if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
2228 TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
2229 }
2230 return TheFnVI;
2231}
2232
2233bool DevirtModule::mustBeUnreachableFunction(
2234 Function *const F, ModuleSummaryIndex *ExportSummary) {
2235 // First, learn unreachability by analyzing function IR.
2236 if (!F->isDeclaration()) {
2237 // A function must be unreachable if its entry block ends with an
2238 // 'unreachable'.
2239 return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
2240 }
2241 // Learn unreachability from ExportSummary if ExportSummary is present.
2242 return ExportSummary &&
2244 DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
2245}
2246
2247bool DevirtModule::run() {
2248 // If only some of the modules were split, we cannot correctly perform
2249 // this transformation. We already checked for the presense of type tests
2250 // with partially split modules during the thin link, and would have emitted
2251 // an error if any were found, so here we can simply return.
2252 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
2253 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
2254 return false;
2255
2256 Function *TypeTestFunc =
2257 M.getFunction(Intrinsic::getName(Intrinsic::type_test));
2258 Function *TypeCheckedLoadFunc =
2259 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
2260 Function *TypeCheckedLoadRelativeFunc =
2261 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative));
2262 Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
2263
2264 // Normally if there are no users of the devirtualization intrinsics in the
2265 // module, this pass has nothing to do. But if we are exporting, we also need
2266 // to handle any users that appear only in the function summaries.
2267 if (!ExportSummary &&
2268 (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
2269 AssumeFunc->use_empty()) &&
2270 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
2271 (!TypeCheckedLoadRelativeFunc ||
2272 TypeCheckedLoadRelativeFunc->use_empty()))
2273 return false;
2274
2275 // Rebuild type metadata into a map for easy lookup.
2276 std::vector<VTableBits> Bits;
2278 buildTypeIdentifierMap(Bits, TypeIdMap);
2279
2280 if (TypeTestFunc && AssumeFunc)
2281 scanTypeTestUsers(TypeTestFunc, TypeIdMap);
2282
2283 if (TypeCheckedLoadFunc)
2284 scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
2285
2286 if (TypeCheckedLoadRelativeFunc)
2287 scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
2288
2289 if (ImportSummary) {
2290 for (auto &S : CallSlots)
2291 importResolution(S.first, S.second);
2292
2293 removeRedundantTypeTests();
2294
2295 // We have lowered or deleted the type intrinsics, so we will no longer have
2296 // enough information to reason about the liveness of virtual function
2297 // pointers in GlobalDCE.
2298 for (GlobalVariable &GV : M.globals())
2299 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2300
2301 // The rest of the code is only necessary when exporting or during regular
2302 // LTO, so we are done.
2303 return true;
2304 }
2305
2306 if (TypeIdMap.empty())
2307 return true;
2308
2309 // Collect information from summary about which calls to try to devirtualize.
2310 if (ExportSummary) {
2312 for (auto &P : TypeIdMap) {
2313 if (auto *TypeId = dyn_cast<MDString>(P.first))
2314 MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
2315 TypeId);
2316 }
2317
2318 for (auto &P : *ExportSummary) {
2319 for (auto &S : P.second.SummaryList) {
2320 auto *FS = dyn_cast<FunctionSummary>(S.get());
2321 if (!FS)
2322 continue;
2323 // FIXME: Only add live functions.
2324 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2325 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2326 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2327 }
2328 }
2329 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2330 for (Metadata *MD : MetadataByGUID[VF.GUID]) {
2331 CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2332 }
2333 }
2334 for (const FunctionSummary::ConstVCall &VC :
2335 FS->type_test_assume_const_vcalls()) {
2336 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2337 CallSlots[{MD, VC.VFunc.Offset}]
2338 .ConstCSInfo[VC.Args]
2339 .addSummaryTypeTestAssumeUser(FS);
2340 }
2341 }
2342 for (const FunctionSummary::ConstVCall &VC :
2343 FS->type_checked_load_const_vcalls()) {
2344 for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
2345 CallSlots[{MD, VC.VFunc.Offset}]
2346 .ConstCSInfo[VC.Args]
2347 .addSummaryTypeCheckedLoadUser(FS);
2348 }
2349 }
2350 }
2351 }
2352 }
2353
2354 // For each (type, offset) pair:
2355 bool DidVirtualConstProp = false;
2356 std::map<std::string, GlobalValue *> DevirtTargets;
2357 for (auto &S : CallSlots) {
2358 // Search each of the members of the type identifier for the virtual
2359 // function implementation at offset S.first.ByteOffset, and add to
2360 // TargetsForSlot.
2361 std::vector<VirtualCallTarget> TargetsForSlot;
2362 WholeProgramDevirtResolution *Res = nullptr;
2363 const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
2364 if (ExportSummary && isa<MDString>(S.first.TypeID) &&
2365 TypeMemberInfos.size())
2366 // For any type id used on a global's type metadata, create the type id
2367 // summary resolution regardless of whether we can devirtualize, so that
2368 // lower type tests knows the type id is not Unsat. If it was not used on
2369 // a global's type metadata, the TypeIdMap entry set will be empty, and
2370 // we don't want to create an entry (with the default Unknown type
2371 // resolution), which can prevent detection of the Unsat.
2372 Res = &ExportSummary
2373 ->getOrInsertTypeIdSummary(
2374 cast<MDString>(S.first.TypeID)->getString())
2375 .WPDRes[S.first.ByteOffset];
2376 if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
2377 S.first.ByteOffset, ExportSummary)) {
2378
2379 if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
2380 DidVirtualConstProp |=
2381 tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
2382
2383 tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
2384 }
2385
2386 // Collect functions devirtualized at least for one call site for stats.
2387 if (RemarksEnabled || AreStatisticsEnabled())
2388 for (const auto &T : TargetsForSlot)
2389 if (T.WasDevirt)
2390 DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
2391 }
2392
2393 // CFI-specific: if we are exporting and any llvm.type.checked.load
2394 // intrinsics were *not* devirtualized, we need to add the resulting
2395 // llvm.type.test intrinsics to the function summaries so that the
2396 // LowerTypeTests pass will export them.
2397 if (ExportSummary && isa<MDString>(S.first.TypeID)) {
2398 auto GUID =
2399 GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
2400 for (auto *FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
2401 FS->addTypeTest(GUID);
2402 for (auto &CCS : S.second.ConstCSInfo)
2403 for (auto *FS : CCS.second.SummaryTypeCheckedLoadUsers)
2404 FS->addTypeTest(GUID);
2405 }
2406 }
2407
2408 if (RemarksEnabled) {
2409 // Generate remarks for each devirtualized function.
2410 for (const auto &DT : DevirtTargets) {
2411 GlobalValue *GV = DT.second;
2412 auto F = dyn_cast<Function>(GV);
2413 if (!F) {
2414 auto A = dyn_cast<GlobalAlias>(GV);
2415 assert(A && isa<Function>(A->getAliasee()));
2416 F = dyn_cast<Function>(A->getAliasee());
2417 assert(F);
2418 }
2419
2420 using namespace ore;
2421 OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
2422 << "devirtualized "
2423 << NV("FunctionName", DT.first));
2424 }
2425 }
2426
2427 NumDevirtTargets += DevirtTargets.size();
2428
2429 removeRedundantTypeTests();
2430
2431 // Rebuild each global we touched as part of virtual constant propagation to
2432 // include the before and after bytes.
2433 if (DidVirtualConstProp)
2434 for (VTableBits &B : Bits)
2435 rebuildGlobal(B);
2436
2437 // We have lowered or deleted the type intrinsics, so we will no longer have
2438 // enough information to reason about the liveness of virtual function
2439 // pointers in GlobalDCE.
2440 for (GlobalVariable &GV : M.globals())
2441 GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
2442
2443 for (auto *CI : CallsWithPtrAuthBundleRemoved)
2444 CI->eraseFromParent();
2445
2446 return true;
2447}
2448
2449void DevirtIndex::run() {
2450 if (ExportSummary.typeIdCompatibleVtableMap().empty())
2451 return;
2452
2454 for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
2455 NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
2456 // Create the type id summary resolution regardlness of whether we can
2457 // devirtualize, so that lower type tests knows the type id is used on
2458 // a global and not Unsat. We do this here rather than in the loop over the
2459 // CallSlots, since that handling will only see type tests that directly
2460 // feed assumes, and we would miss any that aren't currently handled by WPD
2461 // (such as type tests that feed assumes via phis).
2462 ExportSummary.getOrInsertTypeIdSummary(P.first);
2463 }
2464
2465 // Collect information from summary about which calls to try to devirtualize.
2466 for (auto &P : ExportSummary) {
2467 for (auto &S : P.second.SummaryList) {
2468 auto *FS = dyn_cast<FunctionSummary>(S.get());
2469 if (!FS)
2470 continue;
2471 // FIXME: Only add live functions.
2472 for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
2473 for (StringRef Name : NameByGUID[VF.GUID]) {
2474 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
2475 }
2476 }
2477 for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
2478 for (StringRef Name : NameByGUID[VF.GUID]) {
2479 CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
2480 }
2481 }
2482 for (const FunctionSummary::ConstVCall &VC :
2483 FS->type_test_assume_const_vcalls()) {
2484 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2485 CallSlots[{Name, VC.VFunc.Offset}]
2486 .ConstCSInfo[VC.Args]
2487 .addSummaryTypeTestAssumeUser(FS);
2488 }
2489 }
2490 for (const FunctionSummary::ConstVCall &VC :
2491 FS->type_checked_load_const_vcalls()) {
2492 for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
2493 CallSlots[{Name, VC.VFunc.Offset}]
2494 .ConstCSInfo[VC.Args]
2495 .addSummaryTypeCheckedLoadUser(FS);
2496 }
2497 }
2498 }
2499 }
2500
2501 std::set<ValueInfo> DevirtTargets;
2502 // For each (type, offset) pair:
2503 for (auto &S : CallSlots) {
2504 // Search each of the members of the type identifier for the virtual
2505 // function implementation at offset S.first.ByteOffset, and add to
2506 // TargetsForSlot.
2507 std::vector<ValueInfo> TargetsForSlot;
2508 auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
2509 assert(TidSummary);
2510 // The type id summary would have been created while building the NameByGUID
2511 // map earlier.
2513 &ExportSummary.getTypeIdSummary(S.first.TypeID)
2514 ->WPDRes[S.first.ByteOffset];
2515 if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
2516 S.first.ByteOffset)) {
2517
2518 if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
2519 DevirtTargets))
2520 continue;
2521 }
2522 }
2523
2524 // Optionally have the thin link print message for each devirtualized
2525 // function.
2527 for (const auto &DT : DevirtTargets)
2528 errs() << "Devirtualized call to " << DT << "\n";
2529
2530 NumDevirtTargets += DevirtTargets.size();
2531}
unsigned Intr
assume Assume Builder
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Addr
std::string Name
uint64_t Size
Provides passes for computing function attributes based on interprocedural analyses.
#define DEBUG_TYPE
Hexagon Common GEP
static cl::opt< std::string > ClReadSummary("lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden)
static cl::opt< PassSummaryAction > ClSummaryAction("lowertypetests-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< std::string > ClWriteSummary("lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
Type::TypeID TypeID
static bool mustBeUnreachableFunction(const Function &F)
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static cl::opt< bool > DisableWholeProgramVisibility("disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)"))
Provide a way to force disable whole program for debugging or workarounds, when enabled via the linke...
WPDCheckMode
Mechanism to add runtime checking of devirtualization decisions, optionally trapping or falling back ...
static cl::opt< PassSummaryAction > ClSummaryAction("wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden)
static cl::opt< bool > WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility"))
Provide a way to force enable whole program visibility in tests.
static bool typeIDVisibleToRegularObj(StringRef TypeID, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary)
static cl::list< std::string > SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), cl::Hidden, cl::CommaSeparated)
Provide way to prevent certain function from being devirtualized.
static cl::opt< std::string > ClWriteSummary("wholeprogramdevirt-write-summary", cl::desc("Write summary to given bitcode or YAML file after running pass. " "Output file format is deduced from extension: *.bc means writing " "bitcode, otherwise YAML"), cl::Hidden)
static cl::opt< unsigned > ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels"))
static cl::opt< WPDCheckMode > DevirtCheckMode("wholeprogramdevirt-check", cl::Hidden, cl::desc("Type of checking for incorrect devirtualizations"), cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect")))
static cl::opt< std::string > ClReadSummary("wholeprogramdevirt-read-summary", cl::desc("Read summary from given bitcode or YAML file before running pass"), cl::Hidden)
static bool skipUpdateDueToValidation(GlobalVariable &GV, function_ref< bool(StringRef)> IsVisibleToRegularObj)
static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee)
static cl::opt< bool > PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, cl::desc("Print index-based devirtualization messages"))
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:712
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:318
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:184
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1190
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1474
bool arg_empty() const
Definition: InstrTypes.h:1354
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Definition: InstrTypes.h:2054
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1412
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1470
static CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, Instruction *InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
Value * getCalledOperand() const
Definition: InstrTypes.h:1405
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1493
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1270
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1348
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1448
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1489
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
@ ICMP_EQ
equal
Definition: InstrTypes.h:732
@ ICMP_NE
not equal
Definition: InstrTypes.h:733
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:690
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2199
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2185
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, std::optional< unsigned > InRangeIndex=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1225
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2213
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition: Constants.h:461
This is an important base class in LLVM.
Definition: Constant.h:41
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:332
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:37
Helper for check-and-exit error handling.
Definition: Error.h:1353
Tagged union holding either a T or a Error.
Definition: Error.h:474
Function summary information to aid decisions and implementation of importing.
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
bool isVarArg() const
Definition: DerivedTypes.h:123
Type * getReturnType() const
Definition: DerivedTypes.h:124
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:138
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:671
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:206
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition: GlobPattern.h:51
static Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:506
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:585
void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
Definition: Metadata.cpp:1391
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Metadata.cpp:1354
VCallVisibility getVCallVisibility() const
Definition: Metadata.cpp:1712
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1436
void setVCallVisibilityMetadata(VCallVisibility Visibility)
Definition: Metadata.cpp:1702
static bool isLocalLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:404
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:273
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:374
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:591
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
static GUID getGUID(StringRef GlobalName)
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:587
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
Global variable summary information to aid decisions and implementation of importing.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2625
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:392
const BasicBlock * getParent() const
Definition: Instruction.h:90
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:950
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Root of the metadata hierarchy.
Definition: Metadata.h:61
Class to hold module path string table and global value map, and encapsulate methods for operating on...
const TypeIdSummary * getTypeIdSummary(StringRef TypeId) const
This returns either a pointer to the type id summary (if present in the summary map) or null (if not ...
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
const ModuleHash & getModuleHash(const StringRef ModPath) const
Get the module SHA1 hash recorded for the given module path.
static constexpr const char * getRegularLTOModuleName()
static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash)
Convenience method for creating a promoted global name for the given value name of a local,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:659
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:428
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
static Type * getVoidTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:378
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
bool use_empty() const
Definition: Value.h:344
bool eraseMetadata(unsigned KindID)
Erase all metadata attachments with the given kind.
Definition: Metadata.cpp:1436
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:454
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:988
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
@ FS
Definition: X86.h:207
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ CommaSeparated
Definition: CommandLine.h:164
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:768
uint64_t findLowestOffset(ArrayRef< VirtualCallTarget > Targets, bool IsAfter, uint64_t Size)
void setAfterReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocAfter, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
void setBeforeReturnValues(MutableArrayRef< VirtualCallTarget > Targets, uint64_t AllocBefore, unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:330
@ Offset
Definition: DWP.cpp:440
MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR)
Returns the memory access properties of this copy of the function.
void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map< std::string, GVSummaryMapTy > *ModuleToSummariesForIndex=nullptr)
Write the specified module summary index to the given raw output stream, where it will be written in ...
@ Export
Export information to summary.
@ Import
Import information from summary.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2037
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1244
bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
void updatePublicTypeTestCalls(Module &M, bool WholeProgramVisibilityEnabledInLTO)
void getVisibleToRegularObjVtableGUIDs(ModuleSummaryIndex &Index, DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols, function_ref< bool(StringRef)> IsVisibleToRegularObj)
Based on typeID string, get all associated vtable GUIDS that are visible to regular objects.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
void updateIndexWPDForExports(ModuleSummaryIndex &Summary, function_ref< bool(StringRef, ValueInfo)> isExported, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Call after cross-module importing to update the recorded single impl devirt target names for any loca...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void runWholeProgramDevirtOnIndex(ModuleSummaryIndex &Summary, std::set< GlobalValue::GUID > &ExportedGUIDs, std::map< ValueInfo, std::vector< VTableSlotSummary > > &LocalWPDTargetsMap)
Perform index-based whole program devirtualization on the Summary index.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
Definition: Error.h:1184
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:103
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::vector< TypeIdOffsetVtableInfo > TypeIdCompatibleVtableInfo
List of vtable definitions decorated by a particular type identifier, and their corresponding offsets...
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1041
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
void updateVCallVisibilityInModule(Module &M, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, bool ValidateAllVtablesHaveTypeInfos, function_ref< bool(StringRef)> IsVisibleToRegularObj)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
Constant * getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, Constant *TopLevelGlobal=nullptr)
Processes a Constant recursively looking into elements of arrays, structs and expressions to find a t...
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, const DenseSet< GlobalValue::GUID > &DynamicExportSymbols, const DenseSet< GlobalValue::GUID > &VisibleToRegularObjSymbols)
If whole program visibility asserted, then upgrade all public vcall visibility metadata on vtable def...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Class to accumulate and hold information about a callee.
static unsigned getHashValue(const VTableSlotSummary &I)
static bool isEqual(const VTableSlotSummary &LHS, const VTableSlotSummary &RHS)
static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS)
static unsigned getHashValue(const VTableSlot &I)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:50
A call site that could be devirtualized.
A specification for a virtual function call with all constant integer arguments.
An "identifier" for a virtual function.
The following data structures summarize type metadata information.
std::map< uint64_t, WholeProgramDevirtResolution > WPDRes
Mapping from byte offset to whole-program devirt resolution for that (typeid, byte offset) pair.
TypeTestResolution TTRes
@ Unsat
Unsatisfiable type (i.e. no global has this type metadata)
enum llvm::TypeTestResolution::Kind TheKind
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
@ UniformRetVal
Uniform return value optimization.
@ VirtualConstProp
Virtual constant propagation.
@ UniqueRetVal
Unique return value optimization.
uint64_t Info
Additional information for the resolution:
enum llvm::WholeProgramDevirtResolution::ByArg::Kind TheKind
enum llvm::WholeProgramDevirtResolution::Kind TheKind
std::map< std::vector< uint64_t >, ByArg > ResByArg
Resolutions for calls with all constant integer arguments (excluding the first argument,...
@ SingleImpl
Single implementation devirtualization.
@ BranchFunnel
When retpoline mitigation is enabled, use a branch funnel that is defined in the merged module.
VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)