LLVM 17.0.0git
MergeFunctions.cpp
Go to the documentation of this file.
1//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass looks for equivalent functions that are mergeable and folds them.
10//
11// Order relation is defined on set of functions. It was made through
12// special function comparison procedure that returns
13// 0 when functions are equal,
14// -1 when Left function is less than right function, and
15// 1 for opposite case. We need total-ordering, so we need to maintain
16// four properties on the functions set:
17// a <= a (reflexivity)
18// if a <= b and b <= a then a = b (antisymmetry)
19// if a <= b and b <= c then a <= c (transitivity).
20// for all a and b: a <= b or b <= a (totality).
21//
22// Comparison iterates through each instruction in each basic block.
23// Functions are kept on binary tree. For each new function F we perform
24// lookup in binary tree.
25// In practice it works the following way:
26// -- We define Function* container class with custom "operator<" (FunctionPtr).
27// -- "FunctionPtr" instances are stored in std::set collection, so every
28// std::set::insert operation will give you result in log(N) time.
29//
30// As an optimization, a hash of the function structure is calculated first, and
31// two functions are only compared if they have the same hash. This hash is
32// cheap to compute, and has the property that if function F == G according to
33// the comparison function, then hash(F) == hash(G). This consistency property
34// is critical to ensuring all possible merging opportunities are exploited.
35// Collisions in the hash affect the speed of the pass but not the correctness
36// or determinism of the resulting transformation.
37//
38// When a match is found the functions are folded. If both functions are
39// overridable, we move the functionality into a new internal function and
40// leave two overridable thunks to it.
41//
42//===----------------------------------------------------------------------===//
43//
44// Future work:
45//
46// * virtual functions.
47//
48// Many functions have their address taken by the virtual function table for
49// the object they belong to. However, as long as it's only used for a lookup
50// and call, this is irrelevant, and we'd like to fold such functions.
51//
52// * be smarter about bitcasts.
53//
54// In order to fold functions, we will sometimes add either bitcast instructions
55// or bitcast constant expressions. Unfortunately, this can confound further
56// analysis since the two functions differ where one has a bitcast and the
57// other doesn't. We should learn to look through bitcasts.
58//
59// * Compare complex types with pointer types inside.
60// * Compare cross-reference cases.
61// * Compare complex expressions.
62//
63// All the three issues above could be described as ability to prove that
64// fA == fB == fC == fE == fF == fG in example below:
65//
66// void fA() {
67// fB();
68// }
69// void fB() {
70// fA();
71// }
72//
73// void fE() {
74// fF();
75// }
76// void fF() {
77// fG();
78// }
79// void fG() {
80// fE();
81// }
82//
83// The simplest cross-reference case (fA <--> fB) was implemented in previous
84// versions of MergeFunctions, but it occurred in only two function pairs
85// in the test-suite (which contains >50k functions).
86// The ability to detect complex cross-referencing (e.g. A->B->C->D->A),
87// however, could cover many more cases.
88//
89//===----------------------------------------------------------------------===//
90
92#include "llvm/ADT/ArrayRef.h"
94#include "llvm/ADT/Statistic.h"
95#include "llvm/IR/Argument.h"
96#include "llvm/IR/BasicBlock.h"
97#include "llvm/IR/Constant.h"
98#include "llvm/IR/Constants.h"
100#include "llvm/IR/DebugLoc.h"
101#include "llvm/IR/DerivedTypes.h"
102#include "llvm/IR/Function.h"
103#include "llvm/IR/GlobalValue.h"
104#include "llvm/IR/IRBuilder.h"
105#include "llvm/IR/InstrTypes.h"
106#include "llvm/IR/Instruction.h"
107#include "llvm/IR/Instructions.h"
109#include "llvm/IR/Module.h"
110#include "llvm/IR/Type.h"
111#include "llvm/IR/Use.h"
112#include "llvm/IR/User.h"
113#include "llvm/IR/Value.h"
114#include "llvm/IR/ValueHandle.h"
116#include "llvm/Pass.h"
117#include "llvm/Support/Casting.h"
119#include "llvm/Support/Debug.h"
121#include "llvm/Transforms/IPO.h"
124#include <algorithm>
125#include <cassert>
126#include <iterator>
127#include <set>
128#include <utility>
129#include <vector>
130
131using namespace llvm;
132
133#define DEBUG_TYPE "mergefunc"
134
// Counters maintained by this pass; each STATISTIC pairs a counter variable
// with its human-readable description.
135STATISTIC(NumFunctionsMerged, "Number of functions merged");
136STATISTIC(NumThunksWritten, "Number of thunks generated");
137STATISTIC(NumAliasesWritten, "Number of aliases generated");
// Bumped in mergeTwoFunctions() on the path where both functions are
// interposable and a fresh function is created to stand in for F.
138STATISTIC(NumDoubleWeak, "Number of new functions created");
139
141 "mergefunc-verify",
142 cl::desc("How many functions in a module could be used for "
143 "MergeFunctions to pass a basic correctness check. "
144 "'0' disables this check. Works only with '-debug' key."),
145 cl::init(0), cl::Hidden);
146
147// Under option -mergefunc-preserve-debug-info we:
148// - Do not create a new function for a thunk.
149// - Retain the debug info for a thunk's parameters (and associated
150// instructions for the debug info) from the entry block.
151// Note: -debug will display the algorithm at work.
152// - Create debug-info for the call (to the shared implementation) made by
153// a thunk and its return value.
154// - Erase the rest of the function, retaining the (minimally sized) entry
155// block to create a thunk.
156// - Preserve a thunk's call site to point to the thunk even when both occur
157// within the same translation unit, to aid debugability. Note that this
158// behaviour differs from the underlying -mergefunc implementation which
159// modifies the thunk's call site to point to the shared implementation
160// when both occur within the same translation unit.
161static cl::opt<bool>
162 MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden,
163 cl::init(false),
164 cl::desc("Preserve debug info in thunk when mergefunc "
165 "transformations are made."));
166
167static cl::opt<bool>
168 MergeFunctionsAliases("mergefunc-use-aliases", cl::Hidden,
169 cl::init(false),
170 cl::desc("Allow mergefunc to create aliases"));
171
172namespace {
173
174class FunctionNode {
175 mutable AssertingVH<Function> F;
177
178public:
179 // Note the hash is recalculated potentially multiple times, but it is cheap.
180 FunctionNode(Function *F)
181 : F(F), Hash(FunctionComparator::functionHash(*F)) {}
182
183 Function *getFunc() const { return F; }
184 FunctionComparator::FunctionHash getHash() const { return Hash; }
185
186 /// Replace the reference to the function F by the function G, assuming their
187 /// implementations are equal.
188 void replaceBy(Function *G) const {
189 F = G;
190 }
191};
192
193/// MergeFunctions finds functions which will generate identical machine code,
194/// by considering all pointer types to be equivalent. Once identified,
195/// MergeFunctions will fold them by replacing a call to one with a call to a
196/// bitcast of the other.
197class MergeFunctions {
198public:
199 MergeFunctions() : FnTree(FunctionNodeCmp(&GlobalNumbers)) {
200 }
201
202 bool runOnModule(Module &M);
203
204private:
205 // The function comparison operator is provided here so that FunctionNodes do
206 // not need to become larger with another pointer.
207 class FunctionNodeCmp {
208 GlobalNumberState* GlobalNumbers;
209
210 public:
211 FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
212
213 bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
214 // Order first by hashes, then full function comparison.
215 if (LHS.getHash() != RHS.getHash())
216 return LHS.getHash() < RHS.getHash();
217 FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
218 return FCmp.compare() < 0;
219 }
220 };
221 using FnTreeType = std::set<FunctionNode, FunctionNodeCmp>;
222
223 GlobalNumberState GlobalNumbers;
224
225 /// A work queue of functions that may have been modified and should be
226 /// analyzed again.
227 std::vector<WeakTrackingVH> Deferred;
228
229 /// Set of values marked as used in llvm.used and llvm.compiler.used.
231
232#ifndef NDEBUG
233 /// Checks the rules of order relation introduced among functions set.
234 /// Returns true, if check has been passed, and false if failed.
235 bool doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist);
236#endif
237
238 /// Insert a ComparableFunction into the FnTree, or merge it away if it's
239 /// equal to one that's already present.
240 bool insert(Function *NewFunction);
241
242 /// Remove a Function from the FnTree and queue it up for a second sweep of
243 /// analysis.
244 void remove(Function *F);
245
246 /// Find the functions that use this Value and remove them from FnTree and
247 /// queue the functions.
248 void removeUsers(Value *V);
249
250 /// Replace all direct calls of Old with calls of New. Will bitcast New if
251 /// necessary to make types match.
252 void replaceDirectCallers(Function *Old, Function *New);
253
254 /// Merge two equivalent functions. Upon completion, G may be deleted, or may
255 /// be converted into a thunk. In either case, it should never be visited
256 /// again.
257 void mergeTwoFunctions(Function *F, Function *G);
258
259 /// Fill PDIUnrelatedWL with instructions from the entry block that are
260 /// unrelated to parameter related debug info.
261 void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
262 std::vector<Instruction *> &PDIUnrelatedWL);
263
264 /// Erase the rest of the CFG (i.e. barring the entry block).
265 void eraseTail(Function *G);
266
267 /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
268 /// parameter debug info, from the entry block.
269 void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL);
270
271 /// Replace G with a simple tail call to bitcast(F). Also (unless
272 /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
273 /// delete G.
274 void writeThunk(Function *F, Function *G);
275
276 // Replace G with an alias to F (deleting function G)
277 void writeAlias(Function *F, Function *G);
278
279 // Replace G with an alias to F if possible, or a thunk to F if possible.
280 // Returns false if neither is the case.
281 bool writeThunkOrAlias(Function *F, Function *G);
282
283 /// Replace function F with function G in the function tree.
284 void replaceFunctionInTree(const FunctionNode &FN, Function *G);
285
286 /// The set of all distinct functions. Use the insert() and remove() methods
287 /// to modify it. The map allows efficient lookup and deferring of Functions.
288 FnTreeType FnTree;
289
290 // Map functions to the iterators of the FunctionNode which contains them
291 // in the FnTree. This must be updated carefully whenever the FnTree is
292 // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
293 // dangling iterators into FnTree. The invariant that preserves this is that
294 // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
295 DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
296};
297} // end anonymous namespace
298
301 MergeFunctions MF;
302 if (!MF.runOnModule(M))
303 return PreservedAnalyses::all();
305}
306
307#ifndef NDEBUG
308bool MergeFunctions::doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist) {
309 if (const unsigned Max = NumFunctionsForVerificationCheck) {
310 unsigned TripleNumber = 0;
311 bool Valid = true;
312
313 dbgs() << "MERGEFUNC-VERIFY: Started for first " << Max << " functions.\n";
314
315 unsigned i = 0;
316 for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
317 E = Worklist.end();
318 I != E && i < Max; ++I, ++i) {
319 unsigned j = i;
320 for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max;
321 ++J, ++j) {
322 Function *F1 = cast<Function>(*I);
323 Function *F2 = cast<Function>(*J);
324 int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
325 int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
326
327 // If F1 <= F2, then F2 >= F1, otherwise report failure.
328 if (Res1 != -Res2) {
329 dbgs() << "MERGEFUNC-VERIFY: Non-symmetric; triple: " << TripleNumber
330 << "\n";
331 dbgs() << *F1 << '\n' << *F2 << '\n';
332 Valid = false;
333 }
334
335 if (Res1 == 0)
336 continue;
337
338 unsigned k = j;
339 for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;
340 ++k, ++K, ++TripleNumber) {
341 if (K == J)
342 continue;
343
344 Function *F3 = cast<Function>(*K);
345 int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
346 int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
347
348 bool Transitive = true;
349
350 if (Res1 != 0 && Res1 == Res4) {
351 // F1 > F2, F2 > F3 => F1 > F3
352 Transitive = Res3 == Res1;
353 } else if (Res3 != 0 && Res3 == -Res4) {
354 // F1 > F3, F3 > F2 => F1 > F2
355 Transitive = Res3 == Res1;
356 } else if (Res4 != 0 && -Res3 == Res4) {
357 // F2 > F3, F3 > F1 => F2 > F1
358 Transitive = Res4 == -Res1;
359 }
360
361 if (!Transitive) {
362 dbgs() << "MERGEFUNC-VERIFY: Non-transitive; triple: "
363 << TripleNumber << "\n";
364 dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
365 << Res4 << "\n";
366 dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n';
367 Valid = false;
368 }
369 }
370 }
371 }
372
373 dbgs() << "MERGEFUNC-VERIFY: " << (Valid ? "Passed." : "Failed.") << "\n";
374 return Valid;
375 }
376 return true;
377}
378#endif
379
380/// Check whether \p F is eligible for function merging.
382 return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
383}
384
385bool MergeFunctions::runOnModule(Module &M) {
386 bool Changed = false;
387
389 collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
390 collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true);
391 Used.insert(UsedV.begin(), UsedV.end());
392
393 // All functions in the module, ordered by hash. Functions with a unique
394 // hash value are easily eliminated.
395 std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
396 HashedFuncs;
397 for (Function &Func : M) {
398 if (isEligibleForMerging(Func)) {
399 HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
400 }
401 }
402
403 llvm::stable_sort(HashedFuncs, less_first());
404
405 auto S = HashedFuncs.begin();
406 for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
407 // If the hash value matches the previous value or the next one, we must
408 // consider merging it. Otherwise it is dropped and never considered again.
409 if ((I != S && std::prev(I)->first == I->first) ||
410 (std::next(I) != IE && std::next(I)->first == I->first) ) {
411 Deferred.push_back(WeakTrackingVH(I->second));
412 }
413 }
414
415 do {
416 std::vector<WeakTrackingVH> Worklist;
417 Deferred.swap(Worklist);
418
419 LLVM_DEBUG(doFunctionalCheck(Worklist));
420
421 LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n');
422 LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
423
424 // Insert functions and merge them.
425 for (WeakTrackingVH &I : Worklist) {
426 if (!I)
427 continue;
428 Function *F = cast<Function>(I);
429 if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) {
430 Changed |= insert(F);
431 }
432 }
433 LLVM_DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
434 } while (!Deferred.empty());
435
436 FnTree.clear();
437 FNodesInTree.clear();
438 GlobalNumbers.clear();
439 Used.clear();
440
441 return Changed;
442}
443
444// Replace direct callers of Old with New.
445void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
446 Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
447 for (Use &U : llvm::make_early_inc_range(Old->uses())) {
448 CallBase *CB = dyn_cast<CallBase>(U.getUser());
449 if (CB && CB->isCallee(&U)) {
450 // Do not copy attributes from the called function to the call-site.
451 // Function comparison ensures that the attributes are the same up to
452 // type congruences in byval(), in which case we need to keep the byval
453 // type of the call-site, not the callee function.
454 remove(CB->getFunction());
455 U.set(BitcastNew);
456 }
457 }
458}
459
460// Helper for writeThunk,
461// Selects proper bitcast operation,
462// but a bit simpler then CastInst::getCastOpcode.
463static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
464 Type *SrcTy = V->getType();
465 if (SrcTy->isStructTy()) {
466 assert(DestTy->isStructTy());
467 assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
468 Value *Result = PoisonValue::get(DestTy);
469 for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
470 Value *Element =
471 createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
472 DestTy->getStructElementType(I));
473
474 Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
475 }
476 return Result;
477 }
478 assert(!DestTy->isStructTy());
479 if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
480 return Builder.CreateIntToPtr(V, DestTy);
481 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
482 return Builder.CreatePtrToInt(V, DestTy);
483 else
484 return Builder.CreateBitCast(V, DestTy);
485}
486
487// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
488// parameter debug info, from the entry block.
489void MergeFunctions::eraseInstsUnrelatedToPDI(
490 std::vector<Instruction *> &PDIUnrelatedWL) {
492 dbgs() << " Erasing instructions (in reverse order of appearance in "
493 "entry block) unrelated to parameter debug info from entry "
494 "block: {\n");
495 while (!PDIUnrelatedWL.empty()) {
496 Instruction *I = PDIUnrelatedWL.back();
497 LLVM_DEBUG(dbgs() << " Deleting Instruction: ");
498 LLVM_DEBUG(I->print(dbgs()));
499 LLVM_DEBUG(dbgs() << "\n");
500 I->eraseFromParent();
501 PDIUnrelatedWL.pop_back();
502 }
503 LLVM_DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter "
504 "debug info from entry block. \n");
505}
506
507// Reduce G to its entry block.
508void MergeFunctions::eraseTail(Function *G) {
509 std::vector<BasicBlock *> WorklistBB;
510 for (BasicBlock &BB : drop_begin(*G)) {
511 BB.dropAllReferences();
512 WorklistBB.push_back(&BB);
513 }
514 while (!WorklistBB.empty()) {
515 BasicBlock *BB = WorklistBB.back();
516 BB->eraseFromParent();
517 WorklistBB.pop_back();
518 }
519}
520
521// We are interested in the following instructions from the entry block as being
522// related to parameter debug info:
523// - @llvm.dbg.declare
524// - stores from the incoming parameters to locations on the stack-frame
525// - allocas that create these locations on the stack-frame
526// - @llvm.dbg.value
527// - the entry block's terminator
528// The rest are unrelated to debug info for the parameters; fill up
529// PDIUnrelatedWL with such instructions.
530void MergeFunctions::filterInstsUnrelatedToPDI(
531 BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
532 std::set<Instruction *> PDIRelated;
533 for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
534 BI != BIE; ++BI) {
535 if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
536 LLVM_DEBUG(dbgs() << " Deciding: ");
537 LLVM_DEBUG(BI->print(dbgs()));
538 LLVM_DEBUG(dbgs() << "\n");
539 DILocalVariable *DILocVar = DVI->getVariable();
540 if (DILocVar->isParameter()) {
541 LLVM_DEBUG(dbgs() << " Include (parameter): ");
542 LLVM_DEBUG(BI->print(dbgs()));
543 LLVM_DEBUG(dbgs() << "\n");
544 PDIRelated.insert(&*BI);
545 } else {
546 LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
547 LLVM_DEBUG(BI->print(dbgs()));
548 LLVM_DEBUG(dbgs() << "\n");
549 }
550 } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
551 LLVM_DEBUG(dbgs() << " Deciding: ");
552 LLVM_DEBUG(BI->print(dbgs()));
553 LLVM_DEBUG(dbgs() << "\n");
554 DILocalVariable *DILocVar = DDI->getVariable();
555 if (DILocVar->isParameter()) {
556 LLVM_DEBUG(dbgs() << " Parameter: ");
557 LLVM_DEBUG(DILocVar->print(dbgs()));
558 AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
559 if (AI) {
560 LLVM_DEBUG(dbgs() << " Processing alloca users: ");
561 LLVM_DEBUG(dbgs() << "\n");
562 for (User *U : AI->users()) {
563 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
564 if (Value *Arg = SI->getValueOperand()) {
565 if (isa<Argument>(Arg)) {
566 LLVM_DEBUG(dbgs() << " Include: ");
567 LLVM_DEBUG(AI->print(dbgs()));
568 LLVM_DEBUG(dbgs() << "\n");
569 PDIRelated.insert(AI);
570 LLVM_DEBUG(dbgs() << " Include (parameter): ");
571 LLVM_DEBUG(SI->print(dbgs()));
572 LLVM_DEBUG(dbgs() << "\n");
573 PDIRelated.insert(SI);
574 LLVM_DEBUG(dbgs() << " Include: ");
575 LLVM_DEBUG(BI->print(dbgs()));
576 LLVM_DEBUG(dbgs() << "\n");
577 PDIRelated.insert(&*BI);
578 } else {
579 LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
580 LLVM_DEBUG(SI->print(dbgs()));
581 LLVM_DEBUG(dbgs() << "\n");
582 }
583 }
584 } else {
585 LLVM_DEBUG(dbgs() << " Defer: ");
586 LLVM_DEBUG(U->print(dbgs()));
587 LLVM_DEBUG(dbgs() << "\n");
588 }
589 }
590 } else {
591 LLVM_DEBUG(dbgs() << " Delete (alloca NULL): ");
592 LLVM_DEBUG(BI->print(dbgs()));
593 LLVM_DEBUG(dbgs() << "\n");
594 }
595 } else {
596 LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
597 LLVM_DEBUG(BI->print(dbgs()));
598 LLVM_DEBUG(dbgs() << "\n");
599 }
600 } else if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
601 LLVM_DEBUG(dbgs() << " Will Include Terminator: ");
602 LLVM_DEBUG(BI->print(dbgs()));
603 LLVM_DEBUG(dbgs() << "\n");
604 PDIRelated.insert(&*BI);
605 } else {
606 LLVM_DEBUG(dbgs() << " Defer: ");
607 LLVM_DEBUG(BI->print(dbgs()));
608 LLVM_DEBUG(dbgs() << "\n");
609 }
610 }
612 dbgs()
613 << " Report parameter debug info related/related instructions: {\n");
614 for (Instruction &I : *GEntryBlock) {
615 if (PDIRelated.find(&I) == PDIRelated.end()) {
616 LLVM_DEBUG(dbgs() << " !PDIRelated: ");
617 LLVM_DEBUG(I.print(dbgs()));
618 LLVM_DEBUG(dbgs() << "\n");
619 PDIUnrelatedWL.push_back(&I);
620 } else {
621 LLVM_DEBUG(dbgs() << " PDIRelated: ");
622 LLVM_DEBUG(I.print(dbgs()));
623 LLVM_DEBUG(dbgs() << "\n");
624 }
625 }
626 LLVM_DEBUG(dbgs() << " }\n");
627}
628
629/// Whether this function may be replaced by a forwarding thunk.
631 if (F->isVarArg())
632 return false;
633
634 // Don't merge tiny functions using a thunk, since it can just end up
635 // making the function larger.
636 if (F->size() == 1) {
637 if (F->front().size() <= 2) {
638 LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
639 << " is too small to bother creating a thunk for\n");
640 return false;
641 }
642 }
643 return true;
644}
645
646// Replace G with a simple tail call to bitcast(F). Also (unless
647// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
648// delete G. Under MergeFunctionsPDI, we use G itself for creating
649// the thunk as we preserve the debug info (and associated instructions)
650// from G's entry block pertaining to G's incoming arguments which are
651// passed on as corresponding arguments in the call that G makes to F.
652// For better debugability, under MergeFunctionsPDI, we do not modify G's
653// call sites to point to F even when within the same translation unit.
654void MergeFunctions::writeThunk(Function *F, Function *G) {
655 BasicBlock *GEntryBlock = nullptr;
656 std::vector<Instruction *> PDIUnrelatedWL;
657 BasicBlock *BB = nullptr;
658 Function *NewG = nullptr;
659 if (MergeFunctionsPDI) {
660 LLVM_DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new "
661 "function as thunk; retain original: "
662 << G->getName() << "()\n");
663 GEntryBlock = &G->getEntryBlock();
665 dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related "
666 "debug info for "
667 << G->getName() << "() {\n");
668 filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL);
669 GEntryBlock->getTerminator()->eraseFromParent();
670 BB = GEntryBlock;
671 } else {
672 NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
673 G->getAddressSpace(), "", G->getParent());
674 NewG->setComdat(G->getComdat());
675 BB = BasicBlock::Create(F->getContext(), "", NewG);
676 }
677
679 Function *H = MergeFunctionsPDI ? G : NewG;
681 unsigned i = 0;
682 FunctionType *FFTy = F->getFunctionType();
683 for (Argument &AI : H->args()) {
684 Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
685 ++i;
686 }
687
688 CallInst *CI = Builder.CreateCall(F, Args);
689 ReturnInst *RI = nullptr;
690 bool isSwiftTailCall = F->getCallingConv() == CallingConv::SwiftTail &&
691 G->getCallingConv() == CallingConv::SwiftTail;
694 CI->setCallingConv(F->getCallingConv());
695 CI->setAttributes(F->getAttributes());
696 if (H->getReturnType()->isVoidTy()) {
697 RI = Builder.CreateRetVoid();
698 } else {
699 RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType()));
700 }
701
702 if (MergeFunctionsPDI) {
703 DISubprogram *DIS = G->getSubprogram();
704 if (DIS) {
705 DebugLoc CIDbgLoc =
706 DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
707 DebugLoc RIDbgLoc =
708 DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
709 CI->setDebugLoc(CIDbgLoc);
710 RI->setDebugLoc(RIDbgLoc);
711 } else {
713 dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for "
714 << G->getName() << "()\n");
715 }
716 eraseTail(G);
717 eraseInstsUnrelatedToPDI(PDIUnrelatedWL);
719 dbgs() << "} // End of parameter related debug info filtering for: "
720 << G->getName() << "()\n");
721 } else {
722 NewG->copyAttributesFrom(G);
723 NewG->takeName(G);
724 removeUsers(G);
725 G->replaceAllUsesWith(NewG);
726 G->eraseFromParent();
727 }
728
729 LLVM_DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n');
730 ++NumThunksWritten;
731}
732
733// Whether this function may be replaced by an alias
735 if (!MergeFunctionsAliases || !F->hasGlobalUnnamedAddr())
736 return false;
737
738 // We should only see linkages supported by aliases here
739 assert(F->hasLocalLinkage() || F->hasExternalLinkage()
740 || F->hasWeakLinkage() || F->hasLinkOnceLinkage());
741 return true;
742}
743
744// Replace G with an alias to F (deleting function G)
745void MergeFunctions::writeAlias(Function *F, Function *G) {
746 Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
747 PointerType *PtrType = G->getType();
748 auto *GA = GlobalAlias::create(G->getValueType(), PtrType->getAddressSpace(),
749 G->getLinkage(), "", BitcastF, G->getParent());
750
751 const MaybeAlign FAlign = F->getAlign();
752 const MaybeAlign GAlign = G->getAlign();
753 if (FAlign || GAlign)
754 F->setAlignment(std::max(FAlign.valueOrOne(), GAlign.valueOrOne()));
755 else
756 F->setAlignment(std::nullopt);
757 GA->takeName(G);
758 GA->setVisibility(G->getVisibility());
759 GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
760
761 removeUsers(G);
762 G->replaceAllUsesWith(GA);
763 G->eraseFromParent();
764
765 LLVM_DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
766 ++NumAliasesWritten;
767}
768
769// Replace G with an alias to F if possible, or a thunk to F if
770// profitable. Returns false if neither is the case.
771bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
772 if (canCreateAliasFor(G)) {
773 writeAlias(F, G);
774 return true;
775 }
776 if (canCreateThunkFor(F)) {
777 writeThunk(F, G);
778 return true;
779 }
780 return false;
781}
782
783// Merge two equivalent functions. Upon completion, Function G is deleted.
784void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
785 if (F->isInterposable()) {
786 assert(G->isInterposable());
787
788 // Both writeThunkOrAlias() calls below must succeed, either because we can
789 // create aliases for G and NewF, or because a thunk for F is profitable.
790 // F here has the same signature as NewF below, so that's what we check.
791 if (!canCreateThunkFor(F) &&
793 return;
794
795 // Make them both thunks to the same internal function.
796 Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
797 F->getAddressSpace(), "", F->getParent());
798 NewF->copyAttributesFrom(F);
799 NewF->takeName(F);
800 removeUsers(F);
801 F->replaceAllUsesWith(NewF);
802
803 // We collect alignment before writeThunkOrAlias that overwrites NewF and
804 // G's content.
805 const MaybeAlign NewFAlign = NewF->getAlign();
806 const MaybeAlign GAlign = G->getAlign();
807
808 writeThunkOrAlias(F, G);
809 writeThunkOrAlias(F, NewF);
810
811 if (NewFAlign || GAlign)
812 F->setAlignment(std::max(NewFAlign.valueOrOne(), GAlign.valueOrOne()));
813 else
814 F->setAlignment(std::nullopt);
815 F->setLinkage(GlobalValue::PrivateLinkage);
816 ++NumDoubleWeak;
817 ++NumFunctionsMerged;
818 } else {
819 // For better debugability, under MergeFunctionsPDI, we do not modify G's
820 // call sites to point to F even when within the same translation unit.
821 if (!G->isInterposable() && !MergeFunctionsPDI) {
822 // Functions referred to by llvm.used/llvm.compiler.used are special:
823 // there are uses of the symbol name that are not visible to LLVM,
824 // usually from inline asm.
825 if (G->hasGlobalUnnamedAddr() && !Used.contains(G)) {
826 // G might have been a key in our GlobalNumberState, and it's illegal
827 // to replace a key in ValueMap<GlobalValue *> with a non-global.
828 GlobalNumbers.erase(G);
829 // If G's address is not significant, replace it entirely.
830 Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
831 removeUsers(G);
832 G->replaceAllUsesWith(BitcastF);
833 } else {
834 // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
835 // above).
836 replaceDirectCallers(G, F);
837 }
838 }
839
840 // If G was internal then we may have replaced all uses of G with F. If so,
841 // stop here and delete G. There's no need for a thunk. (See note on
842 // MergeFunctionsPDI above).
843 if (G->isDiscardableIfUnused() && G->use_empty() && !MergeFunctionsPDI) {
844 G->eraseFromParent();
845 ++NumFunctionsMerged;
846 return;
847 }
848
849 if (writeThunkOrAlias(F, G)) {
850 ++NumFunctionsMerged;
851 }
852 }
853}
854
855/// Replace function F by function G.
856void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
857 Function *G) {
858 Function *F = FN.getFunc();
859 assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
860 "The two functions must be equal");
861
862 auto I = FNodesInTree.find(F);
863 assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
864 assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
865
866 FnTreeType::iterator IterToFNInFnTree = I->second;
867 assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
868 // Remove F -> FN and insert G -> FN
869 FNodesInTree.erase(I);
870 FNodesInTree.insert({G, IterToFNInFnTree});
871 // Replace F with G in FN, which is stored inside the FnTree.
872 FN.replaceBy(G);
873}
874
875// Ordering for functions that are equal under FunctionComparator
876static bool isFuncOrderCorrect(const Function *F, const Function *G) {
877 if (F->isInterposable() != G->isInterposable()) {
878 // Strong before weak, because the weak function may call the strong
879 // one, but not the other way around.
880 return !F->isInterposable();
881 }
882 if (F->hasLocalLinkage() != G->hasLocalLinkage()) {
883 // External before local, because we definitely have to keep the external
884 // function, but may be able to drop the local one.
885 return !F->hasLocalLinkage();
886 }
887 // Impose a total order (by name) on the replacement of functions. This is
888 // important when operating on more than one module independently to prevent
889 // cycles of thunks calling each other when the modules are linked together.
890 return F->getName() <= G->getName();
891}
892
893// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
894// that was already inserted.
895bool MergeFunctions::insert(Function *NewFunction) {
896 std::pair<FnTreeType::iterator, bool> Result =
897 FnTree.insert(FunctionNode(NewFunction));
898
899 if (Result.second) {
900 assert(FNodesInTree.count(NewFunction) == 0);
901 FNodesInTree.insert({NewFunction, Result.first});
902 LLVM_DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName()
903 << '\n');
904 return false;
905 }
906
907 const FunctionNode &OldF = *Result.first;
908
909 if (!isFuncOrderCorrect(OldF.getFunc(), NewFunction)) {
910 // Swap the two functions.
911 Function *F = OldF.getFunc();
912 replaceFunctionInTree(*Result.first, NewFunction);
913 NewFunction = F;
914 assert(OldF.getFunc() != F && "Must have swapped the functions.");
915 }
916
917 LLVM_DEBUG(dbgs() << " " << OldF.getFunc()->getName()
918 << " == " << NewFunction->getName() << '\n');
919
920 Function *DeleteF = NewFunction;
921 mergeTwoFunctions(OldF.getFunc(), DeleteF);
922 return true;
923}
924
925// Remove a function from FnTree. If it was already in FnTree, add
926// it to Deferred so that we'll look at it in the next round.
927void MergeFunctions::remove(Function *F) {
928 auto I = FNodesInTree.find(F);
929 if (I != FNodesInTree.end()) {
930 LLVM_DEBUG(dbgs() << "Deferred " << F->getName() << ".\n");
931 FnTree.erase(I->second);
932 // I->second has been invalidated, remove it from the FNodesInTree map to
933 // preserve the invariant.
934 FNodesInTree.erase(I);
935 Deferred.emplace_back(F);
936 }
937}
938
939// For each instruction used by the value, remove() the function that contains
940// the instruction. This should happen right before a call to RAUW.
941void MergeFunctions::removeUsers(Value *V) {
942 for (User *U : V->users())
943 if (auto *I = dyn_cast<Instruction>(U))
944 remove(I->getFunction());
945}
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
assume Assume Builder
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
static bool canCreateAliasFor(Function *F)
static bool isEligibleForMerging(Function &F)
Check whether F is eligible for function merging.
static cl::opt< unsigned > NumFunctionsForVerificationCheck("mergefunc-verify", cl::desc("How many functions in a module could be used for " "MergeFunctions to pass a basic correctness check. " "'0' disables this check. Works only with '-debug' key."), cl::init(0), cl::Hidden)
static bool canCreateThunkFor(Function *F)
Whether this function may be replaced by a forwarding thunk.
static cl::opt< bool > MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden, cl::init(false), cl::desc("Preserve debug info in thunk when mergefunc " "transformations are made."))
static Value * createCast(IRBuilder<> &Builder, Value *V, Type *DestTy)
static cl::opt< bool > MergeFunctionsAliases("mergefunc-use-aliases", cl::Hidden, cl::init(false), cl::desc("Allow mergefunc to create aliases"))
static bool isFuncOrderCorrect(const Function *F, const Function *G)
Module.h This file contains the declarations for the Module class.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This defines the Use class.
Value * RHS
Value * LHS
an instruction to allocate memory on the stack
Definition: Instructions.h:58
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:132
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:127
const Instruction & back() const
Definition: BasicBlock.h:328
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1471
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
Definition: InstrTypes.h:1419
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1490
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2220
This is an important base class in LLVM.
Definition: Constant.h:41
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
FunctionComparator - Compares two functions to determine whether or not they will generate machine co...
int compare()
Test whether the two functions have equivalent behaviour.
static FunctionHash functionHash(Function &)
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:136
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:743
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:520
GlobalNumberState assigns an integer to each global value in the program, which is used by the compar...
void erase(GlobalValue *Global)
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
void setComdat(Comdat *C)
Definition: Globals.cpp:198
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
LLVMContext & getContext() const
Definition: Metadata.h:1107
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
void print(raw_ostream &OS, const Module *M=nullptr, bool IsForDebug=false) const
Print.
Definition: AsmWriter.cpp:4897
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1750
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Return a value (possibly void), from a function.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:450
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Type * getStructElementType(unsigned N) const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:258
unsigned getStructNumElements() const
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:252
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:231
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
Definition: AsmWriter.cpp:4698
iterator_range< user_iterator > users()
Definition: Value.h:421
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
void pop_back()
Definition: ilist.h:319
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
void stable_sort(R &&Range)
Definition: STLExtras.h:2063
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:807
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
Function object to check whether the first component of a container supported by std::get (like std::...
Definition: STLExtras.h:1537