LLVM 20.0.0git
CallPromotionUtils.cpp
Go to the documentation of this file.
1//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utilities useful for promoting indirect call sites to
10// direct call sites.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/Analysis/Loads.h"
19#include "llvm/IR/Constant.h"
20#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Module.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "call-promotion-utils"
30
31/// Fix-up phi nodes in an invoke instruction's normal destination.
32///
33/// After versioning an invoke instruction, values coming from the original
34/// block will now be coming from the "merge" block. For example, in the code
35/// below:
36///
37/// then_bb:
38/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
39///
40/// else_bb:
41/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
42///
43/// merge_bb:
44/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
45/// br %normal_dst
46///
47/// normal_dst:
48/// %t3 = phi i32 [ %x, %orig_bb ], ...
49///
50/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
51/// "normal_dst" must be fixed to refer to "merge_bb":
52///
53/// normal_dst:
54/// %t3 = phi i32 [ %x, %merge_bb ], ...
55///
56static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
57 BasicBlock *MergeBlock) {
58 for (PHINode &Phi : Invoke->getNormalDest()->phis()) {
59 int Idx = Phi.getBasicBlockIndex(OrigBlock);
60 if (Idx == -1)
61 continue;
62 Phi.setIncomingBlock(Idx, MergeBlock);
63 }
64}
65
66/// Fix-up phi nodes in an invoke instruction's unwind destination.
67///
68/// After versioning an invoke instruction, values coming from the original
69/// block will now be coming from either the "then" block or the "else" block.
70/// For example, in the code below:
71///
72/// then_bb:
73/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
74///
75/// else_bb:
76/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
77///
78/// unwind_dst:
79/// %t3 = phi i32 [ %x, %orig_bb ], ...
80///
81/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
82/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
83///
84/// unwind_dst:
85/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
86///
87static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
88 BasicBlock *ThenBlock,
89 BasicBlock *ElseBlock) {
90 for (PHINode &Phi : Invoke->getUnwindDest()->phis()) {
91 int Idx = Phi.getBasicBlockIndex(OrigBlock);
92 if (Idx == -1)
93 continue;
94 auto *V = Phi.getIncomingValue(Idx);
95 Phi.setIncomingBlock(Idx, ThenBlock);
96 Phi.addIncoming(V, ElseBlock);
97 }
98}
99
100/// Create a phi node for the returned value of a call or invoke instruction.
101///
102/// After versioning a call or invoke instruction that returns a value, we have
103/// to merge the value of the original and new instructions. We do this by
104/// creating a phi node and replacing uses of the original instruction with this
105/// phi node.
106///
107/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
108/// defined in "then_bb", we create the following phi node:
109///
110/// ; Uses of the original instruction are replaced by uses of the phi node.
111/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
112///
113static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
114 BasicBlock *MergeBlock, IRBuilder<> &Builder) {
115
116 if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
117 return;
118
119 Builder.SetInsertPoint(MergeBlock, MergeBlock->begin());
120 PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
121 SmallVector<User *, 16> UsersToUpdate(OrigInst->users());
122 for (User *U : UsersToUpdate)
123 U->replaceUsesOfWith(OrigInst, Phi);
124 Phi->addIncoming(OrigInst, OrigInst->getParent());
125 Phi->addIncoming(NewInst, NewInst->getParent());
126}
127
128/// Cast a call or invoke instruction to the given type.
129///
130/// When promoting a call site, the return type of the call site might not match
131/// that of the callee. If this is the case, we have to cast the returned value
132/// to the correct type. The location of the cast depends on if we have a call
133/// or invoke instruction.
134///
135/// For example, if the call instruction below requires a bitcast after
136/// promotion:
137///
138/// orig_bb:
139/// %t0 = call i32 @func()
140/// ...
141///
142/// The bitcast is placed after the call instruction:
143///
144/// orig_bb:
145/// ; Uses of the original return value are replaced by uses of the bitcast.
146/// %t0 = call i32 @func()
147/// %t1 = bitcast i32 %t0 to ...
148/// ...
149///
150/// A similar transformation is performed for invoke instructions. However,
151/// since invokes are terminating, a new block is created for the bitcast. For
152/// example, if the invoke instruction below requires a bitcast after promotion:
153///
154/// orig_bb:
155/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
156///
157/// The edge between the original block and the invoke's normal destination is
158/// split, and the bitcast is placed there:
159///
160/// orig_bb:
161/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
162///
163/// split_bb:
164/// ; Uses of the original return value are replaced by uses of the bitcast.
165/// %t1 = bitcast i32 %t0 to ...
166/// br label %normal_dst
167///
168static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
169
170 // Save the users of the calling instruction. These uses will be changed to
171 // use the bitcast after we create it.
172 SmallVector<User *, 16> UsersToUpdate(CB.users());
173
174 // Determine an appropriate location to create the bitcast for the return
175 // value. The location depends on if we have a call or invoke instruction.
176 BasicBlock::iterator InsertBefore;
177 if (auto *Invoke = dyn_cast<InvokeInst>(&CB))
178 InsertBefore =
179 SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->begin();
180 else
181 InsertBefore = std::next(CB.getIterator());
182
183 // Bitcast the return value to the correct type.
184 auto *Cast = CastInst::CreateBitOrPointerCast(&CB, RetTy, "", InsertBefore);
185 if (RetBitCast)
186 *RetBitCast = Cast;
187
188 // Replace all the original uses of the calling instruction with the bitcast.
189 for (User *U : UsersToUpdate)
190 U->replaceUsesOfWith(&CB, Cast);
191}
192
193/// Predicate and clone the given call site.
194///
195/// This function creates an if-then-else structure at the location of the call
196/// site. The "if" condition is specified by `Cond`.
197/// The original call site is moved into the "else" block, and a clone of the
198/// call site is placed in the "then" block. The cloned instruction is returned.
199///
200/// For example, the call instruction below:
201///
202/// orig_bb:
203/// %t0 = call i32 %ptr()
204/// ...
205///
206/// Is replace by the following:
207///
208/// orig_bb:
209/// %cond = Cond
210/// br i1 %cond, %then_bb, %else_bb
211///
212/// then_bb:
213/// ; The clone of the original call instruction is placed in the "then"
214/// ; block. It is not yet promoted.
215/// %t1 = call i32 %ptr()
216/// br merge_bb
217///
218/// else_bb:
219/// ; The original call instruction is moved to the "else" block.
220/// %t0 = call i32 %ptr()
221/// br merge_bb
222///
223/// merge_bb:
224/// ; Uses of the original call instruction are replaced by uses of the phi
225/// ; node.
226/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
227/// ...
228///
229/// A similar transformation is performed for invoke instructions. However,
230/// since invokes are terminating, more work is required. For example, the
231/// invoke instruction below:
232///
233/// orig_bb:
234/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst
235///
236/// Is replace by the following:
237///
238/// orig_bb:
239/// %cond = Cond
240/// br i1 %cond, %then_bb, %else_bb
241///
242/// then_bb:
243/// ; The clone of the original invoke instruction is placed in the "then"
244/// ; block, and its normal destination is set to the "merge" block. It is
245/// ; not yet promoted.
246/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
247///
248/// else_bb:
249/// ; The original invoke instruction is moved into the "else" block, and
250/// ; its normal destination is set to the "merge" block.
251/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
252///
253/// merge_bb:
254/// ; Uses of the original invoke instruction are replaced by uses of the
255/// ; phi node, and the merge block branches to the normal destination.
256/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
257/// br %normal_dst
258///
259/// An indirect musttail call is processed slightly differently in that:
260/// 1. No merge block needed for the orginal and the cloned callsite, since
261/// either one ends the flow. No phi node is needed either.
262/// 2. The return statement following the original call site is duplicated too
263/// and placed immediately after the cloned call site per the IR convention.
264///
265/// For example, the musttail call instruction below:
266///
267/// orig_bb:
268/// %t0 = musttail call i32 %ptr()
269/// ...
270///
271/// Is replaced by the following:
272///
273/// cond_bb:
274/// %cond = Cond
275/// br i1 %cond, %then_bb, %orig_bb
276///
277/// then_bb:
278/// ; The clone of the original call instruction is placed in the "then"
279/// ; block. It is not yet promoted.
280/// %t1 = musttail call i32 %ptr()
281/// ret %t1
282///
283/// orig_bb:
284/// ; The original call instruction stays in its original block.
285/// %t0 = musttail call i32 %ptr()
286/// ret %t0
288 MDNode *BranchWeights) {
289
290 IRBuilder<> Builder(&CB);
291 CallBase *OrigInst = &CB;
292 BasicBlock *OrigBlock = OrigInst->getParent();
293
294 if (OrigInst->isMustTailCall()) {
295 // Create an if-then structure. The original instruction stays in its block,
296 // and a clone of the original instruction is placed in the "then" block.
297 Instruction *ThenTerm =
298 SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
299 BasicBlock *ThenBlock = ThenTerm->getParent();
300 ThenBlock->setName("if.true.direct_targ");
301 CallBase *NewInst = cast<CallBase>(OrigInst->clone());
302 NewInst->insertBefore(ThenTerm);
303
304 // Place a clone of the optional bitcast after the new call site.
305 Value *NewRetVal = NewInst;
306 auto Next = OrigInst->getNextNode();
307 if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
308 assert(BitCast->getOperand(0) == OrigInst &&
309 "bitcast following musttail call must use the call");
310 auto NewBitCast = BitCast->clone();
311 NewBitCast->replaceUsesOfWith(OrigInst, NewInst);
312 NewBitCast->insertBefore(ThenTerm);
313 NewRetVal = NewBitCast;
314 Next = BitCast->getNextNode();
315 }
316
317 // Place a clone of the return instruction after the new call site.
318 ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
319 assert(Ret && "musttail call must precede a ret with an optional bitcast");
320 auto NewRet = Ret->clone();
321 if (Ret->getReturnValue())
322 NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
323 NewRet->insertBefore(ThenTerm);
324
325 // A return instructions is terminating, so we don't need the terminator
326 // instruction just created.
327 ThenTerm->eraseFromParent();
328
329 return *NewInst;
330 }
331
332 // Create an if-then-else structure. The original instruction is moved into
333 // the "else" block, and a clone of the original instruction is placed in the
334 // "then" block.
335 Instruction *ThenTerm = nullptr;
336 Instruction *ElseTerm = nullptr;
337 SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
338 BasicBlock *ThenBlock = ThenTerm->getParent();
339 BasicBlock *ElseBlock = ElseTerm->getParent();
340 BasicBlock *MergeBlock = OrigInst->getParent();
341
342 ThenBlock->setName("if.true.direct_targ");
343 ElseBlock->setName("if.false.orig_indirect");
344 MergeBlock->setName("if.end.icp");
345
346 CallBase *NewInst = cast<CallBase>(OrigInst->clone());
347 OrigInst->moveBefore(ElseTerm);
348 NewInst->insertBefore(ThenTerm);
349
350 // If the original call site is an invoke instruction, we have extra work to
351 // do since invoke instructions are terminating. We have to fix-up phi nodes
352 // in the invoke's normal and unwind destinations.
353 if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
354 auto *NewInvoke = cast<InvokeInst>(NewInst);
355
356 // Invoke instructions are terminating, so we don't need the terminator
357 // instructions that were just created.
358 ThenTerm->eraseFromParent();
359 ElseTerm->eraseFromParent();
360
361 // Branch from the "merge" block to the original normal destination.
362 Builder.SetInsertPoint(MergeBlock);
363 Builder.CreateBr(OrigInvoke->getNormalDest());
364
365 // Fix-up phi nodes in the original invoke's normal and unwind destinations.
366 fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
367 fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
368
369 // Now set the normal destinations of the invoke instructions to be the
370 // "merge" block.
371 OrigInvoke->setNormalDest(MergeBlock);
372 NewInvoke->setNormalDest(MergeBlock);
373 }
374
375 // Create a phi node for the returned value of the call site.
376 createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
377
378 return *NewInst;
379}
380
381// Predicate and clone the given call site using condition `CB.callee ==
382// Callee`. See the comment `versionCallSiteWithCond` for the transformation.
384 MDNode *BranchWeights) {
385
386 IRBuilder<> Builder(&CB);
387
388 // Create the compare. The called value and callee must have the same type to
389 // be compared.
390 if (CB.getCalledOperand()->getType() != Callee->getType())
391 Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
392 auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
393
394 return versionCallSiteWithCond(CB, Cond, BranchWeights);
395}
396
398 const char **FailureReason) {
399 assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
400
401 auto &DL = Callee->getDataLayout();
402
403 // Check the return type. The callee's return value type must be bitcast
404 // compatible with the call site's type.
405 Type *CallRetTy = CB.getType();
406 Type *FuncRetTy = Callee->getReturnType();
407 if (CallRetTy != FuncRetTy)
408 if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
409 if (FailureReason)
410 *FailureReason = "Return type mismatch";
411 return false;
412 }
413
414 // The number of formal arguments of the callee.
415 unsigned NumParams = Callee->getFunctionType()->getNumParams();
416
417 // The number of actual arguments in the call.
418 unsigned NumArgs = CB.arg_size();
419
420 // Check the number of arguments. The callee and call site must agree on the
421 // number of arguments.
422 if (NumArgs != NumParams && !Callee->isVarArg()) {
423 if (FailureReason)
424 *FailureReason = "The number of arguments mismatch";
425 return false;
426 }
427
428 // Check the argument types. The callee's formal argument types must be
429 // bitcast compatible with the corresponding actual argument types of the call
430 // site.
431 unsigned I = 0;
432 for (; I < NumParams; ++I) {
433 // Make sure that the callee and call agree on byval/inalloca. The types do
434 // not have to match.
435 if (Callee->hasParamAttribute(I, Attribute::ByVal) !=
436 CB.getAttributes().hasParamAttr(I, Attribute::ByVal)) {
437 if (FailureReason)
438 *FailureReason = "byval mismatch";
439 return false;
440 }
441 if (Callee->hasParamAttribute(I, Attribute::InAlloca) !=
442 CB.getAttributes().hasParamAttr(I, Attribute::InAlloca)) {
443 if (FailureReason)
444 *FailureReason = "inalloca mismatch";
445 return false;
446 }
447
448 Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
449 Type *ActualTy = CB.getArgOperand(I)->getType();
450 if (FormalTy == ActualTy)
451 continue;
452 if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
453 if (FailureReason)
454 *FailureReason = "Argument type mismatch";
455 return false;
456 }
457
458 // MustTail call needs stricter type match. See
459 // Verifier::verifyMustTailCall().
460 if (CB.isMustTailCall()) {
461 PointerType *PF = dyn_cast<PointerType>(FormalTy);
462 PointerType *PA = dyn_cast<PointerType>(ActualTy);
463 if (!PF || !PA || PF->getAddressSpace() != PA->getAddressSpace()) {
464 if (FailureReason)
465 *FailureReason = "Musttail call Argument type mismatch";
466 return false;
467 }
468 }
469 }
470 for (; I < NumArgs; I++) {
471 // Vararg functions can have more arguments than parameters.
472 assert(Callee->isVarArg());
473 if (CB.paramHasAttr(I, Attribute::StructRet)) {
474 if (FailureReason)
475 *FailureReason = "SRet arg to vararg function";
476 return false;
477 }
478 }
479
480 return true;
481}
482
484 CastInst **RetBitCast) {
485 assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
486
487 // Set the called function of the call site to be the given callee (but don't
488 // change the type).
489 CB.setCalledOperand(Callee);
490
491 // Since the call site will no longer be direct, we must clear metadata that
492 // is only appropriate for indirect calls. This includes !prof and !callees
493 // metadata.
494 CB.setMetadata(LLVMContext::MD_prof, nullptr);
495 CB.setMetadata(LLVMContext::MD_callees, nullptr);
496
497 // If the function type of the call site matches that of the callee, no
498 // additional work is required.
499 if (CB.getFunctionType() == Callee->getFunctionType())
500 return CB;
501
502 // Save the return types of the call site and callee.
503 Type *CallSiteRetTy = CB.getType();
504 Type *CalleeRetTy = Callee->getReturnType();
505
506 // Change the function type of the call site the match that of the callee.
507 CB.mutateFunctionType(Callee->getFunctionType());
508
509 // Inspect the arguments of the call site. If an argument's type doesn't
510 // match the corresponding formal argument's type in the callee, bitcast it
511 // to the correct type.
512 auto CalleeType = Callee->getFunctionType();
513 auto CalleeParamNum = CalleeType->getNumParams();
514
515 LLVMContext &Ctx = Callee->getContext();
516 const AttributeList &CallerPAL = CB.getAttributes();
517 // The new list of argument attributes.
519 bool AttributeChanged = false;
520
521 for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
522 auto *Arg = CB.getArgOperand(ArgNo);
523 Type *FormalTy = CalleeType->getParamType(ArgNo);
524 Type *ActualTy = Arg->getType();
525 if (FormalTy != ActualTy) {
526 auto *Cast =
527 CastInst::CreateBitOrPointerCast(Arg, FormalTy, "", CB.getIterator());
528 CB.setArgOperand(ArgNo, Cast);
529
530 // Remove any incompatible attributes for the argument.
531 AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo));
533 FormalTy, CallerPAL.getParamAttrs(ArgNo)));
534
535 // We may have a different byval/inalloca type.
536 if (ArgAttrs.getByValType())
537 ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo));
538 if (ArgAttrs.getInAllocaType())
539 ArgAttrs.addInAllocaAttr(Callee->getParamInAllocaType(ArgNo));
540
541 NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
542 AttributeChanged = true;
543 } else
544 NewArgAttrs.push_back(CallerPAL.getParamAttrs(ArgNo));
545 }
546
547 // If the return type of the call site doesn't match that of the callee, cast
548 // the returned value to the appropriate type.
549 // Remove any incompatible return value attribute.
550 AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs());
551 if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
552 createRetBitCast(CB, CallSiteRetTy, RetBitCast);
553 RAttrs.remove(
554 AttributeFuncs::typeIncompatible(CalleeRetTy, CallerPAL.getRetAttrs()));
555 AttributeChanged = true;
556 }
557
558 // Set the new callsite attribute.
559 if (AttributeChanged)
560 CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttrs(),
561 AttributeSet::get(Ctx, RAttrs),
562 NewArgAttrs));
563
564 return CB;
565}
566
568 MDNode *BranchWeights) {
569
570 // Version the indirect call site. If the called value is equal to the given
571 // callee, 'NewInst' will be executed, otherwise the original call site will
572 // be executed.
573 CallBase &NewInst = versionCallSite(CB, Callee, BranchWeights);
574
575 // Promote 'NewInst' so that it directly calls the desired function.
576 return promoteCall(NewInst, Callee);
577}
578
580 PGOContextualProfile &CtxProf) {
582 if (!CtxProf.isFunctionKnown(Callee))
583 return nullptr;
584 auto &Caller = *CB.getFunction();
586 if (!CSInstr)
587 return nullptr;
588 const uint64_t CSIndex = CSInstr->getIndex()->getZExtValue();
589
591 versionCallSite(CB, &Callee, /*BranchWeights=*/nullptr), &Callee);
592 CSInstr->moveBefore(&CB);
593 const auto NewCSID = CtxProf.allocateNextCallsiteIndex(Caller);
594 auto *NewCSInstr = cast<InstrProfCallsite>(CSInstr->clone());
595 NewCSInstr->setIndex(NewCSID);
596 NewCSInstr->setCallee(&Callee);
597 NewCSInstr->insertBefore(&DirectCall);
598 auto &DirectBB = *DirectCall.getParent();
599 auto &IndirectBB = *CB.getParent();
600
601 assert((CtxProfAnalysis::getBBInstrumentation(IndirectBB) == nullptr) &&
602 "The ICP direct BB is new, it shouldn't have instrumentation");
603 assert((CtxProfAnalysis::getBBInstrumentation(DirectBB) == nullptr) &&
604 "The ICP indirect BB is new, it shouldn't have instrumentation");
605
606 // Allocate counters for the new basic blocks.
607 const uint32_t DirectID = CtxProf.allocateNextCounterIndex(Caller);
608 const uint32_t IndirectID = CtxProf.allocateNextCounterIndex(Caller);
609 auto *EntryBBIns =
610 CtxProfAnalysis::getBBInstrumentation(Caller.getEntryBlock());
611 auto *DirectBBIns = cast<InstrProfCntrInstBase>(EntryBBIns->clone());
612 DirectBBIns->setIndex(DirectID);
613 DirectBBIns->insertInto(&DirectBB, DirectBB.getFirstInsertionPt());
614
615 auto *IndirectBBIns = cast<InstrProfCntrInstBase>(EntryBBIns->clone());
616 IndirectBBIns->setIndex(IndirectID);
617 IndirectBBIns->insertInto(&IndirectBB, IndirectBB.getFirstInsertionPt());
618
619 const GlobalValue::GUID CalleeGUID = AssignGUIDPass::getGUID(Callee);
620 const uint32_t NewCountersSize = IndirectID + 1;
621
622 auto ProfileUpdater = [&](PGOCtxProfContext &Ctx) {
623 assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
624 assert(NewCountersSize - 2 == Ctx.counters().size());
625 // All the ctx-es belonging to a function must have the same size counters.
626 Ctx.resizeCounters(NewCountersSize);
627
628 // Maybe in this context, the indirect callsite wasn't observed at all. That
629 // would make both direct and indirect BBs cold - which is what we already
630 // have from resising the counters.
631 if (!Ctx.hasCallsite(CSIndex))
632 return;
633 auto &CSData = Ctx.callsite(CSIndex);
634
635 uint64_t TotalCount = 0;
636 for (const auto &[_, V] : CSData)
637 TotalCount += V.getEntrycount();
638 uint64_t DirectCount = 0;
639 // If we called the direct target, update the DirectCount. If we didn't, we
640 // still want to update the indirect BB (to which the TotalCount goes, in
641 // that case).
642 if (auto It = CSData.find(CalleeGUID); It != CSData.end()) {
643 assert(CalleeGUID == It->second.guid());
644 DirectCount = It->second.getEntrycount();
645 // This direct target needs to be moved to this caller under the
646 // newly-allocated callsite index.
647 assert(Ctx.callsites().count(NewCSID) == 0);
648 Ctx.ingestContext(NewCSID, std::move(It->second));
649 CSData.erase(CalleeGUID);
650 }
651
652 assert(TotalCount >= DirectCount);
653 uint64_t IndirectCount = TotalCount - DirectCount;
654 // The ICP's effect is as-if the direct BB would have been taken DirectCount
655 // times, and the indirect BB, IndirectCount times
656 Ctx.counters()[DirectID] = DirectCount;
657 Ctx.counters()[IndirectID] = IndirectCount;
658
659 };
660 CtxProf.update(ProfileUpdater, Caller);
661 return &DirectCall;
662}
663
665 Function *Callee,
666 ArrayRef<Constant *> AddressPoints,
667 MDNode *BranchWeights) {
668 assert(!AddressPoints.empty() && "Caller should guarantee");
669 IRBuilder<> Builder(&CB);
671 for (auto &AddressPoint : AddressPoints)
672 ICmps.push_back(Builder.CreateICmpEQ(VPtr, AddressPoint));
673
674 // TODO: Perform tree height reduction if the number of ICmps is high.
675 Value *Cond = Builder.CreateOr(ICmps);
676
677 // Version the indirect call site. If Cond is true, 'NewInst' will be
678 // executed, otherwise the original call site will be executed.
679 CallBase &NewInst = versionCallSiteWithCond(CB, Cond, BranchWeights);
680
681 // Promote 'NewInst' so that it directly calls the desired function.
682 return promoteCall(NewInst, Callee);
683}
684
687 Module *M = CB.getCaller()->getParent();
688 const DataLayout &DL = M->getDataLayout();
689 Value *Callee = CB.getCalledOperand();
690
691 LoadInst *VTableEntryLoad = dyn_cast<LoadInst>(Callee);
692 if (!VTableEntryLoad)
693 return false; // Not a vtable entry load.
694 Value *VTableEntryPtr = VTableEntryLoad->getPointerOperand();
695 APInt VTableOffset(DL.getIndexTypeSizeInBits(VTableEntryPtr->getType()), 0);
696 Value *VTableBasePtr = VTableEntryPtr->stripAndAccumulateConstantOffsets(
697 DL, VTableOffset, /* AllowNonInbounds */ true);
698 LoadInst *VTablePtrLoad = dyn_cast<LoadInst>(VTableBasePtr);
699 if (!VTablePtrLoad)
700 return false; // Not a vtable load.
701 Value *Object = VTablePtrLoad->getPointerOperand();
702 APInt ObjectOffset(DL.getIndexTypeSizeInBits(Object->getType()), 0);
703 Value *ObjectBase = Object->stripAndAccumulateConstantOffsets(
704 DL, ObjectOffset, /* AllowNonInbounds */ true);
705 if (!(isa<AllocaInst>(ObjectBase) && ObjectOffset == 0))
706 // Not an Alloca or the offset isn't zero.
707 return false;
708
709 // Look for the vtable pointer store into the object by the ctor.
710 BasicBlock::iterator BBI(VTablePtrLoad);
711 Value *VTablePtr = FindAvailableLoadedValue(
712 VTablePtrLoad, VTablePtrLoad->getParent(), BBI, 0, nullptr, nullptr);
713 if (!VTablePtr || !VTablePtr->getType()->isPointerTy())
714 return false; // No vtable found.
715 APInt VTableOffsetGVBase(DL.getIndexTypeSizeInBits(VTablePtr->getType()), 0);
716 Value *VTableGVBase = VTablePtr->stripAndAccumulateConstantOffsets(
717 DL, VTableOffsetGVBase, /* AllowNonInbounds */ true);
718 GlobalVariable *GV = dyn_cast<GlobalVariable>(VTableGVBase);
719 if (!(GV && GV->isConstant() && GV->hasDefinitiveInitializer()))
720 // Not in the form of a global constant variable with an initializer.
721 return false;
722
723 APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset;
724 if (!(VTableGVOffset.getActiveBits() <= 64))
725 return false; // Out of range.
726
727 Function *DirectCallee = nullptr;
728 std::tie(DirectCallee, std::ignore) =
729 getFunctionAtVTableOffset(GV, VTableGVOffset.getZExtValue(), *M);
730 if (!DirectCallee)
731 return false; // No function pointer found.
732
733 if (!isLegalToPromote(CB, DirectCallee))
734 return false;
735
736 // Success.
737 promoteCall(CB, DirectCallee);
738 return true;
739}
740
741#undef DEBUG_TYPE
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock, BasicBlock *MergeBlock)
Fix-up phi nodes in an invoke instruction's normal destination.
static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock, BasicBlock *ThenBlock, BasicBlock *ElseBlock)
Fix-up phi nodes in an invoke instruction's unwind destination.
static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast)
Cast a call or invoke instruction to the given type.
static CallBase & versionCallSiteWithCond(CallBase &CB, Value *Cond, MDNode *BranchWeights)
Predicate and clone the given call site.
static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, BasicBlock *MergeBlock, IRBuilder<> &Builder)
Create a phi node for the returned value of a call or invoke instruction.
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define _
Module.h This file contains the declarations for the Module class.
#define I(x, y, z)
Definition: MD5.cpp:58
Reader for contextual iFDO profile, which comes in bitstream format.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
static uint64_t getGUID(const Function &F)
Type * getByValType() const
Retrieve the byval type.
Definition: Attributes.h:1163
AttrBuilder & addByValAttr(Type *Ty)
This turns a byval type into the form used internally in Attribute.
Type * getInAllocaType() const
Retrieve the inalloca type.
Definition: Attributes.h:1177
AttrBuilder & addInAllocaAttr(Type *Ty)
This turns an inalloca type into the form used internally in Attribute.
AttrBuilder & remove(const AttributeMask &AM)
Remove the attributes from the builder.
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the attribute exists for the given argument.
Definition: Attributes.h:829
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:897
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1428
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
void mutateFunctionType(FunctionType *FTy)
Definition: InstrTypes.h:1209
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1299
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1207
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1385
unsigned arg_size() const
Definition: InstrTypes.h:1292
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
Function * getCaller()
Helper to get the caller (the parent function).
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool hasDefinitiveInitializer() const
hasDefinitiveInitializer - Whether the global variable has an initializer, and any other instances of...
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2264
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2146
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1534
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1152
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:194
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2699
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Invoke instruction.
BasicBlock * getUnwindDest() const
BasicBlock * getNormalDest() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
Value * getPointerOperand()
Definition: Instructions.h:255
Metadata node.
Definition: Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The instrumented contextual profile, produced by the CtxProfAnalysis.
void update(Visitor, const Function &F)
uint32_t allocateNextCounterIndex(const Function &F)
uint32_t allocateNextCallsiteIndex(const Function &F)
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
Class to represent pointers.
Definition: DerivedTypes.h:670
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:703
Return a value (possibly void), from a function.
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:492
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
CallBase & versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights)
Predicate and clone the given call site.
CallBase & promoteCall(CallBase &CB, Function *Callee, CastInst **RetBitCast=nullptr)
Promote the given indirect call site to unconditionally call Callee.
bool tryPromoteCall(CallBase &CB)
Try to promote (devirtualize) a virtual call on an Alloca.
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
CallBase & promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, Function *Callee, ArrayRef< Constant * > AddressPoints, MDNode *BranchWeights)
This is similar to promoteCallWithIfThenElse except that the condition to promote a virtual call is t...
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...