LLVM 22.0.0git
CoroAnnotationElide.cpp
Go to the documentation of this file.
1//===- CoroAnnotationElide.cpp - Elide attributed safe coroutine calls ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This pass transforms all Call or Invoke instructions that are annotated
11// "coro_elide_safe" to call the `.noalloc` variant of coroutine instead.
12// The frame of the callee coroutine is allocated inside the caller. A pointer
13// to the allocated frame will be passed into the `.noalloc` ramp function.
14//
15//===----------------------------------------------------------------------===//
16
18
22#include "llvm/IR/Analysis.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Module.h"
26#include "llvm/IR/PassManager.h"
32
33#include <cassert>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "coro-annotation-elide"
38
40 "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden,
41 cl::desc("Minimum BranchProbability to consider a elide a coroutine."));
43
45 for (Instruction &I : F->getEntryBlock())
46 if (!isa<AllocaInst>(&I))
47 return &I;
48 llvm_unreachable("no terminator in the entry block");
49}
50
51// Create an alloca in the caller, using FrameSize and FrameAlign as the callee
52// coroutine's activation frame.
53static Value *allocateFrameInCaller(Function *Caller, uint64_t FrameSize,
54 Align FrameAlign) {
55 LLVMContext &C = Caller->getContext();
56 BasicBlock::iterator InsertPt =
58 const DataLayout &DL = Caller->getDataLayout();
59 auto FrameTy = ArrayType::get(Type::getInt8Ty(C), FrameSize);
60 auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
61 Frame->setAlignment(FrameAlign);
62 return Frame;
63}
64
65// Given a call or invoke instruction to the elide safe coroutine, this function
66// does the following:
67// - Allocate a frame for the callee coroutine in the caller using alloca.
68// - Replace the old CB with a new Call or Invoke to `NewCallee`, with the
69// pointer to the frame as an additional argument to NewCallee.
70static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,
71 uint64_t FrameSize, Align FrameAlign) {
72 // TODO: generate the lifetime intrinsics for the new frame. This will require
73 // introduction of two pesudo lifetime intrinsics in the frontend around the
74 // `co_await` expression and convert them to real lifetime intrinsics here.
75 auto *FramePtr = allocateFrameInCaller(Caller, FrameSize, FrameAlign);
76 auto NewCBInsertPt = CB->getIterator();
77 llvm::CallBase *NewCB = nullptr;
79 NewArgs.append(CB->arg_begin(), CB->arg_end());
80 NewArgs.push_back(FramePtr);
81
82 if (auto *CI = dyn_cast<CallInst>(CB)) {
83 auto *NewCI = CallInst::Create(NewCallee->getFunctionType(), NewCallee,
84 NewArgs, "", NewCBInsertPt);
85 NewCI->setTailCallKind(CI->getTailCallKind());
86 NewCB = NewCI;
87 } else if (auto *II = dyn_cast<InvokeInst>(CB)) {
88 NewCB = InvokeInst::Create(NewCallee->getFunctionType(), NewCallee,
89 II->getNormalDest(), II->getUnwindDest(),
90 NewArgs, {}, "", NewCBInsertPt);
91 } else {
92 llvm_unreachable("CallBase should either be Call or Invoke!");
93 }
94
95 NewCB->setCalledFunction(NewCallee->getFunctionType(), NewCallee);
96 NewCB->setCallingConv(CB->getCallingConv());
97 NewCB->setAttributes(CB->getAttributes());
98 NewCB->setDebugLoc(CB->getDebugLoc());
99 std::copy(CB->bundle_op_info_begin(), CB->bundle_op_info_end(),
100 NewCB->bundle_op_info_begin());
101
102 NewCB->removeFnAttr(llvm::Attribute::CoroElideSafe);
103 CB->replaceAllUsesWith(NewCB);
104
106 InlineResult IR = InlineFunction(*NewCB, IFI);
107 if (IR.isSuccess()) {
108 CB->eraseFromParent();
109 } else {
110 NewCB->replaceAllUsesWith(CB);
111 NewCB->eraseFromParent();
112 }
113}
114
// CGSCC pass entry point (the tooltip text in this listing gives the signature
// as `PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
// LazyCallGraph &CG, CGSCCUpdateResult &UR)`).
//
// For every function in the SCC that has a sibling named `<name>.noalloc` in
// the same module, each direct call/invoke of that function is examined; call
// sites that qualify are rewritten through processCall().
//
// NOTE(review): this listing is incomplete. Judging by the gaps in the embedded
// line numbers, the following original lines are absent and must be restored
// from the real source before this compiles: 115-116 (start of the function
// header), 133 (presumably the declaration of `Users`), 157-158 (arguments of
// `MinBranchProbability`), 162 (presumably the declaration of `Prob`), 168
// (start of the returned remark expression) and 215 (presumably the final
// return statement).
117 LazyCallGraph &CG,
118 CGSCCUpdateResult &UR) {
119 bool Changed = false;
120 CallGraphUpdater CGUpdater;
121 CGUpdater.initialize(CG, C, AM, UR);
122
123 auto &FAM =
124 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
125
126 for (LazyCallGraph::Node &N : C) {
127 Function *Callee = &N.getFunction();
// Only functions that have a `.noalloc` counterpart in the module are
// candidates.
128 Function *NewCallee = Callee->getParent()->getFunction(
129 (Callee->getName() + ".noalloc").str());
130 if (!NewCallee)
131 continue;
132
// Gather the call sites up front; only users that call Callee directly are
// kept.
134 for (auto *U : Callee->users()) {
135 if (auto *CB = dyn_cast<CallBase>(U)) {
136 if (CB->getCalledFunction() == Callee)
137 Users.push_back(CB);
138 }
139 }
// The frame pointer is taken to be the last parameter of NewCallee; the frame
// size and alignment are read from that parameter's dereferenceable-bytes and
// align attributes (alignment falls back to 1 when unspecified).
140 auto FramePtrArgPosition = NewCallee->arg_size() - 1;
141 auto FrameSize =
142 NewCallee->getParamDereferenceableBytes(FramePtrArgPosition);
143 auto FrameAlign =
144 NewCallee->getParamAlign(FramePtrArgPosition).valueOrOne();
145
// NOTE(review): the remark emitter is requested for Callee, while every remark
// below names Caller as its code region - confirm this is intended.
146 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(*Callee);
147
148 for (auto *CB : Users) {
149 auto *Caller = CB->getFunction();
150 if (!Caller)
151 continue;
152
// A call site is considered only if the caller is a presplit coroutine and the
// call itself carries the CoroElideSafe attribute.
153 bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
154 bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
155 if (IsCallerPresplitCoroutine && HasAttr) {
156 BranchProbability MinBranchProbability(
159
160 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
161
// The two values below are the frequency of the call's block and the caller's
// entry frequency.
163 BFI.getBlockFreq(CB->getParent()).getFrequency(),
164 BFI.getEntryFreq().getFrequency());
165
// Call sites below the threshold are reported as missed and left untouched.
166 if (Prob < MinBranchProbability) {
167 ORE.emit([&]() {
169 DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
170 << "'" << ore::NV("callee", Callee->getName())
171 << "' not elided in '"
172 << ore::NV("caller", Caller->getName())
173 << "' because of low probability: "
174 << ore::NV("probability", Prob) << " (threshold: "
175 << ore::NV("threshold", MinBranchProbability) << ")";
176 });
177 continue;
178 }
179
180 auto *CallerN = CG.lookup(*Caller);
181 auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr;
182 // If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller
183 // yet. Skip the call graph update.
184 auto ShouldUpdateCallGraph = !!CallerC;
185 processCall(CB, Caller, NewCallee, FrameSize, FrameAlign);
186
// NOTE(review): processCall() returns void, so the "elided" remark and
// `Changed = true` below are recorded whether or not its inlining step
// succeeded.
187 ORE.emit([&]() {
188 return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
189 << "'" << ore::NV("callee", Callee->getName())
190 << "' elided in '" << ore::NV("caller", Caller->getName())
191 << "' (probability: " << ore::NV("probability", Prob) << ")";
192 });
193
// The caller's IR was modified, so none of its cached function analyses are
// preserved.
194 FAM.invalidate(*Caller, PreservedAnalyses::none());
195 Changed = true;
196 if (ShouldUpdateCallGraph)
197 updateCGAndAnalysisManagerForCGSCCPass(CG, *CallerC, *CallerN, AM, UR,
198 FAM);
199
200 } else {
// Not eligible: report which of the two preconditions failed.
201 ORE.emit([&]() {
202 return OptimizationRemarkMissed(DEBUG_TYPE, "CoroAnnotationElide",
203 Caller)
204 << "'" << ore::NV("callee", Callee->getName())
205 << "' not elided in '" << ore::NV("caller", Caller->getName())
206 << "' (caller_presplit="
207 << ore::NV("caller_presplit", IsCallerPresplitCoroutine)
208 << ", elide_safe_attr=" << ore::NV("elide_safe_attr", HasAttr)
209 << ")";
210 });
211 }
212 }
213 }
214
216}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This header provides classes for managing passes over SCCs of the call graph.
This file provides interfaces used to manipulate a call graph, regardless of whether it is an "old style" Call...
static void processCall(CallBase *CB, Function *Caller, Function *NewCallee, uint64_t FrameSize, Align FrameAlign)
static cl::opt< float > CoroElideBranchRatio("coro-elide-branch-ratio", cl::init(0.55), cl::Hidden, cl::desc("Minimum BranchProbability to consider a elide a coroutine."))
static Instruction * getFirstNonAllocaInTheEntryBlock(Function *F)
cl::opt< unsigned > MinBlockCounterExecution
static Value * allocateFrameInCaller(Function *Caller, uint64_t FrameSize, Align FrameAlign)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
Definition IVUsers.cpp:48
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:80
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
static const unsigned FramePtr
an instruction to allocate memory on the stack
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
Analysis pass which computes BlockFrequencyInfo.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
bundle_op_iterator bundle_op_info_begin()
Return the start of the list of BundleOpInfo instances associated with this OperandBundleUser.
CallingConv::ID getCallingConv() const
bundle_op_iterator bundle_op_info_end()
Return the end of the list of BundleOpInfo instances associated with this OperandBundleUser.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
void setAttributes(AttributeList A)
Set the attributes for this call.
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
void removeFnAttr(Attribute::AttrKind Kind)
Removes the attribute from the function.
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Wrapper to unify "old style" CallGraph and "new style" LazyCallGraph.
void initialize(LazyCallGraph &LCG, LazyCallGraph::SCC &SCC, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR)
Initializers for usage outside of a CGSCC pass, inside a CGSCC pass in the old and new pass manager (...
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
A proxy from a FunctionAnalysisManager to an SCC.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const
Extract the number of dereferenceable bytes for a parameter.
Definition Function.h:522
MaybeAlign getParamAlign(unsigned ArgNo) const
Definition Function.h:487
size_t arg_size() const
Definition Function.h:899
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition Cloning.h:251
InlineResult is basically true or false.
Definition InlineCost.h:181
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A node in the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
SCC * lookupSCC(Node &N) const
Lookup a function's SCC in the graph.
Node * lookup(const Function &F) const
Lookup a function in the graph which has already been scanned and added.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
Changed
Pass manager infrastructure for declaring and invalidating analyses.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI LazyCallGraph::SCC & updateCGAndAnalysisManagerForCGSCCPass(LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, FunctionAnalysisManager &FAM)
Helper to update the call graph after running a CGSCC pass.
AnalysisManager< LazyCallGraph::SCC, LazyCallGraph & > CGSCCAnalysisManager
The CGSCC analysis manager.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130