24#define DEBUG_TYPE "coro-elide"
26STATISTIC(NumOfCoroElided,
"The # of coroutine get elided.");
36class FunctionElideInfo {
38 FunctionElideInfo(
Function *
F) : ContainingFunction(
F) {
39 this->collectPostSplitCoroIds();
42 bool hasCoroIds()
const {
return !CoroIds.empty(); }
52 void collectPostSplitCoroIds();
53 friend class CoroIdElider;
60 void elideHeapAllocations(
uint64_t FrameSize,
Align FrameAlign);
61 bool lifetimeEligibleForElide()
const;
68 FunctionElideInfo &FEI;
92 if (ValueTy != IntrTy) {
122 if (
auto *Call = dyn_cast<CallInst>(&
I))
124 !Call->isMustTailCall())
125 Call->setTailCall(
false);
130static std::optional<std::pair<uint64_t, Align>>
142 if (!isa<AllocaInst>(&
I))
150 "coro-elide-info-output-file shouldn't be empty");
156 llvm::errs() <<
"Error opening coro-elide-info-output-file '"
158 return std::make_unique<raw_fd_ostream>(2,
false);
162void FunctionElideInfo::collectPostSplitCoroIds() {
164 if (
auto *CII = dyn_cast<CoroIdInst>(&
I))
165 if (CII->getInfo().isPostSplit())
167 if (CII->getCoroutine() != CII->getFunction())
168 CoroIds.push_back(CII);
175 if (
auto *CSI = dyn_cast<CoroSuspendInst>(&
I))
176 if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
177 SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
179 CoroSuspendSwitches.insert(SWI);
184CoroIdElider::CoroIdElider(
CoroIdInst *CoroId, FunctionElideInfo &FEI,
187 : CoroId(CoroId), FEI(FEI), AA(AA), DT(DT), ORE(ORE) {
190 if (
auto *CB = dyn_cast<CoroBeginInst>(U))
191 CoroBegins.push_back(CB);
192 else if (
auto *CA = dyn_cast<CoroAllocInst>(U))
193 CoroAllocs.push_back(CA);
202 if (
auto *
II = dyn_cast<CoroSubFnInst>(U))
203 switch (
II->getIndex()) {
205 ResumeAddr.push_back(
II);
208 DestroyAddr[CB].push_back(
II);
218void CoroIdElider::elideHeapAllocations(
uint64_t FrameSize,
Align FrameAlign) {
230 for (
auto *CA : CoroAllocs) {
231 CA->replaceAllUsesWith(False);
232 CA->eraseFromParent();
239 const DataLayout &
DL = FEI.ContainingFunction->getDataLayout();
241 auto *Frame =
new AllocaInst(FrameTy,
DL.getAllocaAddrSpace(),
"", InsertPt);
242 Frame->setAlignment(FrameAlign);
244 new BitCastInst(Frame, PointerType::getUnqual(
C),
"vFrame", InsertPt);
246 for (
auto *CB : CoroBegins) {
247 CB->replaceAllUsesWith(FrameVoidPtr);
248 CB->eraseFromParent();
256bool CoroIdElider::canCoroBeginEscape(
258 const auto &It = DestroyAddr.find(CB);
259 assert(It != DestroyAddr.end());
262 unsigned Limit = 32 * (1 + It->second.size());
270 for (
auto *DA : It->second)
274 for (
auto *U : CB->
users()) {
276 if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
290 EscapingBBs.
insert(cast<Instruction>(U)->getParent());
293 bool PotentiallyEscaped =
false;
297 if (!Visited.
insert(BB).second)
303 PotentiallyEscaped |= EscapingBBs.
count(BB);
306 if (isa<ReturnInst>(BB->getTerminator()) || PotentiallyEscaped)
321 auto TI = BB->getTerminator();
325 if (isa<SwitchInst>(TI) &&
326 FEI.CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
327 Worklist.
push_back(cast<SwitchInst>(TI)->getSuccessor(1));
328 Worklist.
push_back(cast<SwitchInst>(TI)->getSuccessor(2));
332 }
while (!Worklist.
empty());
339bool CoroIdElider::lifetimeEligibleForElide()
const {
342 if (CoroAllocs.empty())
357 auto *TI =
B.getTerminator();
359 if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI))
366 for (
const auto *CB : CoroBegins) {
367 auto It = DestroyAddr.find(CB);
371 if (It == DestroyAddr.end())
374 const auto &CorrespondingDestroyAddrs = It->second;
378 auto DominatesTerminator = [&](
auto *TI) {
379 return llvm::any_of(CorrespondingDestroyAddrs, [&](
auto *Destroy) {
380 return DT.
dominates(Destroy, TI->getTerminator());
393 if (canCoroBeginEscape(CB, Terminators))
402bool CoroIdElider::attemptElide() {
406 assert(Resumers &&
"PostSplit coro.id Info argument must refer to an array"
407 "of coroutine subfunctions");
408 auto *ResumeAddrConstant =
413 bool EligibleForElide = lifetimeEligibleForElide();
419 for (
auto &It : DestroyAddr)
422 auto FrameSizeAndAlign =
getFrameLayout(cast<Function>(ResumeAddrConstant));
424 auto CallerFunctionName = FEI.ContainingFunction->getName();
427 if (EligibleForElide && FrameSizeAndAlign) {
428 elideHeapAllocations(FrameSizeAndAlign->first, FrameSizeAndAlign->second);
435 << FEI.ContainingFunction->getName() <<
"\n";
440 <<
"'" <<
ore::NV(
"callee", CalleeCoroutineName)
441 <<
"' elided in '" <<
ore::NV(
"caller", CallerFunctionName)
443 <<
ore::NV(
"frame_size", FrameSizeAndAlign->first) <<
", align="
444 <<
ore::NV(
"align", FrameSizeAndAlign->second.value()) <<
")";
449 <<
"'" <<
ore::NV(
"callee", CalleeCoroutineName)
450 <<
"' not elided in '"
451 <<
ore::NV(
"caller", CallerFunctionName);
453 if (FrameSizeAndAlign)
454 return Remark <<
"' (frame_size="
455 <<
ore::NV(
"frame_size", FrameSizeAndAlign->first)
457 <<
ore::NV(
"align", FrameSizeAndAlign->second.value())
460 return Remark <<
"' (frame_size=unknown, align=unknown)";
468 auto &M = *
F.getParent();
472 FunctionElideInfo FEI{&
F};
474 if (!FEI.hasCoroIds())
481 bool Changed =
false;
482 for (
auto *CII : FEI.getCoroIds()) {
483 CoroIdElider CIE(CII, FEI, AA, DT, ORE);
484 Changed |= CIE.attemptElide();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void replaceWithConstant(Constant *Value, SmallVectorImpl< CoroSubFnInst * > &Users)
static Instruction * getFirstNonAllocaInTheEntryBlock(Function *F)
static cl::opt< std::string > CoroElideInfoOutputFilename("coro-elide-info-output-file", cl::value_desc("filename"), cl::desc("File to record the coroutines got elided"), cl::Hidden)
static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA)
static std::optional< std::pair< uint64_t, Align > > getFrameLayout(Function *Resume)
static std::unique_ptr< raw_fd_ostream > getOrCreateLogFile()
static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA)
This file defines the DenseMap class.
iv Induction Variable Users
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
ConstantArray - Constant Array Declarations.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
This class represents the llvm.coro.begin or llvm.coro.begin.custom.abi instructions.
This represents the llvm.coro.id instruction.
Function * getCoroutine() const
This class represents the llvm.coro.subfn.addr instruction.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const
Extract the number of dereferenceable bytes for a parameter.
MaybeAlign getParamAlign(unsigned ArgNo) const
const Function * getFunction() const
Return the function this instruction belongs to.
This is an important class for using LLVM in a threaded context.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getNumCases() const
Return the number of 'cases' in this switch instruction, excluding the default case.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt8Ty(LLVMContext &C)
iterator_range< value_op_iterator > operand_values()
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
bool declaresIntrinsics(const Module &M, const std::initializer_list< StringRef >)
void replaceCoroFree(CoroIdInst *CoroId, bool Elide)
DiagnosticInfoOptimizationBase::Argument NV
@ OF_Append
The file should be opened in append mode.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
This struct is a compact representation of a valid (non-zero power of two) alignment.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.