//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

/// Compute the linearized index of a member in a nested aggregate/struct/array
/// by recursing and accumulating CurIndex as long as there are indices in the
/// index list.
unsigned llvm::ComputeLinearIndex(Type *Ty,
                                  const unsigned *Indices,
                                  const unsigned *IndicesEnd,
                                  unsigned CurIndex) {
  // Base case: We're done.
  if (Indices && Indices == IndicesEnd)
    return CurIndex;

  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto I : llvm::enumerate(STy->elements())) {
      Type *ET = I.value();
      if (Indices && *Indices == I.index())
        return ComputeLinearIndex(ET, Indices + 1, IndicesEnd, CurIndex);
      CurIndex = ComputeLinearIndex(ET, nullptr, nullptr, CurIndex);
    }
    assert(!Indices && "Unexpected out of bound");
    return CurIndex;
  }
  // Given an array type, recursively traverse the elements.
  else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    unsigned NumElts = ATy->getNumElements();
    // Compute the linear offset when jumping one element of the array.
    unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
    if (Indices) {
      assert(*Indices < NumElts && "Unexpected out of bound");
      // If the index is inside the array, compute the index to the requested
      // elt and recurse inside the element with the end of the indices list.
      CurIndex += EltLinearOffset * *Indices;
      return ComputeLinearIndex(EltTy, Indices + 1, IndicesEnd, CurIndex);
    }
    CurIndex += EltLinearOffset * NumElts;
    return CurIndex;
  }
  // We haven't found the type we're looking for, so keep searching.
  return CurIndex + 1;
}
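
// Worked example (illustrative; 'STy' stands for some Type* describing the
// aggregate below): leaves are numbered depth-first, so for
//   { i32, { float, double } }
// the leaves are i32 -> 0, float -> 1, double -> 2, and the index list
// {1, 1} (second member, then its second member) linearizes to 2:
//   unsigned Idx[2] = {1, 1};
//   unsigned Linear = ComputeLinearIndex(STy, Idx, Idx + 2); // Linear == 2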

/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
///
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<EVT> *MemVTs,
                           SmallVectorImpl<TypeSize> *Offsets,
                           TypeSize StartingOffset) {
  assert((Ty->isScalableTy() == StartingOffset.isScalable() ||
          StartingOffset.isZero()) &&
         "Offset/TypeSize mismatch!");
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    // If the Offsets aren't needed, don't query the struct layout. This allows
    // us to support structs with scalable vectors for operations that don't
    // need offsets.
    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI) {
      // Don't compute the element offset if we didn't get a StructLayout above.
      TypeSize EltOffset =
          SL ? SL->getElementOffset(EI - EB) : TypeSize::getZero();
      ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
                      StartingOffset + EltOffset);
    }
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    TypeSize EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
                      StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty->isVoidTy())
    return;
  // Base case: we can get an EVT for this LLVM IR type.
  ValueVTs.push_back(TLI.getValueType(DL, Ty));
  if (MemVTs)
    MemVTs->push_back(TLI.getMemValueType(DL, Ty));
  if (Offsets)
    Offsets->push_back(StartingOffset);
}
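
// Worked example (illustrative; assumes a typical DataLayout where i32 and
// float are both 4 bytes): for { i32, [2 x float] } this appends the EVTs
// i32, f32, f32 to ValueVTs and, when Offsets is non-null, the byte offsets
// 0, 4 and 8:
//   SmallVector<EVT, 4> VTs;
//   SmallVector<TypeSize, 4> Offs;
//   ComputeValueVTs(TLI, DL, STy, VTs, /*MemVTs=*/nullptr, &Offs);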

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<EVT> *MemVTs,
                           SmallVectorImpl<uint64_t> *FixedOffsets,
                           uint64_t StartingOffset) {
  TypeSize Offset = TypeSize::getFixed(StartingOffset);
  if (FixedOffsets) {
    SmallVector<TypeSize, 4> Offsets;
    ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
    for (TypeSize Offset : Offsets)
      FixedOffsets->push_back(Offset.getFixedValue());
  } else {
    ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
  }
}

void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
                            SmallVectorImpl<LLT> &ValueTys,
                            SmallVectorImpl<uint64_t> *Offsets,
                            uint64_t StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
    // If the Offsets aren't needed, don't query the struct layout. This allows
    // us to support structs with scalable vectors for operations that don't
    // need offsets.
    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
      uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
                       StartingOffset + EltOffset);
    }
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
                       StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty.isVoidTy())
    return;
  // Base case: we can get an LLT for this LLVM IR type.
  ValueTys.push_back(getLLTForType(Ty, DL));
  if (Offsets != nullptr)
    Offsets->push_back(StartingOffset * 8);
}
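
// Note the contrast with ComputeValueVTs above: offsets recorded here are in
// bits (hence 'StartingOffset * 8'). Illustrative sketch for { i32, float }
// under a typical DataLayout -- LLTs s32, s32 with bit offsets 0 and 32:
//   SmallVector<LLT, 4> Tys;
//   SmallVector<uint64_t, 4> BitOffs;
//   computeValueLLTs(DL, *STy, Tys, &BitOffs);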

/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
  V = V->stripPointerCasts();
  GlobalValue *GV = dyn_cast<GlobalValue>(V);
  GlobalVariable *Var = dyn_cast<GlobalVariable>(V);

  if (Var && Var->getName() == "llvm.eh.catch.all.value") {
    assert(Var->hasInitializer() &&
           "The EH catch-all value must have an initializer");
    Value *Init = Var->getInitializer();
    GV = dyn_cast<GlobalValue>(Init);
    if (!GV) V = cast<ConstantPointerNull>(Init);
  }

  assert((GV || isa<ConstantPointerNull>(V)) &&
         "TypeInfo must be a global variable or NULL");
  return GV;
}

/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
  switch (Pred) {
  case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
  case FCmpInst::FCMP_OEQ:   return ISD::SETOEQ;
  case FCmpInst::FCMP_OGT:   return ISD::SETOGT;
  case FCmpInst::FCMP_OGE:   return ISD::SETOGE;
  case FCmpInst::FCMP_OLT:   return ISD::SETOLT;
  case FCmpInst::FCMP_OLE:   return ISD::SETOLE;
  case FCmpInst::FCMP_ONE:   return ISD::SETONE;
  case FCmpInst::FCMP_ORD:   return ISD::SETO;
  case FCmpInst::FCMP_UNO:   return ISD::SETUO;
  case FCmpInst::FCMP_UEQ:   return ISD::SETUEQ;
  case FCmpInst::FCMP_UGT:   return ISD::SETUGT;
  case FCmpInst::FCMP_UGE:   return ISD::SETUGE;
  case FCmpInst::FCMP_ULT:   return ISD::SETULT;
  case FCmpInst::FCMP_ULE:   return ISD::SETULE;
  case FCmpInst::FCMP_UNE:   return ISD::SETUNE;
  case FCmpInst::FCMP_TRUE:  return ISD::SETTRUE;
  default: llvm_unreachable("Invalid FCmp predicate opcode!");
  }
}

ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
  case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
  case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
  case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
  case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
  case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
  default: return CC;
  }
}
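
// Illustrative example composing the two helpers above: an ordered float
// comparison maps to its ordered ISD code, which getFCmpCodeWithoutNaN then
// relaxes once NaNs are known to be absent:
//   getFCmpCondCode(FCmpInst::FCMP_OLT)  == ISD::SETOLT
//   getFCmpCodeWithoutNaN(ISD::SETOLT)   == ISD::SETLT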

ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
  case ICmpInst::ICMP_NE:  return ISD::SETNE;
  case ICmpInst::ICMP_SLE: return ISD::SETLE;
  case ICmpInst::ICMP_ULE: return ISD::SETULE;
  case ICmpInst::ICMP_SGE: return ISD::SETGE;
  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
  case ICmpInst::ICMP_SLT: return ISD::SETLT;
  case ICmpInst::ICMP_ULT: return ISD::SETULT;
  case ICmpInst::ICMP_SGT: return ISD::SETGT;
  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
  default:
    llvm_unreachable("Invalid ICmp predicate opcode!");
  }
}

ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
  switch (Pred) {
  case ISD::SETEQ:
    return ICmpInst::ICMP_EQ;
  case ISD::SETNE:
    return ICmpInst::ICMP_NE;
  case ISD::SETLE:
    return ICmpInst::ICMP_SLE;
  case ISD::SETULE:
    return ICmpInst::ICMP_ULE;
  case ISD::SETGE:
    return ICmpInst::ICMP_SGE;
  case ISD::SETUGE:
    return ICmpInst::ICMP_UGE;
  case ISD::SETLT:
    return ICmpInst::ICMP_SLT;
  case ISD::SETULT:
    return ICmpInst::ICMP_ULT;
  case ISD::SETGT:
    return ICmpInst::ICMP_SGT;
  case ISD::SETUGT:
    return ICmpInst::ICMP_UGT;
  default:
    llvm_unreachable("Invalid ISD integer condition code!");
  }
}
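
// Illustrative round trip: this overload is the inverse of getICmpCondCode
// above for the integer condition codes, e.g.
//   getICmpCondCode(ICmpInst::ICMP_SLT) == ISD::SETLT
//   getICmpCondCode(ISD::SETLT)         == ICmpInst::ICMP_SLT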

static bool isNoopBitcast(Type *T1, Type *T2,
                          const TargetLoweringBase &TLI) {
  return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
         (isa<VectorType>(T1) && isa<VectorType>(T2) &&
          TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
}

/// Look through operations that will be free to find the earliest source of
/// this value.
///
/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
/// similar information for the Value returned.
///
/// @param DataBits If this function looks through truncate instructions, this
/// will record the smallest size attained.
static const Value *getNoopInput(const Value *V,
                                 SmallVectorImpl<unsigned> &ValLoc,
                                 unsigned &DataBits,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {
  while (true) {
    // Try to look through V; if V is not an instruction, it can't be looked
    // through.
    const Instruction *I = dyn_cast<Instruction>(V);
    if (!I || I->getNumOperands() == 0) return V;
    const Value *NoopInput = nullptr;

    Value *Op = I->getOperand(0);
    if (isa<BitCastInst>(I)) {
      // Look through truly no-op bitcasts.
      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
        NoopInput = Op;
    } else if (isa<GetElementPtrInst>(I)) {
      // Look through getelementptr instructions with all-zero indices.
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        NoopInput = Op;
    } else if (isa<IntToPtrInst>(I)) {
      // Look through inttoptr.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(Op->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<PtrToIntInst>(I)) {
      // Look through ptrtoint.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(I->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<TruncInst>(I) &&
               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
      DataBits =
          std::min((uint64_t)DataBits,
                   I->getType()->getPrimitiveSizeInBits().getFixedValue());
      NoopInput = Op;
    } else if (auto *CB = dyn_cast<CallBase>(I)) {
      const Value *ReturnedOp = CB->getReturnedArgOperand();
      if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
        NoopInput = ReturnedOp;
    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
      // Value may come from either the aggregate or the scalar.
      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
      if (ValLoc.size() >= InsertLoc.size() &&
          std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
        // The type being inserted is a nested sub-type of the aggregate; we
        // have to remove those initial indices to get the location we're
        // interested in for the operand.
        ValLoc.resize(ValLoc.size() - InsertLoc.size());
        NoopInput = IVI->getInsertedValueOperand();
      } else {
        // The struct we're inserting into has the value we're interested in, no
        // change of address.
        NoopInput = Op;
      }
    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
      // The part we're interested in will inevitably be some sub-section of the
      // previous aggregate. Combine the two paths to obtain the true address of
      // our element.
      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
      ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
      NoopInput = Op;
    }
    // Terminate if we couldn't find anything to look through.
    if (!NoopInput)
      return V;

    V = NoopInput;
  }
}
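
// Illustrative example of what getNoopInput looks through: given
//   %agg = insertvalue { i32, i64 } undef, i64 %v, 1
//   %ret = extractvalue { i32, i64 } %agg, 1
// a query starting at %ret first records index 1 in ValLoc (extractvalue),
// then cancels it against the matching insertvalue and returns %v.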

/// Return true if this scalar return value only has bits discarded on its path
/// from the "tail call" to the "ret". This includes the obvious noop
/// instructions handled by getNoopInput above as well as free truncations (or
/// extensions prior to the call).
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
                                 SmallVectorImpl<unsigned> &RetIndices,
                                 SmallVectorImpl<unsigned> &CallIndices,
                                 bool AllowDifferingSizes,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {

  // Trace the sub-value needed by the return value as far back up the graph as
  // possible, in the hope that it will intersect with the value produced by the
  // call. In the simple case with no "returned" attribute, the hope is actually
  // that we end up back at the tail call instruction itself.
  unsigned BitsRequired = UINT_MAX;
  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);

  // If this slot in the value returned is undef, it doesn't matter what the
  // call puts there, it'll be fine.
  if (isa<UndefValue>(RetVal))
    return true;

  // Now do a similar search up through the graph to find where the value
  // actually returned by the "tail call" comes from. In the simple case without
  // a "returned" attribute, the search is blocked immediately and the loop is
  // a no-op.
  unsigned BitsProvided = UINT_MAX;
  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);

  // There's no hope if we can't actually trace them to (the same part of!) the
  // same value.
  if (CallVal != RetVal || CallIndices != RetIndices)
    return false;

  // However, intervening truncates may have made the call non-tail. Make sure
  // all the bits that are needed by the "ret" have been provided by the "tail
  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
  // extensions too.
  if (BitsProvided < BitsRequired ||
      (!AllowDifferingSizes && BitsProvided != BitsRequired))
    return false;

  return true;
}

/// For an aggregate type, determine whether a given index is within bounds or
/// not.
static bool indexReallyValid(Type *T, unsigned Idx) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return Idx < AT->getNumElements();

  return Idx < cast<StructType>(T)->getNumElements();
}

/// Move the given iterators to the next leaf type in depth first traversal.
///
/// Performs a depth-first traversal of the type as specified by its arguments,
/// stopping at the next leaf node (which may be a legitimate scalar type or an
/// empty struct or array).
///
/// @param SubTypes List of the partial components making up the type from
/// outermost to innermost non-empty aggregate. The element currently
/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
///
/// @param Path Set of extractvalue indices leading from the outermost type
/// (SubTypes[0]) to the leaf node currently represented.
///
/// @returns true if a new type was found, false otherwise. Calling this
/// function again on a finished iterator will repeatedly return
/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
/// aggregate or a non-aggregate type.
static bool advanceToNextLeafType(SmallVectorImpl<Type *> &SubTypes,
                                  SmallVectorImpl<unsigned> &Path) {
  // First march back up the tree until we can successfully increment one of
  // the coordinates in Path.
  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
    Path.pop_back();
    SubTypes.pop_back();
  }

  // If we reached the top, then the iterator is done.
  if (Path.empty())
    return false;

  // We know there's *some* valid leaf now, so march back down the tree picking
  // out the left-most element at each node.
  ++Path.back();
  Type *DeeperType =
      ExtractValueInst::getIndexedType(SubTypes.back(), Path.back());
  while (DeeperType->isAggregateType()) {
    if (!indexReallyValid(DeeperType, 0))
      return true;

    SubTypes.push_back(DeeperType);
    Path.push_back(0);

    DeeperType = ExtractValueInst::getIndexedType(DeeperType, 0);
  }

  return true;
}
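
// Illustrative sketch: for {[0 x i64], {{}, i32, {}}, i32} (the type used in
// the firstRealType example below), successive calls walk the leaves
// depth-first and left to right -- including the empty aggregates -- and
// return false once the trailing i32 has been passed.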

/// Find the first non-empty, scalar-like type in Next and setup the iterator
/// components.
///
/// Assuming Next is an aggregate of some kind, this function will traverse the
/// tree from left to right (i.e. depth-first) looking for the first
/// non-aggregate type which will play a role in function return.
///
/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
/// i32 in that type.
static bool firstRealType(Type *Next, SmallVectorImpl<Type *> &SubTypes,
                          SmallVectorImpl<unsigned> &Path) {
  // First initialise the iterator components to the first "leaf" node
  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
  // despite nominally being an aggregate).
  while (Type *FirstInner = ExtractValueInst::getIndexedType(Next, 0)) {
    SubTypes.push_back(Next);
    Path.push_back(0);
    Next = FirstInner;
  }

  // If there's no Path now, Next was originally scalar already (or empty
  // leaf). We're done.
  if (Path.empty())
    return true;

  // Otherwise, use normal iteration to keep looking through the tree until we
  // find a non-aggregate type.
  while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
             ->isAggregateType()) {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;
  }

  return true;
}

/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
                         SmallVectorImpl<unsigned> &Path) {
  do {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;

    assert(!Path.empty() && "found a leaf but didn't set the path?");
  } while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
               ->isAggregateType());

  return true;
}

/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM,
                                bool ReturnsFirstArg) {
  const BasicBlock *ExitBB = Call.getParent();
  const Instruction *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret && ((!TM.Options.GuaranteedTailCallOpt &&
                Call.getCallingConv() != CallingConv::Tail &&
                Call.getCallingConv() != CallingConv::SwiftTail) ||
               !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  // Check for all calls including speculatable functions.
  for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
    if (&*BBI == &Call)
      break;
    // Debug info intrinsics do not get in the way of tail call optimization.
    // Pseudo probe intrinsics do not block tail call optimization either.
    if (BBI->isDebugOrPseudoInst())
      continue;
    // A lifetime end, assume or noalias.decl intrinsic should not stop tail
    // call optimization.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
      if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
          II->getIntrinsicID() == Intrinsic::assume ||
          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
        continue;
    if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
        !isSafeToSpeculativelyExecute(&*BBI))
      return false;
  }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering(),
      ReturnsFirstArg);
}
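
// Illustrative example: in
//   %r = tail call i32 @callee(i32 %x)
//   ret i32 %r
// the call is in tail call position, whereas any interposing instruction with
// side effects (a store, say) between the call and the ret would make
// isInTailCallPosition return false.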

bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
                                    const ReturnInst *Ret,
                                    const TargetLoweringBase &TLI,
                                    bool *AllowDifferingSizes) {
  // ADS may be null, so don't write to it directly.
  bool DummyADS;
  bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
  ADS = true;

  AttrBuilder CallerAttrs(F->getContext(), F->getAttributes().getRetAttrs());
  AttrBuilder CalleeAttrs(F->getContext(),
                          cast<CallInst>(I)->getAttributes().getRetAttrs());

  // The following attributes are completely benign as far as calling
  // convention goes; they shouldn't affect whether the call is a tail call.
  for (const auto &Attr :
       {Attribute::Alignment, Attribute::Dereferenceable,
        Attribute::DereferenceableOrNull, Attribute::NoAlias,
        Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) {
    CallerAttrs.removeAttribute(Attr);
    CalleeAttrs.removeAttribute(Attr);
  }

  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // Drop sext and zext return attributes if the result is not used.
  // This enables tail calls for code like:
  //
  // define void @caller() {
  // entry:
  //   %unused_result = tail call zeroext i1 @callee()
  //   br label %retlabel
  // retlabel:
  //   ret void
  // }
  if (I->use_empty()) {
    CalleeAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  return CallerAttrs == CalleeAttrs;
}
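
// Illustrative example: if the caller's return is zeroext but the callee's is
// not, the caller would have to re-extend the result after the call, so the
// function returns false; if both are zeroext the call remains eligible, but
// *AllowDifferingSizes is cleared because every returned bit now matters.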

bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
                                           const Instruction *I,
                                           const ReturnInst *Ret,
                                           const TargetLoweringBase &TLI,
                                           bool ReturnsFirstArg) {
  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  bool AllowDifferingSizes;
  if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
    return false;

  // If the return value is the first argument of the call, the transformation
  // is trivially fine.
  if (ReturnsFirstArg)
    return true;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<Type *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing's actually returned, so it doesn't matter what the callee put
  // there; this is a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction, the
      // rest are essentially undef. The type doesn't really matter, but we need
      // *something*.
      Type *SlotType =
          ExtractValueInst::getIndexedType(RetSubTypes.back(), RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway it's more efficient to create a reversed copy.
    SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath));
    SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath));

    // Finally, we can check whether the value produced by the tail call at this
    // index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI,
                              F->getDataLayout()))
      return false;

    CallEmpty = !nextRealType(CallSubTypes, CallPath);
  } while (nextRealType(RetSubTypes, RetPath));

  return true;
}
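
// Illustrative example (assuming the target's allowTruncateForTailCall
// accepts the i64 -> i32 truncation):
//   %r64 = tail call i64 @callee()
//   %r32 = trunc i64 %r64 to i32
//   ret i32 %r32
// is still eligible: the ret merely discards bits the call provided, which is
// exactly the BitsProvided >= BitsRequired case in slotOnlyDiscardsData.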

bool llvm::funcReturnsFirstArgOfCall(const CallInst &CI) {
  const ReturnInst *Ret = dyn_cast<ReturnInst>(CI.getParent()->getTerminator());
  Value *RetVal = Ret ? Ret->getReturnValue() : nullptr;
  bool ReturnsFirstArg = false;
  if (RetVal && RetVal == CI.getArgOperand(0))
    ReturnsFirstArg = true;
  return ReturnsFirstArg;
}
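
// Illustrative example: this returns true for
//   define i32 @caller(i32 %x) {
//     %unused = tail call i32 @callee(i32 %x)
//     ret i32 %x    ; the call's first *argument*, not its result
//   }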

static void collectEHScopeMembers(
    DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
    const MachineBasicBlock *MBB) {
  SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
  while (!Worklist.empty()) {
    const MachineBasicBlock *Visiting = Worklist.pop_back_val();
    // Don't follow blocks which start new scopes.
    if (Visiting->isEHPad() && Visiting != MBB)
      continue;

    // Add this MBB to our scope.
    auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));

    // Don't revisit blocks.
    if (!P.second) {
      assert(P.first->second == EHScope && "MBB is part of two scopes!");
      continue;
    }

    // Returns are boundaries where scope transfer can occur; don't follow
    // successors.
    if (Visiting->isEHScopeReturnBlock())
      continue;

    append_range(Worklist, Visiting->successors());
  }
}

DenseMap<const MachineBasicBlock *, int>
llvm::getEHScopeMembership(const MachineFunction &MF) {
  DenseMap<const MachineBasicBlock *, int> EHScopeMembership;

  // We don't have anything to do if there aren't any EH pads.
  if (!MF.hasEHScopes())
    return EHScopeMembership;

  int EntryBBNumber = MF.front().getNumber();
  bool IsSEH = isAsynchronousEHPersonality(
      classifyEHPersonality(MF.getFunction().getPersonalityFn()));

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;
  SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
  SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
  SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
  for (const MachineBasicBlock &MBB : MF) {
    if (MBB.isEHScopeEntry()) {
      EHScopeBlocks.push_back(&MBB);
    } else if (IsSEH && MBB.isEHPad()) {
      SEHCatchPads.push_back(&MBB);
    } else if (MBB.pred_empty()) {
      UnreachableBlocks.push_back(&MBB);
    }

    MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();

    // CatchPads are not scopes for SEH so do not consider CatchRet to
    // transfer control to another scope.
    if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
      continue;

    // FIXME: SEH CatchPads are not necessarily in the parent function:
    // they could be inside a finally block.
    const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
    const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
    CatchRetSuccessors.push_back(
        {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
  }

  // We don't have anything to do if there aren't any EH scope entry blocks.
  if (EHScopeBlocks.empty())
    return EHScopeMembership;

  // Identify all the basic blocks reachable from the function entry.
  collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front());
  // All blocks not part of a scope are in the parent function.
  for (const MachineBasicBlock *MBB : UnreachableBlocks)
    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
  // Next, identify all the blocks inside the scopes.
  for (const MachineBasicBlock *MBB : EHScopeBlocks)
    collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB);
  // SEH CatchPads aren't really scopes, handle them separately.
  for (const MachineBasicBlock *MBB : SEHCatchPads)
    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
  // Finally, identify all the targets of a catchret.
  for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
       CatchRetSuccessors)
    collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,
                          CatchRetPair.first);
  return EHScopeMembership;
}
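
// Illustrative usage sketch (for a function that does have EH scopes): every
// mapped block is labelled with the number of its scope's entry block, with
// ordinary code labelled by the function entry's number:
//   DenseMap<const MachineBasicBlock *, int> Scopes = getEHScopeMembership(MF);
//   int ScopeEntryNumber = Scopes.lookup(SomeMBB);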