Line data Source code
1 : //===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines several CodeGen-specific LLVM IR analysis utilities.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "llvm/CodeGen/Analysis.h"
15 : #include "llvm/Analysis/ValueTracking.h"
16 : #include "llvm/CodeGen/MachineFunction.h"
17 : #include "llvm/CodeGen/TargetInstrInfo.h"
18 : #include "llvm/CodeGen/TargetLowering.h"
19 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
20 : #include "llvm/IR/DataLayout.h"
21 : #include "llvm/IR/DerivedTypes.h"
22 : #include "llvm/IR/Function.h"
23 : #include "llvm/IR/Instructions.h"
24 : #include "llvm/IR/IntrinsicInst.h"
25 : #include "llvm/IR/LLVMContext.h"
26 : #include "llvm/IR/Module.h"
27 : #include "llvm/Support/ErrorHandling.h"
28 : #include "llvm/Support/MathExtras.h"
29 : #include "llvm/Transforms/Utils/GlobalStatus.h"
30 :
31 : using namespace llvm;
32 :
33 : /// Compute the linearized index of a member in a nested aggregate/struct/array
34 : /// by recursing and accumulating CurIndex as long as there are indices in the
35 : /// index list.
36 981745 : unsigned llvm::ComputeLinearIndex(Type *Ty,
37 : const unsigned *Indices,
38 : const unsigned *IndicesEnd,
39 : unsigned CurIndex) {
40 : // Base case: We're done.
41 1662881 : if (Indices && Indices == IndicesEnd)
42 : return CurIndex;
43 :
44 : // Given a struct type, recursively traverse the elements.
45 : if (StructType *STy = dyn_cast<StructType>(Ty)) {
46 300387 : for (StructType::element_iterator EB = STy->element_begin(),
47 : EI = EB,
48 680722 : EE = STy->element_end();
49 981109 : EI != EE; ++EI) {
50 981096 : if (Indices && *Indices == unsigned(EI - EB))
51 680709 : return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
52 300387 : CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
53 : }
54 : assert(!Indices && "Unexpected out of bound");
55 13 : return CurIndex;
56 : }
57 : // Given an array type, recursively traverse the elements.
58 : else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
59 435 : Type *EltTy = ATy->getElementType();
60 435 : unsigned NumElts = ATy->getNumElements();
61 : // Compute the linear offset when jumping over one element of the array.
62 435 : unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
63 435 : if (Indices) {
64 : assert(*Indices < NumElts && "Unexpected out of bound");
65 : // If the index is inside the array, compute the index of the requested
66 : // element and recurse into it with the rest of the index list.
67 427 : CurIndex += EltLinearOffset* *Indices;
68 427 : return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
69 : }
70 8 : CurIndex += EltLinearOffset*NumElts;
71 8 : return CurIndex;
72 : }
73 : // We haven't found the type we're looking for, so keep searching.
74 300801 : return CurIndex + 1;
75 : }
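// Worked example (the aggregate type below is hypothetical): for
// { { i32, float }, [2 x i8] } the scalar leaves are numbered depth-first,
// left to right, so
//
//   index list {0, 1} -> linear index 1 (the float)
//   index list {1}    -> linear index 2 (the start of the [2 x i8])
//   index list {1, 1} -> linear index 3 (the last i8)
//
// In other words, the result is the number of scalar leaves that precede the
// element selected by the index list.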
76 :
77 : /// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
78 : /// EVTs that represent all the individual underlying
79 : /// non-aggregate types that comprise it.
80 : ///
81 : /// If Offsets is non-null, it points to a vector to be filled in
82 : /// with the in-memory offsets of each of the individual values.
83 : ///
84 29573196 : void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
85 : Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
86 : SmallVectorImpl<uint64_t> *Offsets,
87 : uint64_t StartingOffset) {
88 : // Given a struct type, recursively traverse the elements.
89 : if (StructType *STy = dyn_cast<StructType>(Ty)) {
90 1627325 : const StructLayout *SL = DL.getStructLayout(STy);
91 3261318 : for (StructType::element_iterator EB = STy->element_begin(),
92 : EI = EB,
93 1627325 : EE = STy->element_end();
94 4888643 : EI != EE; ++EI)
95 3261318 : ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
96 3261318 : StartingOffset + SL->getElementOffset(EI - EB));
97 : return;
98 : }
99 : // Given an array type, recursively traverse the elements.
100 : if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
101 3892 : Type *EltTy = ATy->getElementType();
102 3892 : uint64_t EltSize = DL.getTypeAllocSize(EltTy);
103 32416 : for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
104 28524 : ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
105 28524 : StartingOffset + i * EltSize);
106 : return;
107 : }
108 : // Interpret void as zero return values.
109 27941979 : if (Ty->isVoidTy())
110 : return;
111 : // Base case: we can get an EVT for this LLVM IR type.
112 23963173 : ValueVTs.push_back(TLI.getValueType(DL, Ty));
113 23963173 : if (Offsets)
114 4828562 : Offsets->push_back(StartingOffset);
115 : }
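// A minimal usage sketch (assuming a TargetLowering TLI, a DataLayout DL and
// a Type *RetTy are in scope; the names are illustrative only):
//
//   SmallVector<EVT, 4> ValueVTs;
//   SmallVector<uint64_t, 4> Offsets;
//   ComputeValueVTs(TLI, DL, RetTy, ValueVTs, &Offsets);
//
// Afterwards ValueVTs holds one EVT per scalar leaf of RetTy (none if RetTy
// is void) and Offsets holds the matching in-memory byte offsets.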
116 :
117 : /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
118 11388 : GlobalValue *llvm::ExtractTypeInfo(Value *V) {
119 : V = V->stripPointerCasts();
120 : GlobalValue *GV = dyn_cast<GlobalValue>(V);
121 : GlobalVariable *Var = dyn_cast<GlobalVariable>(V);
122 :
123 11385 : if (Var && Var->getName() == "llvm.eh.catch.all.value") {
124 : assert(Var->hasInitializer() &&
125 : "The EH catch-all value must have an initializer");
126 : Value *Init = Var->getInitializer();
127 : GV = dyn_cast<GlobalValue>(Init);
128 : if (!GV) V = cast<ConstantPointerNull>(Init);
129 : }
130 :
131 : assert((GV || isa<ConstantPointerNull>(V)) &&
132 : "TypeInfo must be a global variable or NULL");
133 11388 : return GV;
134 : }
135 :
136 : /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
137 : /// processed uses a memory 'm' constraint.
138 : bool
139 0 : llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
140 : const TargetLowering &TLI) {
141 0 : for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
142 0 : InlineAsm::ConstraintInfo &CI = CInfos[i];
143 0 : for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
144 0 : TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
145 0 : if (CType == TargetLowering::C_Memory)
146 : return true;
147 : }
148 :
149 : // Indirect operand accesses access memory.
150 0 : if (CI.isIndirect)
151 : return true;
152 : }
153 :
154 : return false;
155 : }
156 :
157 : /// getFCmpCondCode - Return the ISD condition code corresponding to
158 : /// the given LLVM IR floating-point condition code. This includes
159 : /// consideration of global floating-point math flags.
160 : ///
161 9719 : ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
162 : switch (Pred) {
163 : case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
164 : case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
165 : case FCmpInst::FCMP_OGT: return ISD::SETOGT;
166 : case FCmpInst::FCMP_OGE: return ISD::SETOGE;
167 : case FCmpInst::FCMP_OLT: return ISD::SETOLT;
168 : case FCmpInst::FCMP_OLE: return ISD::SETOLE;
169 : case FCmpInst::FCMP_ONE: return ISD::SETONE;
170 : case FCmpInst::FCMP_ORD: return ISD::SETO;
171 : case FCmpInst::FCMP_UNO: return ISD::SETUO;
172 : case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
173 : case FCmpInst::FCMP_UGT: return ISD::SETUGT;
174 : case FCmpInst::FCMP_UGE: return ISD::SETUGE;
175 : case FCmpInst::FCMP_ULT: return ISD::SETULT;
176 : case FCmpInst::FCMP_ULE: return ISD::SETULE;
177 : case FCmpInst::FCMP_UNE: return ISD::SETUNE;
178 : case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
179 0 : default: llvm_unreachable("Invalid FCmp predicate opcode!");
180 : }
181 : }
182 :
183 1082 : ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
184 1082 : switch (CC) {
185 : case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
186 3 : case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
187 376 : case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
188 129 : case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
189 383 : case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
190 150 : case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
191 0 : default: return CC;
192 : }
193 : }
194 :
195 : /// getICmpCondCode - Return the ISD condition code corresponding to
196 : /// the given LLVM IR integer condition code.
197 : ///
198 99410 : ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
199 : switch (Pred) {
200 : case ICmpInst::ICMP_EQ: return ISD::SETEQ;
201 : case ICmpInst::ICMP_NE: return ISD::SETNE;
202 : case ICmpInst::ICMP_SLE: return ISD::SETLE;
203 : case ICmpInst::ICMP_ULE: return ISD::SETULE;
204 : case ICmpInst::ICMP_SGE: return ISD::SETGE;
205 : case ICmpInst::ICMP_UGE: return ISD::SETUGE;
206 : case ICmpInst::ICMP_SLT: return ISD::SETLT;
207 : case ICmpInst::ICMP_ULT: return ISD::SETULT;
208 : case ICmpInst::ICMP_SGT: return ISD::SETGT;
209 : case ICmpInst::ICMP_UGT: return ISD::SETUGT;
210 0 : default:
211 0 : llvm_unreachable("Invalid ICmp predicate opcode!");
212 : }
213 : }
214 :
215 91 : static bool isNoopBitcast(Type *T1, Type *T2,
216 : const TargetLoweringBase& TLI) {
217 91 : return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
218 1 : (isa<VectorType>(T1) && isa<VectorType>(T2) &&
219 2 : TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
220 : }
221 :
222 : /// Look through operations that will be free to find the earliest source of
223 : /// this value.
224 : ///
225 : /// @param ValLoc If V has aggregate type, we will be interested in a particular
226 : /// scalar component. This records its address; the reverse of this list gives a
227 : /// sequence of indices appropriate for an extractvalue to locate the important
228 : /// value. This value is updated during the function and on exit will indicate
229 : /// similar information for the Value returned.
230 : ///
231 : /// @param DataBits If this function looks through truncate instructions, this
232 : /// will record the smallest size attained.
233 4374 : static const Value *getNoopInput(const Value *V,
234 : SmallVectorImpl<unsigned> &ValLoc,
235 : unsigned &DataBits,
236 : const TargetLoweringBase &TLI,
237 : const DataLayout &DL) {
238 : while (true) {
239 : // Try to look through V; if V is not an instruction, it can't be looked
240 : // through.
241 : const Instruction *I = dyn_cast<Instruction>(V);
242 4609 : if (!I || I->getNumOperands() == 0) return V;
243 : const Value *NoopInput = nullptr;
244 :
245 4273 : Value *Op = I->getOperand(0);
246 4273 : if (isa<BitCastInst>(I)) {
247 : // Look through truly no-op bitcasts.
248 26 : if (isNoopBitcast(Op->getType(), I->getType(), TLI))
249 : NoopInput = Op;
250 4247 : } else if (isa<GetElementPtrInst>(I)) {
251 : // Look through getelementptr
252 5 : if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
253 : NoopInput = Op;
254 4242 : } else if (isa<IntToPtrInst>(I)) {
255 : // Look through inttoptr.
256 : // Make sure this isn't a truncating or extending cast. We could
257 : // support this eventually, but don't bother for now.
258 18 : if (!isa<VectorType>(I->getType()) &&
259 : DL.getPointerSizeInBits() ==
260 6 : cast<IntegerType>(Op->getType())->getBitWidth())
261 : NoopInput = Op;
262 4236 : } else if (isa<PtrToIntInst>(I)) {
263 : // Look through ptrtoint.
264 : // Make sure this isn't a truncating or extending cast. We could
265 : // support this eventually, but don't bother for now.
266 0 : if (!isa<VectorType>(I->getType()) &&
267 : DL.getPointerSizeInBits() ==
268 0 : cast<IntegerType>(I->getType())->getBitWidth())
269 : NoopInput = Op;
270 4263 : } else if (isa<TruncInst>(I) &&
271 27 : TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
272 46 : DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
273 : NoopInput = Op;
274 4213 : } else if (auto CS = ImmutableCallSite(I)) {
275 3764 : const Value *ReturnedOp = CS.getReturnedArgOperand();
276 3764 : if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
277 : NoopInput = ReturnedOp;
278 : } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
279 : // Value may come from either the aggregate or the scalar
280 : ArrayRef<unsigned> InsertLoc = IVI->getIndices();
281 223 : if (ValLoc.size() >= InsertLoc.size() &&
282 : std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
283 : // The type being inserted is a nested sub-type of the aggregate; we
284 : // have to remove those initial indices to get the location we're
285 : // interested in for the operand.
286 44 : ValLoc.resize(ValLoc.size() - InsertLoc.size());
287 : NoopInput = IVI->getInsertedValueOperand();
288 : } else {
289 : // The struct we're inserting into has the value we're interested in, no
290 : // change of address.
291 : NoopInput = Op;
292 : }
293 : } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
294 : // The part we're interested in will inevitably be some sub-section of the
295 : // previous aggregate. Combine the two paths to obtain the true address of
296 : // our element.
297 : ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
298 40 : ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
299 : NoopInput = Op;
300 : }
301 : // Terminate if we couldn't find anything to look through.
302 235 : if (!NoopInput)
303 4038 : return V;
304 :
305 : V = NoopInput;
306 : }
307 : }
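// A sketch of the ValLoc bookkeeping (the IR below is hypothetical):
//
//   %agg = insertvalue { i32, i64 } %call, i64 %x, 1
//   %v   = extractvalue { i32, i64 } %agg, 1
//   ret i64 %v
//
// Starting from %v, the extractvalue pushes index 1 onto ValLoc; at the
// insertvalue the recorded index matches the insertion index, so it is popped
// again and the search continues from %x with an empty ValLoc.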
308 :
309 : /// Return true if this scalar return value only has bits discarded on its path
310 : /// from the "tail call" to the "ret". This includes the obvious noop
311 : /// instructions handled by getNoopInput above as well as free truncations (or
312 : /// extensions prior to the call).
313 2188 : static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
314 : SmallVectorImpl<unsigned> &RetIndices,
315 : SmallVectorImpl<unsigned> &CallIndices,
316 : bool AllowDifferingSizes,
317 : const TargetLoweringBase &TLI,
318 : const DataLayout &DL) {
319 :
320 : // Trace the sub-value needed by the return value as far back up the graph as
321 : // possible, in the hope that it will intersect with the value produced by the
322 : // call. In the simple case with no "returned" attribute, the hope is actually
323 : // that we end up back at the tail call instruction itself.
324 2188 : unsigned BitsRequired = UINT_MAX;
325 2188 : RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);
326 :
327 : // If this slot in the value returned is undef, it doesn't matter what the
328 : // call puts there, it'll be fine.
329 2188 : if (isa<UndefValue>(RetVal))
330 : return true;
331 :
332 : // Now do a similar search up through the graph to find where the value
333 : // actually returned by the "tail call" comes from. In the simple case without
334 : // a "returned" attribute, the search will be blocked immediately and the loop
335 : // will be a no-op.
336 2186 : unsigned BitsProvided = UINT_MAX;
337 2186 : CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);
338 :
339 : // There's no hope if we can't actually trace them to (the same part of!) the
340 : // same value.
341 2186 : if (CallVal != RetVal || CallIndices != RetIndices)
342 : return false;
343 :
344 : // However, intervening truncates may have made the call non-tail. Make sure
345 : // all the bits that are needed by the "ret" have been provided by the "tail
346 : // call". FIXME: with sufficiently cunning bit-tracking, we could look through
347 : // extensions too.
348 1559 : if (BitsProvided < BitsRequired ||
349 88 : (!AllowDifferingSizes && BitsProvided != BitsRequired))
350 3 : return false;
351 :
352 : return true;
353 : }
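// For instance (hypothetical IR, and assuming the target's
// allowTruncateForTailCall hook accepts an i64 -> i32 truncate):
//
//   %wide = tail call i64 @f()
//   %narrow = trunc i64 %wide to i32
//   ret i32 %narrow
//
// Tracing the returned value records BitsRequired = 32 while every bit of the
// call's result is still provided, so the slot only discards data and the
// call stays eligible as long as AllowDifferingSizes is true.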
354 :
355 : /// For an aggregate type, determine whether a given index is within bounds or
356 : /// not.
357 : static bool indexReallyValid(CompositeType *T, unsigned Idx) {
358 : if (ArrayType *AT = dyn_cast<ArrayType>(T))
359 26 : return Idx < AT->getNumElements();
360 :
361 332 : return Idx < cast<StructType>(T)->getNumElements();
362 : }
363 :
364 : /// Move the given iterators to the next leaf type in depth first traversal.
365 : ///
366 : /// Performs a depth-first traversal of the type as specified by its arguments,
367 : /// stopping at the next leaf node (which may be a legitimate scalar type or an
368 : /// empty struct or array).
369 : ///
370 : /// @param SubTypes List of the partial components making up the type from
371 : /// outermost to innermost non-empty aggregate. The element currently
372 : /// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
373 : ///
374 : /// @param Path Set of extractvalue indices leading from the outermost type
375 : /// (SubTypes[0]) to the leaf node currently represented.
376 : ///
377 : /// @returns true if a new type was found, false otherwise. Calling this
378 : /// function again on a finished iterator will repeatedly return
379 : /// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
380 : /// aggregate or a non-aggregate.
381 3122 : static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
382 : SmallVectorImpl<unsigned> &Path) {
383 : // First march back up the tree until we can successfully increment one of the
384 : // coordinates in Path.
385 3451 : while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
386 : Path.pop_back();
387 : SubTypes.pop_back();
388 : }
389 :
390 : // If we reached the top, then the iterator is done.
391 3122 : if (Path.empty())
392 : return false;
393 :
394 : // We know there's *some* valid leaf now, so march back down the tree picking
395 : // out the left-most element at each node.
396 137 : ++Path.back();
397 274 : Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
398 22 : while (DeeperType->isAggregateType()) {
399 26 : CompositeType *CT = cast<CompositeType>(DeeperType);
400 26 : if (!indexReallyValid(CT, 0))
401 4 : return true;
402 :
403 22 : SubTypes.push_back(CT);
404 22 : Path.push_back(0);
405 :
406 22 : DeeperType = CT->getTypeAtIndex(0U);
407 : }
408 :
409 : return true;
410 : }
411 :
412 : /// Find the first non-empty, scalar-like type in Next and set up the iterator
413 : /// components.
414 : ///
415 : /// Assuming Next is an aggregate of some kind, this function will traverse the
416 : /// tree from left to right (i.e. depth-first) looking for the first
417 : /// non-aggregate type which will play a role in function return.
418 : ///
419 : /// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would set up
420 : /// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
421 : /// i32 in that type.
422 4248 : static bool firstRealType(Type *Next,
423 : SmallVectorImpl<CompositeType *> &SubTypes,
424 : SmallVectorImpl<unsigned> &Path) {
425 : // First initialise the iterator components to the first "leaf" node
426 : // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
427 : // despite nominally being an aggregate).
428 196 : while (Next->isAggregateType() &&
429 : indexReallyValid(cast<CompositeType>(Next), 0)) {
430 97 : SubTypes.push_back(cast<CompositeType>(Next));
431 97 : Path.push_back(0);
432 97 : Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
433 : }
434 :
435 : // If there's no Path now, Next was originally scalar already (or empty
436 : // leaf). We're done.
437 4248 : if (Path.empty())
438 : return true;
439 :
440 : // Otherwise, use normal iteration to keep looking through the tree until we
441 : // find a non-aggregate type.
442 192 : while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
443 4 : if (!advanceToNextLeafType(SubTypes, Path))
444 : return false;
445 : }
446 :
447 : return true;
448 : }
449 :
450 : /// Set the iterator data-structures to the next non-empty, non-aggregate
451 : /// subtype.
452 3116 : static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
453 : SmallVectorImpl<unsigned> &Path) {
454 : do {
455 3118 : if (!advanceToNextLeafType(SubTypes, Path))
456 : return false;
457 :
458 : assert(!Path.empty() && "found a leaf but didn't set the path?");
459 266 : } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());
460 :
461 : return true;
462 : }
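// Continuing the example from the firstRealType comment above: starting from
// Path = [1, 1] in {[0 x i64], {{}, i32, {}}, i32}, one nextRealType call
// advances the iterator to the trailing i32 (Path = [2]), and the following
// call returns false because the depth-first walk has run out of
// non-aggregate leaves.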
463 :
464 :
465 : /// Test if the given instruction is in a position to be optimized
466 : /// with a tail-call. This roughly means that it's in a block with
467 : /// a return and there's nothing that needs to be scheduled
468 : /// between it and the return.
469 : ///
470 : /// This function only tests target-independent requirements.
471 27100 : bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
472 : const Instruction *I = CS.getInstruction();
473 27100 : const BasicBlock *ExitBB = I->getParent();
474 27100 : const Instruction *Term = ExitBB->getTerminator();
475 : const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
476 :
477 : // The block must end in a return statement or unreachable.
478 : //
479 : // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
480 : // an unreachable, for now. The way tailcall optimization is currently
481 : // implemented means it will add an epilogue followed by a jump. That is
482 : // not profitable. Also, if the callee is a special function (e.g.
483 : // longjmp on x86), it can end up causing miscompilation that has not
484 : // been fully understood.
485 14843 : if (!Ret &&
486 14843 : (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
487 : return false;
488 :
489 : // If I will have a chain, make sure no other instruction that will have a
490 : // chain interposes between I and the return.
491 12398 : if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
492 140 : !isSafeToSpeculativelyExecute(I))
493 12258 : for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
494 26826 : if (&*BBI == I)
495 : break;
496 : // Debug info intrinsics do not get in the way of tail call optimization.
497 3889 : if (isa<DbgInfoIntrinsic>(BBI))
498 : continue;
499 4993 : if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
500 1173 : !isSafeToSpeculativelyExecute(&*BBI))
501 2734 : return false;
502 : }
503 :
504 9524 : const Function *F = ExitBB->getParent();
505 9524 : return returnTypeIsEligibleForTailCall(
506 19048 : F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
507 : }
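// For example (schematic IR), the call below is in tail call position: its
// block ends in a return and nothing that reads or writes memory sits between
// the two instructions.
//
//   %r = tail call i32 @callee(i32 %x)
//   ret i32 %r
//
// An intervening store or volatile load would make the backwards scan from
// the return bail out and the function return false.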
508 :
509 3002 : bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
510 : const ReturnInst *Ret,
511 : const TargetLoweringBase &TLI,
512 : bool *AllowDifferingSizes) {
513 : // ADS may be null, so don't write to it directly.
514 : bool DummyADS;
515 3002 : bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
516 3002 : ADS = true;
517 :
518 3002 : AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
519 : AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
520 3002 : AttributeList::ReturnIndex);
521 :
522 : // NoAlias and NonNull are completely benign as far as calling convention
523 : // goes; they shouldn't affect whether the call is a tail call.
524 3002 : CallerAttrs.removeAttribute(Attribute::NoAlias);
525 3002 : CalleeAttrs.removeAttribute(Attribute::NoAlias);
526 3002 : CallerAttrs.removeAttribute(Attribute::NonNull);
527 3002 : CalleeAttrs.removeAttribute(Attribute::NonNull);
528 :
529 3002 : if (CallerAttrs.contains(Attribute::ZExt)) {
530 106 : if (!CalleeAttrs.contains(Attribute::ZExt))
531 : return false;
532 :
533 78 : ADS = false;
534 78 : CallerAttrs.removeAttribute(Attribute::ZExt);
535 78 : CalleeAttrs.removeAttribute(Attribute::ZExt);
536 2896 : } else if (CallerAttrs.contains(Attribute::SExt)) {
537 56 : if (!CalleeAttrs.contains(Attribute::SExt))
538 : return false;
539 :
540 49 : ADS = false;
541 49 : CallerAttrs.removeAttribute(Attribute::SExt);
542 49 : CalleeAttrs.removeAttribute(Attribute::SExt);
543 : }
544 :
545 : // If they're still different, there's some facet we don't understand
546 : // (currently only "inreg", but in future who knows). It may be OK but the
547 : // only safe option is to reject the tail call.
548 2967 : return CallerAttrs == CalleeAttrs;
549 : }
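// As a concrete case (schematic IR), a zeroext mismatch between the caller's
// return and the call site blocks the optimization, because the caller would
// have to re-extend the result after the callee returns:
//
//   define zeroext i8 @caller() {
//     %v = tail call i8 @callee()   ; call-site result is not marked zeroext
//     ret i8 %v                     ; attributesPermitTailCall returns false
//   }
//
// When both sides carry the same extension attribute the call remains
// acceptable, but *AllowDifferingSizes is cleared because every bit of the
// returned value now matters.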
550 :
551 9524 : bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
552 : const Instruction *I,
553 : const ReturnInst *Ret,
554 : const TargetLoweringBase &TLI) {
555 : // If the block ends with a void return or unreachable, it doesn't matter
556 : // what the call's return type is.
557 9524 : if (!Ret || Ret->getNumOperands() == 0) return true;
558 :
559 : // If the return value is undef, it doesn't matter what the call's
560 : // return type is.
561 2226 : if (isa<UndefValue>(Ret->getOperand(0))) return true;
562 :
563 : // Make sure the attributes attached to each return are compatible.
564 : bool AllowDifferingSizes;
565 2197 : if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
566 : return false;
567 :
568 : const Value *RetVal = Ret->getOperand(0), *CallVal = I;
569 : // Intrinsics like llvm.memcpy have no return value, but the expanded
570 : // libcall may or may not have a return value. On most platforms, it
571 : // will be expanded as memcpy in libc, which returns the first
572 : // argument. On other platforms like arm-none-eabi, memcpy may be
573 : // expanded as a library call without a return value, like __aeabi_memcpy.
574 : const CallInst *Call = cast<CallInst>(I);
575 : if (Function *F = Call->getCalledFunction()) {
576 1748 : Intrinsic::ID IID = F->getIntrinsicID();
577 : if (((IID == Intrinsic::memcpy &&
578 1733 : TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
579 : (IID == Intrinsic::memmove &&
580 1731 : TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
581 : (IID == Intrinsic::memset &&
582 1768 : TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
583 20 : RetVal == Call->getArgOperand(0))
584 : return true;
585 : }
586 :
587 : SmallVector<unsigned, 4> RetPath, CallPath;
588 : SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
589 :
590 2124 : bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
591 2124 : bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);
592 :
593 : // Nothing's actually returned, so it doesn't matter what the callee put
594 : // there; it's a valid tail call.
595 2124 : if (RetEmpty)
596 : return true;
597 :
598 : // Iterate pairwise through each of the value types making up the tail call
599 : // and the corresponding return. For each one we want to know whether it's
600 : // essentially going directly from the tail call to the ret, via operations
601 : // that end up not generating any code.
602 : //
603 : // We allow a certain amount of covariance here. For example it's permitted
604 : // for the tail call to define more bits than the ret actually cares about
605 : // (e.g. via a truncate).
606 : do {
607 2188 : if (CallEmpty) {
608 : // We've exhausted the values produced by the tail call instruction, the
609 : // rest are essentially undef. The type doesn't really matter, but we need
610 : // *something*.
611 4 : Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
612 2 : CallVal = UndefValue::get(SlotType);
613 : }
614 :
615 : // The manipulations performed when we're looking through an insertvalue or
616 : // an extractvalue would happen at the front of the RetPath list, so since
617 : // we have to copy it anyway it's more efficient to create a reversed copy.
618 2188 : SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
619 2188 : SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
620 :
621 : // Finally, we can check whether the value produced by the tail call at this
622 : // index is compatible with the value we return.
623 2188 : if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
624 : AllowDifferingSizes, TLI,
625 : F->getParent()->getDataLayout()))
626 : return false;
627 :
628 1558 : CallEmpty = !nextRealType(CallSubTypes, CallPath);
629 1558 : } while(nextRealType(RetSubTypes, RetPath));
630 :
631 : return true;
632 : }
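// One example of the libcall special case above (schematic IR): returning the
// destination pointer of an llvm.memcpy that will be lowered to the C library
// memcpy, which itself returns its first argument, is treated as returning
// the call's own result:
//
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
//   ret i8* %dst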
633 :
634 1632 : static void collectEHScopeMembers(
635 : DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
636 : const MachineBasicBlock *MBB) {
637 1632 : SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
638 5115 : while (!Worklist.empty()) {
639 : const MachineBasicBlock *Visiting = Worklist.pop_back_val();
640 : // Don't follow blocks which start new scopes.
641 3483 : if (Visiting->isEHPad() && Visiting != MBB)
642 1985 : continue;
643 :
644 : // Add this MBB to our scope.
645 2700 : auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));
646 :
647 : // Don't revisit blocks.
648 2700 : if (!P.second) {
649 : assert(P.first->second == EHScope && "MBB is part of two scopes!");
650 : continue;
651 : }
652 : // Returns are boundaries where scope transfer can occur; don't follow
653 : // Returns are boundaries where scope transfer can occur, don't follow
654 : // successors.
655 1979 : if (Visiting->isEHScopeReturnBlock())
656 : continue;
657 :
658 3349 : for (const MachineBasicBlock *Succ : Visiting->successors())
659 1851 : Worklist.push_back(Succ);
660 : }
661 1632 : }
662 :
663 : DenseMap<const MachineBasicBlock *, int>
664 866614 : llvm::getEHScopeMembership(const MachineFunction &MF) {
665 : DenseMap<const MachineBasicBlock *, int> EHScopeMembership;
666 :
667 : // We don't have anything to do if there aren't any EH pads.
668 866615 : if (!MF.hasEHScopes())
669 : return EHScopeMembership;
670 :
671 400 : int EntryBBNumber = MF.front().getNumber();
672 400 : bool IsSEH = isAsynchronousEHPersonality(
673 400 : classifyEHPersonality(MF.getFunction().getPersonalityFn()));
674 :
675 400 : const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
676 : SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;
677 : SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
678 : SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
679 : SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
680 2632 : for (const MachineBasicBlock &MBB : MF) {
681 2232 : if (MBB.isEHScopeEntry()) {
682 601 : EHScopeBlocks.push_back(&MBB);
683 1631 : } else if (IsSEH && MBB.isEHPad()) {
684 118 : SEHCatchPads.push_back(&MBB);
685 1513 : } else if (MBB.pred_empty()) {
686 400 : UnreachableBlocks.push_back(&MBB);
687 : }
688 :
689 : MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
690 :
691 : // CatchPads are not scopes for SEH so do not consider CatchRet to
692 : // transfer control to another scope.
693 2232 : if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
694 : continue;
695 :
696 : // FIXME: SEH CatchPads are not necessarily in the parent function:
697 : // they could be inside a finally block.
698 323 : const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
699 323 : const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
700 323 : CatchRetSuccessors.push_back(
701 323 : {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
702 : }
703 :
704 : // We don't have anything to do if there aren't any EH pads.
705 400 : if (EHScopeBlocks.empty())
706 : return EHScopeMembership;
707 :
708 : // Identify all the basic blocks reachable from the function entry.
709 341 : collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front());
710 : // All blocks not part of a scope are in the parent function.
711 682 : for (const MachineBasicBlock *MBB : UnreachableBlocks)
712 341 : collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
713 : // Next, identify all the blocks inside the scopes.
714 942 : for (const MachineBasicBlock *MBB : EHScopeBlocks)
715 601 : collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB);
716 : // SEH CatchPads aren't really scopes, handle them separately.
717 367 : for (const MachineBasicBlock *MBB : SEHCatchPads)
718 26 : collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
719 : // Finally, identify all the targets of a catchret.
720 323 : for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
721 664 : CatchRetSuccessors)
722 323 : collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,
723 : CatchRetPair.first);
724 341 : return EHScopeMembership;
725 : }
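// A minimal usage sketch (assuming a MachineFunction MF is in scope):
//
//   DenseMap<const MachineBasicBlock *, int> Scopes = getEHScopeMembership(MF);
//   int EntryScope = Scopes.empty() ? -1 : Scopes.lookup(&MF.front());
//
// The map is empty when the function has no EH scopes; otherwise each visited
// block is mapped to the number of the EH scope (funclet) it belongs to.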