//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Rewrite call/invoke instructions so as to make potential relocations
// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//
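//
// Schematic example of the rewrite (illustrative IR only; the statepoint
// operand list is abbreviated and the value names are invented):
//
//   Before:
//     %derived = getelementptr i8, ptr addrspace(1) %base, i64 16
//     call void @foo()
//     %v = load i8, ptr addrspace(1) %derived
//
//   After (schematically):
//     %tok = call token @llvm.experimental.gc.statepoint.p0(..., ptr @foo, ...)
//                [ "gc-live"(ptr addrspace(1) %base, ptr addrspace(1) %derived) ]
//     %base.relocated = call ptr addrspace(1)
//                @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0)
//     %derived.relocated = call ptr addrspace(1)
//                @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 1)
//     %v = load i8, ptr addrspace(1) %derived.relocated
//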

#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "rewrite-statepoints-for-gc"

using namespace llvm;

// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));

// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize a value
// instead of relocating it
static cl::opt<unsigned>
    RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                               cl::init(6));

#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif

static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));

static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
                                        cl::Hidden, cl::init(true));

/// The IR fed into RewriteStatepointsForGC may have had attributes and
/// metadata implying dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
/// heap. stripNonValidData (conservatively) restores
/// correctness by erasing all attributes in the module that externally imply
/// dereferenceability. Similar reasoning also applies to the noalias
/// attributes and metadata. gc.statepoint can touch the entire heap including
/// noalias objects.
/// Apart from attributes and metadata, we also remove instructions that imply
/// constant physical memory: llvm.invariant.start.
static void stripNonValidData(Module &M);
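
// For example (an illustrative sketch, not an exhaustive list): a
// `dereferenceable(8)` or `noalias` attribute on a parameter of a function
// containing a statepoint must be dropped, because the collector may have
// moved or freed the pointee by the time the statepoint returns.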

// Find the GC strategy for a function, or null if it doesn't have one.
static std::unique_ptr<GCStrategy> findGCStrategy(Function &F);

PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
                                               ModuleAnalysisManager &AM) {
  bool Changed = false;
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Nothing to do for declarations.
    if (F.isDeclaration() || F.empty())
      continue;

    // Policy choice says not to rewrite - the most common reason is that we're
    // compiling code without a GCStrategy.
    if (!shouldRewriteStatepointsIn(F))
      continue;

    auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    Changed |= runOnFunction(F, DT, TTI, TLI);
  }
  if (!Changed)
    return PreservedAnalyses::all();

  // stripNonValidData asserts that shouldRewriteStatepointsIn
  // returns true for at least one function in the module. Since at least
  // one function changed, we know that the precondition is satisfied.
  stripNonValidData(M);

  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  return PA;
}

namespace {

struct GCPtrLivenessData {
  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;

  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};

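// The liveness computation that fills these sets solves the standard backward
// dataflow equations (a sketch of the fixed point in terms of the member
// names above, not the literal implementation):
//
//   LiveOut(BB) = union over each successor S of LiveIn(S)
//   LiveIn(BB)  = LiveSet(BB) | (LiveOut(BB) - KillSet(BB))
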
// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;

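// As a concrete illustration (hypothetical value names): given
//   %derived = getelementptr i8, ptr addrspace(1) %base, i64 8
// the cache holds the base relation %derived -> %base, while a PHI of two
// derived pointers is initially recorded as its own base defining value
// (%phi -> %phi) and only later updated to point at an inserted base PHI.
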
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values that we rematerialized instead of relocating.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};

struct RematerizlizationCandidateRecord {
  // Chain from derived pointer to base.
  SmallVector<Instruction *, 3> ChainToBase;
  // Original base.
  Value *RootOfChain;
  // Cost of chain.
  InstructionCost Cost;
};
using RematerizlizationCandidateRecords =
    MapVector<Value *, RematerizlizationCandidateRecord>;

} // end anonymous namespace

static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
  std::optional<OperandBundleUse> DeoptBundle =
      Call->getOperandBundle(LLVMContext::OB_deopt);

  if (!DeoptBundle) {
    assert(AllowStatepointWithNoDeoptInfo &&
           "Found non-leaf call without deopt info!");
    return {};
  }

  return DeoptBundle->Inputs;
}

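// For reference, a call carrying deoptimization state looks like this in the
// IR (schematic; the bundle operands are whatever the frontend recorded):
//
//   call void @foo() [ "deopt"(i32 10, ptr addrspace(1) %obj) ]
//
// The Inputs returned above are forwarded verbatim onto the statepoint.
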
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data, GCStrategy *GC);

/// Given results from the dataflow liveness computation, find the set of live
/// values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out, GCStrategy *GC);

static bool isGCPointerType(Type *T, GCStrategy *GC) {
  assert(GC && "GC Strategy for isGCPointerType cannot be null");

  if (!isa<PointerType>(T))
    return false;

  // Conservative - same as StatepointLowering.
  return GC->isGCManagedPointer(T).value_or(true);
}

// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T, GCStrategy *GC) {
  // We fully support gc pointers
  if (isGCPointerType(T, GC))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType(), GC))
      return true;
  return false;
}

#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty, GCStrategy *GC) {
  if (isGCPointerType(Ty, GC))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType(), GC);
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType(), GC);
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return llvm::any_of(ST->elements(),
                        [GC](Type *Ty) { return containsGCPtrType(Ty, GC); });
  return false;
}

// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) {
  return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC);
}
#endif

// Return the name of the value suffixed with the provided suffix, or the
// specified default name if the value is unnamed.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
                                    StringRef DefaultName) {
  return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
}

// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
    PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) {
  StatepointLiveSetTy LiveSet;
  findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC);

  if (PrintLiveSet) {
    dbgs() << "Live Variables:\n";
    for (Value *V : LiveSet)
      dbgs() << " " << V->getName() << " " << *V << "\n";
  }
  if (PrintLiveSetSize) {
    dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n";
    dbgs() << "Number live values: " << LiveSet.size() << "\n";
  }
  Result.LiveSet = LiveSet;
}

/// Returns true if V is a known base.
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);

/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
/// in the cache before.
static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases);

static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases);

/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
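///
/// For instance (hypothetical IR), given
///   %vec = insertelement <2 x ptr addrspace(1)> %v, ptr addrspace(1) %p, i32 0
/// the BDV of %vec is %vec itself: we cannot tell whether every lane is a
/// base, so the algorithm later materializes a parallel vector of bases.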
static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
                                            IsKnownBaseMapTy &KnownBases) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // Base of constant vector consists only of constant null pointers.
    // For reasoning see similar case inside 'findBaseDefiningValue' function.
    auto *CAZ = ConstantAggregateZero::get(I->getType());
    Cache[I] = CAZ;
    setKnownBase(CAZ, /* IsKnownBase */ true, KnownBases);
    return CAZ;
  }

  if (isa<LoadInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<InsertElementInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ false, KnownBases);
    return I;
  }

  if (isa<ShuffleVectorInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ false, KnownBases);
    return I;
  }

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  // The behavior of freeze instructions is the same for vector and
  // non-vector data types.
  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast
  if (auto *BC = dyn_cast<BitCastInst>(I)) {
    auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
    Cache[BC] = BDV;
    return BDV;
  }

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  Cache[I] = I;
  setKnownBase(I, /* IsKnownBase */ false, KnownBases);
  return I;
}

/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
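///
/// For example (names are illustrative), in
///   %p = getelementptr i8, ptr addrspace(1) %a, i64 8
///   %q = select i1 %c, ptr addrspace(1) %p, ptr addrspace(1) %b
/// the BDV of %p is %a (case a), while the BDV of %q is %q itself (case b):
/// the select blocks the simple search and is resolved by findBasePointer.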
static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases) {
  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");
  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I, Cache, KnownBases);

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer.
    // We should have never reached here if this argument isn't a gc value.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having a single null base. By doing this we
    // try to avoid problems reporting various conflicts in a form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See constant.ll file for relevant test cases.

    auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
    Cache[I] = CPN;
    setKnownBase(CPN, /* IsKnownBase */ true, KnownBases);
    return CPN;
  }

  // inttoptrs in an integral address space are currently ill-defined. We
  // treat them as defining base pointers here for consistency with the
  // constant rule above and because we don't really have a better semantic
  // to give them. Note that the optimizer is always free to insert undefined
  // behavior on dynamically dead paths as well.
  if (isa<IntToPtrInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr). We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
    Cache[CI] = BDV;
    return BDV;
  }

  if (isa<LoadInst>(I)) {
    // The value loaded is a gc base itself.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    // The base of this GEP is the base.
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    case Intrinsic::experimental_gc_get_pointer_base:
      auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
      Cache[II] = BDV;
      return BDV;
    }
  }
  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // TODO: I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I)) {
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<AtomicRMWInst>(I)) {
    assert(cast<AtomicRMWInst>(I)->getOperation() == AtomicRMWInst::Xchg &&
           "Only Xchg is allowed for pointer values");
    // An RMW Xchg is a combined atomic load and store, so we can treat the
    // loaded value as a base pointer.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this is a def of the base just like a load is.
  if (isa<ExtractValueInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // We should never see an insert vector since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // This value might have been generated by findBasePointer() called when
  // substituting gc.get.pointer.base() intrinsic.
  bool IsKnownBase =
      isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
  setKnownBase(I, /* IsKnownBase */ IsKnownBase, KnownBases);
  Cache[I] = I;

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here. These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return I;

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each with its own base potentially). It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return I;
}

/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
                                          IsKnownBaseMapTy &KnownBases) {
  if (!Cache.contains(I)) {
    auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
    Cache[I] = BDV;
    LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                      << Cache[I]->getName() << ", is known base = "
                      << KnownBases[I] << "\n");
  }
  assert(Cache[I] != nullptr);
  assert(KnownBases.contains(Cache[I]) &&
         "Cached value must be present in known bases map");
  return Cache[I];
}

/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
                            IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available
  return Def;
}

#ifndef NDEBUG
/// This value is a base pointer that is not generated by RS4GC, i.e. it
/// already exists in the code.
static bool isOriginalBaseResult(Value *V) {
  // no recursion possible
  return !isa<PHINode>(V) && !isa<SelectInst>(V) &&
         !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
         !isa<ShuffleVectorInst>(V);
}
#endif

static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
  auto It = KnownBases.find(V);
  assert(It != KnownBases.end() && "Value not present in the map");
  return It->second;
}

static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
  auto It = KnownBases.find(V);
  if (It != KnownBases.end())
    assert(It->second == IsKnownBase && "Changing already present value");
#endif
  KnownBases[V] = IsKnownBase;
}

// Returns true if First and Second values are both scalar or both vector.
static bool areBothVectorOrScalar(Value *First, Value *Second) {
  return isa<VectorType>(First->getType()) ==
         isa<VectorType>(Second->getType());
}

namespace {

/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  enum StatusTy {
    // Starting state of lattice
    Unknown,
    // Some specific base value -- does *not* mean that instruction
    // propagates the base of the object
    // ex: gep %arg, 16 -> %arg is the base value
    Base,
    // Need to insert a node to represent a merge.
    Conflict
  };

  BDVState() {
    llvm_unreachable("missing state in map");
  }

  explicit BDVState(Value *OriginalValue)
      : OriginalValue(OriginalValue) {}
  explicit BDVState(Value *OriginalValue, StatusTy Status,
                    Value *BaseValue = nullptr)
      : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
    assert(Status != Base || BaseValue);
  }

  StatusTy getStatus() const { return Status; }
  Value *getOriginalValue() const { return OriginalValue; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  // Values of type BDVState form a lattice, and this function implements the
  // meet operation.
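  //
  // A sketch of the meet table this implements (U = Unknown, B(x) = Base with
  // value x, C = Conflict; meet is symmetric):
  //
  //   meet(U, s)       = s
  //   meet(C, s)       = C
  //   meet(B(x), B(x)) = B(x)
  //   meet(B(x), B(y)) = C        when x != y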
  void meet(const BDVState &Other) {
    auto markConflict = [&]() {
      Status = BDVState::Conflict;
      BaseValue = nullptr;
    };
    // Conflict is a final state.
    if (isConflict())
      return;
    // If we are not known - just take the other state.
    if (isUnknown()) {
      Status = Other.getStatus();
      BaseValue = Other.getBaseValue();
      return;
    }
    // We are base.
    assert(isBase() && "Unknown state");
    // If other is unknown - just keep our state.
    if (Other.isUnknown())
      return;
    // If other is conflict - it is a final state.
    if (Other.isConflict())
      return markConflict();
    // Other is base as well.
    assert(Other.isBase() && "Unknown state");
    // If bases are different - Conflict.
    if (getBaseValue() != Other.getBaseValue())
      return markConflict();
    // We are identical, do nothing.
  }

  bool operator==(const BDVState &Other) const {
    return OriginalValue == Other.OriginalValue &&
           BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  LLVM_DUMP_METHOD
  void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }

  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (base " << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
       << " for " << OriginalValue->getName() << ":";
  }

private:
  AssertingVH<Value> OriginalValue; // Instruction this state corresponds to.
  StatusTy Status = Unknown;
  AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base.
};

} // end anonymous namespace

#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
#endif

/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
                              IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseOrBDV(I, Cache, KnownBases);

  if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When algorithm terminates, all PHIs will either have a single concrete
  //   base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   arguments.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.

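  // As an illustration (schematic IR, names invented for this sketch), for
  //   %merged = phi ptr addrspace(1) [ %gep1, %bb1 ], [ %gep2, %bb2 ]
  // where %gep1 derives from %base1 and %gep2 from %base2, the PHI ends up in
  // the Conflict state and a parallel
  //   %merged.base = phi ptr addrspace(1) [ %base1, %bb1 ], [ %base2, %bb2 ]
  // is inserted to carry the base alongside the derived pointer.
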
#ifndef NDEBUG
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
           isa<ShuffleVectorInst>(BDV);
  };
#endif

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below. This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

#ifndef NDEBUG
  auto VerifyStates = [&]() {
    for (auto &Entry : States) {
      assert(Entry.first == Entry.second.getOriginalValue());
    }
  };
#endif

  auto visitBDVOperands = [](Value *BDV, std::function<void (Value*)> F) {
    if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
      for (Value *InVal : PN->incoming_values())
        F(InVal);
    } else if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
      F(SI->getTrueValue());
      F(SI->getFalseValue());
    } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
      F(EE->getVectorOperand());
    } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
      F(IE->getOperand(0));
      F(IE->getOperand(1));
    } else if (auto *SV = dyn_cast<ShuffleVectorInst>(BDV)) {
      // For a canonical broadcast, ignore the undef argument
      // (without this, we insert a parallel base shuffle for every broadcast)
      F(SV->getOperand(0));
      if (!SV->isZeroEltSplat())
        F(SV->getOperand(1));
    } else {
      llvm_unreachable("unexpected BDV type");
    }
  };


  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value for
  /* scope */ {
    SmallVector<Value *, 16> Worklist;
    Worklist.push_back(Def);
    States.insert({Def, BDVState(Def)});
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isOriginalBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
        if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
          // Known bases won't need new instructions introduced and can be
          // ignored safely. However, this can only be done when InVal and Base
          // are both scalar or both vector. Otherwise, we need to find a
          // correct BDV for InVal, by creating an entry in the lattice
          // (States).
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        if (States.insert(std::make_pair(Base, BDVState(Base))).second)
          Worklist.push_back(Base);
      };

      visitBDVOperands(Current, visitIncomingValue);
    }
  }

#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after initialization:\n");
  for (const auto &Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Iterate forward through the value graph pruning any node from the state
  // list where all of the inputs are base pointers. The purpose of this is to
  // reuse existing values when the derived pointer we were asked to materialize
  // a base pointer for happens to be a base pointer itself. (Or a sub-graph
  // feeding it does.)
  SmallVector<Value *> ToRemove;
  do {
    ToRemove.clear();
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      auto canPruneInput = [&](Value *V) {
        // If the input of the BDV is the BDV itself we can prune it. This is
        // only possible if the BDV is a PHI node.
        if (V->stripPointerCasts() == BDV)
          return true;
        Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
        if (V->stripPointerCasts() != VBDV)
          return false;
        // The assumption is that anything not in the state list
        // propagates a base pointer.
        return States.count(VBDV) == 0;
      };

      bool CanPrune = true;
      visitBDVOperands(BDV, [&](Value *Op) {
        CanPrune = CanPrune && canPruneInput(Op);
      });
      if (CanPrune)
        ToRemove.push_back(BDV);
    }
    for (Value *V : ToRemove) {
      States.erase(V);
      // Cache the fact V is its own base for later usage.
      Cache[V] = V;
    }
  } while (!ToRemove.empty());

  // Did we manage to prove that Def itself must be a base pointer?
  if (!States.count(Def))
    return Def;

  // Return a phi state for a base defining value. We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto GetStateForBDV = [&](Value *BaseValue, Value *Input) {
    auto I = States.find(BaseValue);
    if (I != States.end())
      return I->second;
    assert(areBothVectorOrScalar(BaseValue, Input));
    return BDVState(BaseValue, BDVState::Base, BaseValue);
  };

  // Even though we have identified a concrete base (or a conflict) for all live
  // pointers at this point, there are cases where the base is of an
  // incompatible type compared to the original instruction. We conservatively
  // mark those as conflicts to ensure that corresponding BDVs will be generated
  // in the next steps.

  // This is a rather explicit check for all cases where we should mark the
  // state as a conflict to force the latter stages of the algorithm to emit
  // the BDVs.
  // TODO: in many cases the instructions emitted for the conflicting states
  // will be identical to the I itself (if the I's operate on their BDVs
  // themselves). We should exploit this, but can't do it here since it would
  // break the invariant about the BDVs not being known to be a base.
  // TODO: the code also does not handle constants at all - the algorithm relies
  // on all constants having the same BDV and therefore constant-only insns
  // will never be in conflict, but this check is ignored here. If the
  // constant conflicts will be to BDVs themselves, they will be identical
  // instructions and will get optimized away (as in the above TODO)
  auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
    // IE and EE mix vector & scalar, so they are always a conflict.
    if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
      return true;
    // Shuffle vector is always a conflict as it creates a new vector from
    // existing ones.
    if (isa<ShuffleVectorInst>(I))
      return true;
    // Any instructions where the computed base type differs from the
    // instruction type. An example is where an extract instruction is used by a
    // select. Here the select's BDV is a vector (because of extract's BDV),
    // while the select itself is a scalar type. Note that the IE and EE
    // instruction check is not fully subsumed by the vector<->scalar check at
    // the end, this is due to the BDV algorithm being ignorant of BDV types at
    // this junction.
    if (!areBothVectorOrScalar(BaseValue, I))
      return true;
    return false;
  };

  bool Progress = true;
  while (Progress) {
#ifndef NDEBUG
    const size_t OldSize = States.size();
#endif
    Progress = false;
    // We're only changing values in this loop, thus safe to keep iterators.
    // Since this is computing a fixed point, the order of visit does not
    // affect the result. TODO: We could use a worklist here and make this run
    // much faster.
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      // Only values that do not have known bases or those that have differing
      // type (scalar versus vector) from a possible known base should be in the
      // lattice.
      assert((!isKnownBase(BDV, KnownBases) ||
              !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
             "why did it get added?");

      BDVState NewState(BDV);
      visitBDVOperands(BDV, [&](Value *Op) {
        Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
        auto OpState = GetStateForBDV(BDV, Op);
        NewState.meet(OpState);
      });

      // If the instruction has a known base, but should in fact be marked as
      // a conflict because of incompatible in/out types, we mark it as such,
      // ensuring that it will propagate through the fixpoint iteration.
      auto I = cast<Instruction>(BDV);
      auto BV = NewState.getBaseValue();
      if (BV && MarkConflict(I, BV))
        NewState = BDVState(I, BDVState::Conflict);

      BDVState OldState = Pair.second;
      if (OldState != NewState) {
        Progress = true;
        States[BDV] = NewState;
      }
    }

    assert(OldSize == States.size() &&
           "fixed point shouldn't be adding any new nodes to state");
  }

#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
  for (const auto &Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }

  // Since we do the conflict marking as part of the fixpoint iteration this
  // loop only asserts that invariants are met.
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    auto *BaseValue = State.getBaseValue();
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert(
        (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
        "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
  }
#endif

  // Insert Phis for all conflicts
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBase(I, KnownBases) ||
            !areBothVectorOrScalar(I, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // Since we're joining a vector and scalar base, they can never be the
    // same. As a result, we should always see insert element having reached
    // the conflict state.
    assert(!isa<InsertElementInst>(I) || State.isConflict());

    if (!State.isConflict())
      continue;

    auto getMangledName = [](Instruction *I) -> std::string {
      if (isa<PHINode>(I)) {
        return suffixed_name_or(I, ".base", "base_phi");
      } else if (isa<SelectInst>(I)) {
        return suffixed_name_or(I, ".base", "base_select");
      } else if (isa<ExtractElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ee");
      } else if (isa<InsertElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ie");
      } else {
        return suffixed_name_or(I, ".base", "base_sv");
      }
    };

    Instruction *BaseInst = I->clone();
    BaseInst->insertBefore(I);
    BaseInst->setName(getMangledName(I));
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(I, BDVState::Conflict, BaseInst);
    setKnownBase(BaseInst, /* IsKnownBase */ true, KnownBases);
  }

#ifndef NDEBUG
  VerifyStates();
#endif

  // Returns an instruction which produces the base pointer for a given
  // instruction. The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above. As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table. In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
    Value *Base = nullptr;
    if (!States.count(BDV)) {
      assert(areBothVectorOrScalar(BDV, Input));
      Base = BDV;
    } else {
      // Either conflict or base.
      assert(States.count(BDV));
      Base = States[BDV].getBaseValue();
    }
    assert(Base && "Can't be null");
    // The cast is needed since base traversal may strip away bitcasts
    if (Base->getType() != Input->getType() && InsertPt)
      Base = new BitCastInst(Base, Input->getType(), "cast",
                             InsertPt->getIterator());
    return Base;
  };

  // Fixup all the inputs of the new PHIs. Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBase(BDV, KnownBases) ||
            !areBothVectorOrScalar(BDV, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
      PHINode *PN = cast<PHINode>(BDV);
      const unsigned NumPHIValues = PN->getNumIncomingValues();

      // The IR verifier requires phi nodes with multiple entries from the
      // same basic block to have the same incoming value for each of those
      // entries. Since we're inserting bitcasts in the loop, make sure we
      // do so at least once per incoming block.
      DenseMap<BasicBlock *, Value *> BlockToValue;
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = PN->getIncomingValue(i);
        BasicBlock *InBB = PN->getIncomingBlock(i);
        if (!BlockToValue.count(InBB))
          BlockToValue[InBB] = getBaseForInput(InVal, InBB->getTerminator());
        else {
#ifndef NDEBUG
          Value *OldBase = BlockToValue[InBB];
          Value *Base = getBaseForInput(InVal, nullptr);

          // We can't use `stripPointerCasts` instead of this function because
          // `stripPointerCasts` doesn't handle vectors of pointers.
          auto StripBitCasts = [](Value *V) -> Value * {
            while (auto *BC = dyn_cast<BitCastInst>(V))
              V = BC->getOperand(0);
            return V;
          };
          // In essence this assert states: the only way two values
          // incoming from the same basic block may be different is by
          // being different bitcasts of the same value. A cleanup
          // that remains TODO is changing findBaseOrBDV to return an
          // llvm::Value of the correct type (and still remain pure).
          // This will remove the need to add bitcasts.
          assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
                 "findBaseOrBDV should be pure!");
#endif
        }
        Value *Base = BlockToValue[InBB];
        BasePHI->setIncomingValue(i, Base);
      }
    } else if (SelectInst *BaseSI =
                   dyn_cast<SelectInst>(State.getBaseValue())) {
      SelectInst *SI = cast<SelectInst>(BDV);

      // Find the instruction which produces the base for each input.
      // We may need to insert a bitcast.
      BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
      BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
    } else if (auto *BaseEE =
                   dyn_cast<ExtractElementInst>(State.getBaseValue())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input. We may
      // need to insert a bitcast.
      BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
    } else if (auto *BaseIE =
                   dyn_cast<InsertElementInst>(State.getBaseValue())) {
      auto *BdvIE = cast<InsertElementInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    } else {
      auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
      auto *BdvSV = cast<ShuffleVectorInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvSV->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseSV);
        BaseSV->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      if (!BdvSV->isZeroEltSplat())
        UpdateOperand(1); // vector operand
      else {
        // Never read, so just use poison
        Value *InVal = BdvSV->getOperand(1);
        BaseSV->setOperand(1, PoisonValue::get(InVal->getType()));
      }
    }
  }

#ifndef NDEBUG
  VerifyStates();
#endif

  // Get the data layout to compare the sizes of base/derived pointer values.
  [[maybe_unused]] auto &DL =
      cast<llvm::Instruction>(Def)->getDataLayout();
  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation! FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *Base = Pair.second.getBaseValue();
    assert(BDV && Base);
    // Whenever we have a derived ptr(s), their base
    // ptr(s) must be of the same size, not necessarily the same type
    assert(DL.getTypeAllocSize(BDV->getType()) ==
               DL.getTypeAllocSize(Base->getType()) &&
           "Derived and base values should have same size");
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert(
        (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
        "why did it get added?");

    LLVM_DEBUG(
        dbgs() << "Updating base value cache"
               << " for: " << BDV->getName() << " from: "
               << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
               << " to: " << Base->getName() << "\n");

    Cache[BDV] = Base;
  }
  assert(Cache.count(Def));
  return Cache[Def];
}

// For a set of live pointers (base and/or derived), identify the base
// pointer of the object which they are derived from. This routine will
// mutate the IR graph as needed to make the 'base' pointer live at the
// definition site of 'derived'. This ensures that any use of 'derived' can
// also use 'base'. This may involve the insertion of a number of
// additional PHI nodes.
//
// preconditions: live is a set of pointer type Values
//
// side effects: may insert PHI nodes into the existing CFG, will preserve
// CFG, will not remove or mutate any existing nodes
//
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
static void findBasePointers(const StatepointLiveSetTy &live,
                             PointerToBaseTy &PointerToBase, DominatorTree *DT,
                             DefiningValueMapTy &DVCache,
                             IsKnownBaseMapTy &KnownBases) {
  for (Value *ptr : live) {
    Value *base = findBasePointer(ptr, DVCache, KnownBases);
    assert(base && "failed to find base pointer");
    PointerToBase[ptr] = base;
    assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
            DT->dominates(cast<Instruction>(base)->getParent(),
                          cast<Instruction>(ptr)->getParent())) &&
           "The base we found better dominate the derived pointer");
  }
}

/// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                             CallBase *Call,
                             PartiallyConstructedSafepointRecord &result,
                             PointerToBaseTy &PointerToBase,
                             IsKnownBaseMapTy &KnownBases) {
  StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
  // We assume that all pointers passed to deopt are base pointers; as an
  // optimization, we can use this to avoid separately materializing the base
  // pointer graph. This is only relevant since we're very conservative about
  // generating new conflict nodes during base pointer insertion. If we were
  // smarter there, this would be irrelevant.
  if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt))
    for (Value *V : Opt->Inputs) {
      if (!PotentiallyDerivedPointers.count(V))
        continue;
      PotentiallyDerivedPointers.remove(V);
      PointerToBase[V] = V;
    }
  findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
                   KnownBases);
}

/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &result,
                                  PointerToBaseTy &PointerToBase,
                                  GCStrategy *GC);

static void recomputeLiveInValues(
    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
    PointerToBaseTy &PointerToBase, GCStrategy *GC) {
  // TODO-PERF: reuse the original liveness, then simply run the dataflow
  // again. The old values are still live and will help it stabilize quickly.
  GCPtrLivenessData RevisedLivenessData;
  computeLiveInValues(DT, F, RevisedLivenessData, GC);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase,
                          GC);
  }
}

// Utility function which clones all instructions from "ChainToBase"
// and inserts them before "InsertBefore". Returns the rematerialized value
// which should be used after the statepoint.
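//
// Illustrative example (schematic IR): for the chain
//   %d1 = getelementptr i8, ptr addrspace(1) %base, i64 8
//   %d2 = getelementptr i8, ptr addrspace(1) %d1, i64 4
// cloning after the statepoint, rooted at the relocated base, yields
//   %d1.remat = getelementptr i8, ptr addrspace(1) %base.relocated, i64 8
//   %d2.remat = getelementptr i8, ptr addrspace(1) %d1.remat, i64 4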
static Instruction *rematerializeChain(ArrayRef<Instruction *> ChainToBase,
                                       Instruction *InsertBefore,
                                       Value *RootOfChain,
                                       Value *AlternateLiveBase) {
  Instruction *LastClonedValue = nullptr;
  Instruction *LastValue = nullptr;
  // Walk backwards to visit top-most instructions first.
  for (Instruction *Instr :
       make_range(ChainToBase.rbegin(), ChainToBase.rend())) {
    // Only GEPs and casts are supported as we need to be careful to not
    // introduce any new uses of pointers not in the liveset.
    // Note that it's fine to introduce new uses of pointers which were
    // otherwise not used after this statepoint.
    assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

    Instruction *ClonedValue = Instr->clone();
    ClonedValue->insertBefore(InsertBefore);
    ClonedValue->setName(Instr->getName() + ".remat");

    // If it is not the first instruction in the chain then it uses the
    // previously cloned value. We should update it to use the cloned value.
    if (LastClonedValue) {
      assert(LastValue);
      ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
      for (auto *OpValue : ClonedValue->operand_values()) {
        // Assert that cloned instruction does not use any instructions from
        // this chain other than LastClonedValue
        assert(!is_contained(ChainToBase, OpValue) &&
               "incorrect use in rematerialization chain");
        // Assert that the cloned instruction does not use the RootOfChain
        // or the AlternateLiveBase.
        assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
      }
#endif
    } else {
      // For the first instruction, replace the use of unrelocated base i.e.
      // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
      // live set. They have been proved to be the same PHI nodes. Note
      // that the *only* use of the RootOfChain in the ChainToBase list is
      // the first Value in the list.
      if (RootOfChain != AlternateLiveBase)
        ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
    }

    LastClonedValue = ClonedValue;
    LastValue = Instr;
  }
  assert(LastClonedValue);
  return LastClonedValue;
}

// When inserting gc.relocate and gc.result calls, we need to ensure there are
// no uses of the original value / return value between the gc.statepoint and
// the gc.relocate / gc.result call. One case which can arise is a phi node
// at the start of one of the successor blocks. We also need to be able to
// insert the gc.relocates only on the path which goes through the statepoint.
// We might need to split an edge to make this possible.
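//
// Schematically, for an invoke whose normal destination %normal has a second
// predecessor (block names invented for this sketch):
//
//     invoke_bb ---> %normal <--- other_bb
//
// the edge is split so the gc.relocates can be placed in a dedicated block
// that is only reached from the invoke:
//
//     invoke_bb ---> %normal.split ---> %normal <--- other_bb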
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
                            DominatorTree &DT) {
  BasicBlock *Ret = BB;
  if (!BB->getUniquePredecessor())
    Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);

  // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
  // from it
  FoldSingleEntryPHINodes(Ret);
  assert(!isa<PHINode>(Ret->begin()) &&
         "All PHI nodes should have been removed!");

  // At this point, we can safely insert a gc.relocate or gc.result as the first
  // instruction in Ret if needed.
  return Ret;
}

// List of all function attributes which must be stripped when lowering from
// abstract machine model to physical machine model. Essentially, these are
// all the effects a safepoint might have which we ignored in the abstract
// machine model for purposes of optimization. We have to strip these on
// both function declarations and call sites.
static constexpr Attribute::AttrKind FnAttrsToStrip[] =
    {Attribute::Memory, Attribute::NoSync, Attribute::NoFree};

1457// Create new attribute set containing only attributes which can be transferred
1458// from the original call to the safepoint.
1459static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic,
1460 AttributeList StatepointAL) {
1461 AttributeList OrigAL = Call->getAttributes();
1462 if (OrigAL.isEmpty())
1463 return StatepointAL;
1464
1465 // Remove the readonly, readnone, and statepoint function attributes.
1466 LLVMContext &Ctx = Call->getContext();
1467 AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
1468 for (auto Attr : FnAttrsToStrip)
1469 FnAttrs.removeAttribute(Attr);
1470
1471 for (Attribute A : OrigAL.getFnAttrs()) {
1473 FnAttrs.removeAttribute(A);
1474 }
1475
1476 StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs);
1477
1478 // The memory intrinsics do not have a 1:1 correspondence of the original
1479 // call arguments to the produced statepoint. Do not transfer the argument
1480 // attributes to avoid putting them on incorrect arguments.
1481 if (IsMemIntrinsic)
1482 return StatepointAL;
1483
1484 // Attach the argument attributes from the original call at the corresponding
1485 // arguments in the statepoint. Note that any argument attributes that are
1486 // invalid after lowering are stripped in stripNonValidDataFromBody.
1487 for (unsigned I : llvm::seq(Call->arg_size()))
1488 StatepointAL = StatepointAL.addParamAttributes(
1489     Ctx, I + GCStatepointInst::CallArgsBeginPos,
1490     AttrBuilder(Ctx, OrigAL.getParamAttrs(I)));
1491
1492 // Return attributes are later attached to the gc.result intrinsic.
1493 return StatepointAL;
1494}
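// Editor's note (illustrative, not from the source): for a call such as
//
//   call void @f(ptr addrspace(1) nonnull %p) #0
//   attributes #0 = { memory(none) nosync }
//
// the memory(none)/nosync function attributes are dropped, since they describe
// the abstract machine model that the statepoint invalidates, while the
// nonnull parameter attribute is re-attached at the statepoint operand
// position (CallArgsBeginPos + 0) that now carries %p.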
1495
1496/// Helper function to place all gc relocates necessary for the given
1497/// statepoint.
1498/// Inputs:
1499/// liveVariables - list of variables to be relocated.
1500/// basePtrs - base pointers.
1501/// statepointToken - statepoint instruction to which relocates should be
1502/// bound.
1503 /// Builder - LLVM IR builder to be used to construct new calls.
1504 static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
1505                               ArrayRef<Value *> BasePtrs,
1506 Instruction *StatepointToken,
1507 IRBuilder<> &Builder, GCStrategy *GC) {
1508 if (LiveVariables.empty())
1509 return;
1510
1511 auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
1512 auto ValIt = llvm::find(LiveVec, Val);
1513 assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
1514 size_t Index = std::distance(LiveVec.begin(), ValIt);
1515 assert(Index < LiveVec.size() && "Bug in std::find?");
1516 return Index;
1517 };
1518 Module *M = StatepointToken->getModule();
1519
1520 // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
1521 // element type is i8 addrspace(1)*). We originally generated unique
1522 // declarations for each pointer type, but this proved problematic because
1523 // the intrinsic mangling code is incomplete and fragile. Since we're moving
1524 // towards a single unified pointer type anyways, we can just cast everything
1525 // to an i8* of the right address space. A bitcast is added later to convert
1526 // gc_relocate to the actual value's type.
1527 auto getGCRelocateDecl = [&](Type *Ty) {
1528     assert(isHandledGCPointerType(Ty, GC));
1529     auto AS = Ty->getScalarType()->getPointerAddressSpace();
1530 Type *NewTy = PointerType::get(M->getContext(), AS);
1531 if (auto *VT = dyn_cast<VectorType>(Ty))
1532 NewTy = FixedVectorType::get(NewTy,
1533 cast<FixedVectorType>(VT)->getNumElements());
1534     return Intrinsic::getOrInsertDeclaration(
1535         M, Intrinsic::experimental_gc_relocate, {NewTy});
1536 };
1537
1538 // Lazily populated map from input types to the canonicalized form mentioned
1539 // in the comment above. This should probably be cached somewhere more
1540 // broadly.
1541 DenseMap<Type *, Function *> TypeToDeclMap;
1542
1543 for (unsigned i = 0; i < LiveVariables.size(); i++) {
1544 // Generate the gc.relocate call and save the result
1545 Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
1546 Value *LiveIdx = Builder.getInt32(i);
1547
1548 Type *Ty = LiveVariables[i]->getType();
1549 if (!TypeToDeclMap.count(Ty))
1550 TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
1551 Function *GCRelocateDecl = TypeToDeclMap[Ty];
1552
1553 // only specify a debug name if we can give a useful one
1554 CallInst *Reloc = Builder.CreateCall(
1555 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1556 suffixed_name_or(LiveVariables[i], ".relocated", ""));
1557 // Trick CodeGen into thinking there are lots of free registers at this
1558 // fake call.
1559     Reloc->setCallingConv(CallingConv::Cold);
1560   }
1561}
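// Editor's illustration (assumed example, not from the source): for a live
// pointer %obj which is its own base, the loop above emits roughly
//
//   %obj.relocated = call ptr addrspace(1)
//       @llvm.experimental.gc.relocate.p1(token %statepoint_token, i32 0, i32 0)
//
// where both i32 operands index into the statepoint's gc-live list (base
// index and derived index, respectively). Names and the exact intrinsic
// mangling suffix here are assumptions for illustration.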
1562
1563namespace {
1564
1565 /// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
1566/// avoids having to worry about keeping around dangling pointers to Values.
1567class DeferredReplacement {
1568   AssertingVH<Instruction> Old;
1569   AssertingVH<Instruction> New;
1570   bool IsDeoptimize = false;
1571
1572 DeferredReplacement() = default;
1573
1574public:
1575 static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
1576 assert(Old != New && Old && New &&
1577 "Cannot RAUW equal values or to / from null!");
1578
1579 DeferredReplacement D;
1580 D.Old = Old;
1581 D.New = New;
1582 return D;
1583 }
1584
1585 static DeferredReplacement createDelete(Instruction *ToErase) {
1586 DeferredReplacement D;
1587 D.Old = ToErase;
1588 return D;
1589 }
1590
1591 static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
1592#ifndef NDEBUG
1593 auto *F = cast<CallInst>(Old)->getCalledFunction();
1594 assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
1595 "Only way to construct a deoptimize deferred replacement");
1596#endif
1597 DeferredReplacement D;
1598 D.Old = Old;
1599 D.IsDeoptimize = true;
1600 return D;
1601 }
1602
1603 /// Does the task represented by this instance.
1604 void doReplacement() {
1605 Instruction *OldI = Old;
1606 Instruction *NewI = New;
1607
1608 assert(OldI != NewI && "Disallowed at construction?!");
1609 assert((!IsDeoptimize || !New) &&
1610 "Deoptimize intrinsics are not replaced!");
1611
1612 Old = nullptr;
1613 New = nullptr;
1614
1615 if (NewI)
1616 OldI->replaceAllUsesWith(NewI);
1617
1618 if (IsDeoptimize) {
1619 // Note: we've inserted instructions, so the call to llvm.deoptimize may
1620 // not necessarily be followed by the matching return.
1621 auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
1622 new UnreachableInst(RI->getContext(), RI->getIterator());
1623 RI->eraseFromParent();
1624 }
1625
1626 OldI->eraseFromParent();
1627 }
1628};
1629
1630} // end anonymous namespace
1631
1633 const char *DeoptLowering = "deopt-lowering";
1634 if (Call->hasFnAttr(DeoptLowering)) {
1635 // FIXME: Calls have a *really* confusing interface around attributes
1636 // with values.
1637 const AttributeList &CSAS = Call->getAttributes();
1638 if (CSAS.hasFnAttr(DeoptLowering))
1639 return CSAS.getFnAttr(DeoptLowering).getValueAsString();
1640 Function *F = Call->getCalledFunction();
1641 assert(F && F->hasFnAttribute(DeoptLowering));
1642 return F->getFnAttribute(DeoptLowering).getValueAsString();
1643 }
1644 return "live-through";
1645}
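// Editor's illustration (assumed IR, not from the source): the lowering
// request travels as a string function attribute, e.g.
//
//   call void @foo() #0 [ "deopt"(i32 7) ]
//   attributes #0 = { "deopt-lowering"="live-in" }
//
// With no such attribute present, the function above answers "live-through".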
1646
1647static void
1649 const SmallVectorImpl<Value *> &BasePtrs,
1651 PartiallyConstructedSafepointRecord &Result,
1652 std::vector<DeferredReplacement> &Replacements,
1653 const PointerToBaseTy &PointerToBase,
1654 GCStrategy *GC) {
1655 assert(BasePtrs.size() == LiveVariables.size());
1656
1657 // Then go ahead and use the builder to actually do the inserts. We insert
1658 // immediately before the previous instruction under the assumption that all
1659 // arguments will be available here. We can't insert afterwards since we may
1660 // be replacing a terminator.
1661 IRBuilder<> Builder(Call);
1662
1663   ArrayRef<Value *> GCLive(LiveVariables.begin(), LiveVariables.end());
1664   uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
1665   uint32_t NumPatchBytes = 0;
1666   uint32_t Flags = uint32_t(StatepointFlags::None);
1667
1668 SmallVector<Value *, 8> CallArgs(Call->args());
1669 std::optional<ArrayRef<Use>> DeoptArgs;
1670 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
1671 DeoptArgs = Bundle->Inputs;
1672 std::optional<ArrayRef<Use>> TransitionArgs;
1673 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
1674 TransitionArgs = Bundle->Inputs;
1675 // TODO: This flag no longer serves a purpose and can be removed later
1676     Flags |= uint32_t(StatepointFlags::GCTransition);
1677   }
1678
1679 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1680 // with a return value, we lower them as never-returning calls to
1681 // __llvm_deoptimize that are followed by unreachable to get better codegen.
1682 bool IsDeoptimize = false;
1683 bool IsMemIntrinsic = false;
1684
1686 parseStatepointDirectivesFromAttrs(Call->getAttributes());
1687 if (SD.NumPatchBytes)
1688 NumPatchBytes = *SD.NumPatchBytes;
1689 if (SD.StatepointID)
1690 StatepointID = *SD.StatepointID;
1691
1692 // Pass through the requested lowering if any. The default is live-through.
1693 StringRef DeoptLowering = getDeoptLowering(Call);
1694 if (DeoptLowering == "live-in")
1695     Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
1696   else {
1697 assert(DeoptLowering == "live-through" && "Unsupported value!");
1698 }
1699
1700 FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
1701 if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
1702 auto IID = F->getIntrinsicID();
1703 if (IID == Intrinsic::experimental_deoptimize) {
1704 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1705 // __llvm_deoptimize symbol. We want to resolve this now, since the
1706 // verifier does not allow taking the address of an intrinsic function.
1707
1708 SmallVector<Type *, 8> DomainTy;
1709 for (Value *Arg : CallArgs)
1710 DomainTy.push_back(Arg->getType());
1711 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1712 /* isVarArg = */ false);
1713
1714 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1715 // calls to @llvm.experimental.deoptimize with different argument types in
1716 // the same module. This is fine -- we assume the frontend knew what it
1717 // was doing when generating this kind of IR.
1718 CallTarget = F->getParent()
1719 ->getOrInsertFunction("__llvm_deoptimize", FTy);
1720
1721 IsDeoptimize = true;
1722 } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
1723 IID == Intrinsic::memmove_element_unordered_atomic) {
1724 IsMemIntrinsic = true;
1725
1726 // Unordered atomic memcpy and memmove intrinsics which are not explicitly
1727 // marked as "gc-leaf-function" should be lowered in a GC parseable way.
1728 // Specifically, these calls should be lowered to the
1729 // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
1730 // Similarly to __llvm_deoptimize we want to resolve this now, since the
1731 // verifier does not allow taking the address of an intrinsic function.
1732 //
1733 // Moreover we need to shuffle the arguments for the call in order to
1734 // accommodate GC. The underlying source and destination objects might be
1735       // relocated during the copy operation should a GC occur. To relocate the
1736 // derived source and destination pointers the implementation of the
1737 // intrinsic should know the corresponding base pointers.
1738 //
1739 // To make the base pointers available pass them explicitly as arguments:
1740 // memcpy(dest_derived, source_derived, ...) =>
1741 // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
1742 auto &Context = Call->getContext();
1743 auto &DL = Call->getDataLayout();
1744 auto GetBaseAndOffset = [&](Value *Derived) {
1745 Value *Base = nullptr;
1746 // Optimizations in unreachable code might substitute the real pointer
1747         // with undef, poison or a null-derived constant. Return a null base for
1748 // them to be consistent with the handling in the main algorithm in
1749 // findBaseDefiningValue.
1750 if (isa<Constant>(Derived))
1751 Base =
1752 ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
1753 else {
1754 assert(PointerToBase.count(Derived));
1755 Base = PointerToBase.find(Derived)->second;
1756 }
1757 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
1758 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
1759 Value *Base_int = Builder.CreatePtrToInt(
1760 Base, Type::getIntNTy(Context, IntPtrSize));
1761 Value *Derived_int = Builder.CreatePtrToInt(
1762 Derived, Type::getIntNTy(Context, IntPtrSize));
1763 return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
1764 };
1765
1766 auto *Dest = CallArgs[0];
1767 Value *DestBase, *DestOffset;
1768 std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
1769
1770 auto *Source = CallArgs[1];
1771 Value *SourceBase, *SourceOffset;
1772 std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
1773
1774 auto *LengthInBytes = CallArgs[2];
1775 auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
1776
1777 CallArgs.clear();
1778 CallArgs.push_back(DestBase);
1779 CallArgs.push_back(DestOffset);
1780 CallArgs.push_back(SourceBase);
1781 CallArgs.push_back(SourceOffset);
1782 CallArgs.push_back(LengthInBytes);
1783
1784 SmallVector<Type *, 8> DomainTy;
1785 for (Value *Arg : CallArgs)
1786 DomainTy.push_back(Arg->getType());
1787 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1788 /* isVarArg = */ false);
1789
1790 auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
1791 uint64_t ElementSize = ElementSizeCI->getZExtValue();
1792 if (IID == Intrinsic::memcpy_element_unordered_atomic) {
1793 switch (ElementSize) {
1794 case 1:
1795 return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
1796 case 2:
1797 return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
1798 case 4:
1799 return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
1800 case 8:
1801 return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
1802 case 16:
1803 return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
1804 default:
1805 llvm_unreachable("unexpected element size!");
1806 }
1807 }
1808 assert(IID == Intrinsic::memmove_element_unordered_atomic);
1809 switch (ElementSize) {
1810 case 1:
1811 return "__llvm_memmove_element_unordered_atomic_safepoint_1";
1812 case 2:
1813 return "__llvm_memmove_element_unordered_atomic_safepoint_2";
1814 case 4:
1815 return "__llvm_memmove_element_unordered_atomic_safepoint_4";
1816 case 8:
1817 return "__llvm_memmove_element_unordered_atomic_safepoint_8";
1818 case 16:
1819 return "__llvm_memmove_element_unordered_atomic_safepoint_16";
1820 default:
1821 llvm_unreachable("unexpected element size!");
1822 }
1823 };
1824
1825 CallTarget =
1826 F->getParent()
1827 ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
1828 }
1829 }
1830
1831 // Create the statepoint given all the arguments
1832 GCStatepointInst *Token = nullptr;
1833 if (auto *CI = dyn_cast<CallInst>(Call)) {
1834 CallInst *SPCall = Builder.CreateGCStatepointCall(
1835 StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1836 TransitionArgs, DeoptArgs, GCLive, "safepoint_token");
1837
1838 SPCall->setTailCallKind(CI->getTailCallKind());
1839 SPCall->setCallingConv(CI->getCallingConv());
1840
1841 // Set up function attrs directly on statepoint and return attrs later for
1842 // gc_result intrinsic.
1843 SPCall->setAttributes(
1844 legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes()));
1845
1846 Token = cast<GCStatepointInst>(SPCall);
1847
1848     // Put the following gc_result and gc_relocate calls immediately after
1849 // the old call (which we're about to delete)
1850 assert(CI->getNextNode() && "Not a terminator, must have next!");
1851 Builder.SetInsertPoint(CI->getNextNode());
1852 Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
1853 } else {
1854 auto *II = cast<InvokeInst>(Call);
1855
1856 // Insert the new invoke into the old block. We'll remove the old one in a
1857 // moment at which point this will become the new terminator for the
1858 // original block.
1859 InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
1860 StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
1861 II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
1862 GCLive, "statepoint_token");
1863
1864 SPInvoke->setCallingConv(II->getCallingConv());
1865
1866 // Set up function attrs directly on statepoint and return attrs later for
1867 // gc_result intrinsic.
1868 SPInvoke->setAttributes(
1869 legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes()));
1870
1871 Token = cast<GCStatepointInst>(SPInvoke);
1872
1873 // Generate gc relocates in exceptional path
1874 BasicBlock *UnwindBlock = II->getUnwindDest();
1875 assert(!isa<PHINode>(UnwindBlock->begin()) &&
1876 UnwindBlock->getUniquePredecessor() &&
1877 "can't safely insert in this block!");
1878
1879 Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt());
1880 Builder.SetCurrentDebugLocation(II->getDebugLoc());
1881
1882 // Attach exceptional gc relocates to the landingpad.
1883 Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
1884 Result.UnwindToken = ExceptionalToken;
1885
1886 CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC);
1887
1888 // Generate gc relocates and returns for normal block
1889 BasicBlock *NormalDest = II->getNormalDest();
1890 assert(!isa<PHINode>(NormalDest->begin()) &&
1891 NormalDest->getUniquePredecessor() &&
1892 "can't safely insert in this block!");
1893
1894 Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt());
1895
1896     // gc relocates will be generated later as if it were a regular call
1897     // statepoint
1898 }
1899 assert(Token && "Should be set in one of the above branches!");
1900
1901 if (IsDeoptimize) {
1902 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1903 // transform the tail-call like structure to a call to a void function
1904 // followed by unreachable to get better codegen.
1905 Replacements.push_back(
1906 DeferredReplacement::createDeoptimizeReplacement(Call));
1907 } else {
1908 Token->setName("statepoint_token");
1909 if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
1910 StringRef Name = Call->hasName() ? Call->getName() : "";
1911 CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
1912 GCResult->setAttributes(
1914 Call->getAttributes().getRetAttrs()));
1915
1916 // We cannot RAUW or delete CS.getInstruction() because it could be in the
1917 // live set of some other safepoint, in which case that safepoint's
1918 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1919 // llvm::Instruction. Instead, we defer the replacement and deletion to
1920 // after the live sets have been made explicit in the IR, and we no longer
1921 // have raw pointers to worry about.
1922 Replacements.emplace_back(
1923 DeferredReplacement::createRAUW(Call, GCResult));
1924 } else {
1925 Replacements.emplace_back(DeferredReplacement::createDelete(Call));
1926 }
1927 }
1928
1929 Result.StatepointToken = Token;
1930
1931 // Second, create a gc.relocate for every live variable
1932 CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
1933}
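// Editor's illustration (hedged sketch, not part of the original source):
// after this rewrite, a call such as
//
//   %ret = call ptr addrspace(1) @callee(ptr addrspace(1) %obj)
//
// is roughly replaced by
//
//   %tok = call token @llvm.experimental.gc.statepoint.p0(...)  ; wraps @callee
//   %ret.new = call ptr addrspace(1) @llvm.experimental.gc.result.p1(token %tok)
//   %obj.relocated = call ptr addrspace(1)
//       @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0)
//
// with uses of %ret deferred-RAUW'd to %ret.new. Operand lists are elided;
// names and mangling suffixes here are assumptions for illustration.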
1934
1935// Replace an existing gc.statepoint with a new one and a set of gc.relocates
1936// which make the relocations happening at this safepoint explicit.
1937//
1938// WARNING: Does not do any fixup to adjust users of the original live
1939 // values. That's the caller's responsibility.
1940static void
1942 PartiallyConstructedSafepointRecord &Result,
1943 std::vector<DeferredReplacement> &Replacements,
1944 const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
1945 const auto &LiveSet = Result.LiveSet;
1946
1947 // Convert to vector for efficient cross referencing.
1948 SmallVector<Value *, 64> BaseVec, LiveVec;
1949 LiveVec.reserve(LiveSet.size());
1950 BaseVec.reserve(LiveSet.size());
1951 for (Value *L : LiveSet) {
1952 LiveVec.push_back(L);
1953 assert(PointerToBase.count(L));
1954 Value *Base = PointerToBase.find(L)->second;
1955 BaseVec.push_back(Base);
1956 }
1957 assert(LiveVec.size() == BaseVec.size());
1958
1959 // Do the actual rewriting and delete the old statepoint
1960 makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
1961 PointerToBase, GC);
1962}
1963
1964// Helper function for the relocationViaAlloca.
1965//
1966 // It receives an iterator over the statepoint gc relocates and emits a store
1967 // to the assigned location (via allocaMap) for each one of them. It adds the
1968 // visited values into the visitedLiveValues set, which we will later use
1969// for validation checking.
1970static void
1973 DenseSet<Value *> &VisitedLiveValues) {
1974 for (User *U : GCRelocs) {
1975 GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1976 if (!Relocate)
1977 continue;
1978
1979 Value *OriginalValue = Relocate->getDerivedPtr();
1980 assert(AllocaMap.count(OriginalValue));
1981 Value *Alloca = AllocaMap[OriginalValue];
1982
1983 // Emit store into the related alloca.
1984 assert(Relocate->getNextNode() &&
1985 "Should always have one since it's not a terminator");
1986 new StoreInst(Relocate, Alloca, std::next(Relocate->getIterator()));
1987
1988#ifndef NDEBUG
1989 VisitedLiveValues.insert(OriginalValue);
1990#endif
1991 }
1992}
1993
1994// Helper function for the "relocationViaAlloca". Similar to the
1995// "insertRelocationStores" but works for rematerialized values.
1997 const RematerializedValueMapTy &RematerializedValues,
1999 DenseSet<Value *> &VisitedLiveValues) {
2000 for (auto RematerializedValuePair: RematerializedValues) {
2001 Instruction *RematerializedValue = RematerializedValuePair.first;
2002 Value *OriginalValue = RematerializedValuePair.second;
2003
2004 assert(AllocaMap.count(OriginalValue) &&
2005 "Can not find alloca for rematerialized value");
2006 Value *Alloca = AllocaMap[OriginalValue];
2007
2008 new StoreInst(RematerializedValue, Alloca,
2009 std::next(RematerializedValue->getIterator()));
2010
2011#ifndef NDEBUG
2012 VisitedLiveValues.insert(OriginalValue);
2013#endif
2014 }
2015}
2016
2017/// Do all the relocation update via allocas and mem2reg
2018 static void relocationViaAlloca(
2019     Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
2020     ArrayRef<PartiallyConstructedSafepointRecord> Records) {
2021#ifndef NDEBUG
2022 // record initial number of (static) allocas; we'll check we have the same
2023 // number when we get done.
2024 int InitialAllocaNum = 0;
2025 for (Instruction &I : F.getEntryBlock())
2026 if (isa<AllocaInst>(I))
2027 InitialAllocaNum++;
2028#endif
2029
2030 // TODO-PERF: change data structures, reserve
2032 SmallVector<AllocaInst *, 200> PromotableAllocas;
2033 // Used later to chack that we have enough allocas to store all values
2034 std::size_t NumRematerializedValues = 0;
2035 PromotableAllocas.reserve(Live.size());
2036
2037 // Emit alloca for "LiveValue" and record it in "allocaMap" and
2038 // "PromotableAllocas"
2039 const DataLayout &DL = F.getDataLayout();
2040 auto emitAllocaFor = [&](Value *LiveValue) {
2041 AllocaInst *Alloca =
2042 new AllocaInst(LiveValue->getType(), DL.getAllocaAddrSpace(), "",
2043 F.getEntryBlock().getFirstNonPHIIt());
2044 AllocaMap[LiveValue] = Alloca;
2045 PromotableAllocas.push_back(Alloca);
2046 };
2047
2048 // Emit alloca for each live gc pointer
2049 for (Value *V : Live)
2050 emitAllocaFor(V);
2051
2052 // Emit allocas for rematerialized values
2053 for (const auto &Info : Records)
2054 for (auto RematerializedValuePair : Info.RematerializedValues) {
2055 Value *OriginalValue = RematerializedValuePair.second;
2056 if (AllocaMap.contains(OriginalValue))
2057 continue;
2058
2059 emitAllocaFor(OriginalValue);
2060 ++NumRematerializedValues;
2061 }
2062
2063 // The next two loops are part of the same conceptual operation. We need to
2064 // insert a store to the alloca after the original def and at each
2065 // redefinition. We need to insert a load before each use. These are split
2066 // into distinct loops for performance reasons.
2067
2068 // Update gc pointer after each statepoint: either store a relocated value or
2069 // null (if no relocated value was found for this gc pointer and it is not a
2070   // gc_result). This must happen before we update the statepoint with a load
2071   // of the alloca, otherwise we lose the link between statepoint and old def.
2072 for (const auto &Info : Records) {
2073 Value *Statepoint = Info.StatepointToken;
2074
2075 // This will be used for consistency check
2076 DenseSet<Value *> VisitedLiveValues;
2077
2078 // Insert stores for normal statepoint gc relocates
2079 insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
2080
2081     // In case it was an invoke statepoint,
2082     // we will insert stores for the exceptional path gc relocates.
2083 if (isa<InvokeInst>(Statepoint)) {
2084 insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
2085 VisitedLiveValues);
2086 }
2087
2088     // Do a similar thing with rematerialized values
2089 insertRematerializationStores(Info.RematerializedValues, AllocaMap,
2090 VisitedLiveValues);
2091
2092 if (ClobberNonLive) {
2093 // As a debugging aid, pretend that an unrelocated pointer becomes null at
2094 // the gc.statepoint. This will turn some subtle GC problems into
2095 // slightly easier to debug SEGVs. Note that on large IR files with
2096       // lots of gc.statepoints this is extremely costly in both memory and
2097       // time.
2098       SmallVector<AllocaInst *, 64> ToClobber;
2099       for (auto Pair : AllocaMap) {
2100 Value *Def = Pair.first;
2101 AllocaInst *Alloca = Pair.second;
2102
2103 // This value was relocated
2104 if (VisitedLiveValues.count(Def)) {
2105 continue;
2106 }
2107 ToClobber.push_back(Alloca);
2108 }
2109
2110 auto InsertClobbersAt = [&](BasicBlock::iterator IP) {
2111 for (auto *AI : ToClobber) {
2112 auto AT = AI->getAllocatedType();
2113 Constant *CPN;
2114 if (AT->isVectorTy())
2115             CPN = ConstantAggregateZero::get(AT);
2116           else
2117 CPN = ConstantPointerNull::get(cast<PointerType>(AT));
2118 new StoreInst(CPN, AI, IP);
2119 }
2120 };
2121
2122 // Insert the clobbering stores. These may get intermixed with the
2123 // gc.results and gc.relocates, but that's fine.
2124 if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
2125 InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
2126 InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
2127 } else {
2128 InsertClobbersAt(
2129 std::next(cast<Instruction>(Statepoint)->getIterator()));
2130 }
2131 }
2132 }
2133
2134 // Update use with load allocas and add store for gc_relocated.
2135 for (auto Pair : AllocaMap) {
2136 Value *Def = Pair.first;
2137 AllocaInst *Alloca = Pair.second;
2138
2139     // We pre-record the uses of allocas so that we don't have to worry about
2140     // later updates that change the user information.
2141
2142     SmallVector<Instruction *, 20> Uses;
2143 // PERF: trade a linear scan for repeated reallocation
2144 Uses.reserve(Def->getNumUses());
2145 for (User *U : Def->users()) {
2146 if (!isa<ConstantExpr>(U)) {
2147 // If the def has a ConstantExpr use, then the def is either a
2148 // ConstantExpr use itself or null. In either case
2149 // (recursively in the first, directly in the second), the oop
2150 // it is ultimately dependent on is null and this particular
2151 // use does not need to be fixed up.
2152 Uses.push_back(cast<Instruction>(U));
2153 }
2154 }
2155
2156     llvm::sort(Uses);
2157     auto Last = llvm::unique(Uses);
2158 Uses.erase(Last, Uses.end());
2159
2160 for (Instruction *Use : Uses) {
2161 if (isa<PHINode>(Use)) {
2162 PHINode *Phi = cast<PHINode>(Use);
2163 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
2164 if (Def == Phi->getIncomingValue(i)) {
2165 LoadInst *Load = new LoadInst(
2166 Alloca->getAllocatedType(), Alloca, "",
2167 Phi->getIncomingBlock(i)->getTerminator()->getIterator());
2168 Phi->setIncomingValue(i, Load);
2169 }
2170 }
2171 } else {
2172 LoadInst *Load = new LoadInst(Alloca->getAllocatedType(), Alloca, "",
2173 Use->getIterator());
2174 Use->replaceUsesOfWith(Def, Load);
2175 }
2176 }
2177
2178 // Emit store for the initial gc value. Store must be inserted after load,
2179 // otherwise store will be in alloca's use list and an extra load will be
2180 // inserted before it.
2181 StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
2182 DL.getABITypeAlign(Def->getType()));
2183 if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
2184 if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
2185         // InvokeInst is a terminator so the store needs to be inserted into its
2186 // normal destination block.
2187 BasicBlock *NormalDest = Invoke->getNormalDest();
2188 Store->insertBefore(NormalDest->getFirstNonPHI());
2189 } else {
2190 assert(!Inst->isTerminator() &&
2191 "The only terminator that can produce a value is "
2192 "InvokeInst which is handled above.");
2193 Store->insertAfter(Inst);
2194 }
2195 } else {
2196 assert(isa<Argument>(Def));
2197 Store->insertAfter(cast<Instruction>(Alloca));
2198 }
2199 }
2200
2201 assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
2202 "we must have the same allocas with lives");
2203 (void) NumRematerializedValues;
2204 if (!PromotableAllocas.empty()) {
2205 // Apply mem2reg to promote alloca to SSA
2206 PromoteMemToReg(PromotableAllocas, DT);
2207 }
2208
2209#ifndef NDEBUG
2210 for (auto &I : F.getEntryBlock())
2211 if (isa<AllocaInst>(I))
2212 InitialAllocaNum--;
2213 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
2214#endif
2215}
2216
2217 /// Implement a unique function which doesn't require that we sort the input
2218/// vector. Doing so has the effect of changing the output of a couple of
2219/// tests in ways which make them less useful in testing fused safepoints.
2220template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
2221 SmallSet<T, 8> Seen;
2222 erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
2223}
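// Editor's note (illustrative): given the sequence {a, b, a, c}, the function
// above leaves {a, b, c}, preserving first-occurrence order; a sort-based
// unique would reorder the vector and perturb the test output mentioned above.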
2224
2225/// Insert holders so that each Value is obviously live through the entire
2226/// lifetime of the call.
2227static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
2228 SmallVectorImpl<CallInst *> &Holders) {
2229 if (Values.empty())
2230 // No values to hold live, might as well not insert the empty holder
2231 return;
2232
2233 Module *M = Call->getModule();
2234 // Use a dummy vararg function to actually hold the values live
2235 FunctionCallee Func = M->getOrInsertFunction(
2236 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
2237 if (isa<CallInst>(Call)) {
2238 // For call safepoints insert dummy calls right after safepoint
2239 Holders.push_back(
2240 CallInst::Create(Func, Values, "", std::next(Call->getIterator())));
2241 return;
2242 }
2243   // For invoke safepoints insert dummy calls both in normal and
2244   // exceptional destination blocks
2245 auto *II = cast<InvokeInst>(Call);
2247 Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
2249 Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
2250}
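// Editor's illustration (assumed IR, not from the source): a holder is simply
//
//   call void (...) @__tmp_use(ptr addrspace(1) %a, ptr addrspace(1) %b)
//
// placed right after the safepoint (or at the start of both invoke
// destinations), keeping %a and %b trivially live until the holders are
// erased.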
2251
2252 static void findLiveReferences(
2253     Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
2254     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
2255     GCStrategy *GC) {
2256 GCPtrLivenessData OriginalLivenessData;
2257 computeLiveInValues(DT, F, OriginalLivenessData, GC);
2258 for (size_t i = 0; i < records.size(); i++) {
2259 struct PartiallyConstructedSafepointRecord &info = records[i];
2260 analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
2261 }
2262}
2263
2264// Helper function for the "rematerializeLiveValues". It walks use chain
2265// starting from the "CurrentValue" until it reaches the root of the chain, i.e.
2266// the base or a value it cannot process. Only "simple" values are processed
2267// (currently it is GEP's and casts). The returned root is examined by the
2268// callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
2269// with all visited values.
2271 SmallVectorImpl<Instruction*> &ChainToBase,
2272 Value *CurrentValue) {
2273 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2274 ChainToBase.push_back(GEP);
2275 return findRematerializableChainToBasePointer(ChainToBase,
2276 GEP->getPointerOperand());
2277 }
2278
2279 if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
2280 if (!CI->isNoopCast(CI->getDataLayout()))
2281 return CI;
2282
2283 ChainToBase.push_back(CI);
2284 return findRematerializableChainToBasePointer(ChainToBase,
2285 CI->getOperand(0));
2286 }
2287
2288 // We have reached the root of the chain, which is either equal to the base or
2289 // is the first unsupported value along the use chain.
2290 return CurrentValue;
2291}
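// Editor's illustration (assumed example, not from the source): for
//
//   %c = addrspacecast ptr %p to ptr addrspace(1)   ; not a noop cast
//   %d = getelementptr i8, ptr addrspace(1) %c, i64 16
//
// walking from %d pushes [%d] onto ChainToBase and returns %c as the root,
// since the address-space-changing cast stops the walk.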
2292
2293// Helper function for the "rematerializeLiveValues". Compute cost of the use
2294// chain we are going to rematerialize.
2295static InstructionCost
2296 chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
2297                        TargetTransformInfo &TTI) {
2298   InstructionCost Cost = 0;
2299
2300 for (Instruction *Instr : Chain) {
2301 if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
2302 assert(CI->isNoopCast(CI->getDataLayout()) &&
2303 "non noop cast is found during rematerialization");
2304
2305 Type *SrcTy = CI->getOperand(0)->getType();
2306 Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
2307                                    TTI::CastContextHint::None,
2308                                    TargetTransformInfo::TCK_SizeAndLatency);
2309
2310 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
2311 // Cost of the address calculation
2312 Type *ValTy = GEP->getSourceElementType();
2313       Cost += TTI.getAddressComputationCost(ValTy);
2314
2315 // And cost of the GEP itself
2316 // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
2317 // allowed for the external usage)
2318 if (!GEP->hasAllConstantIndices())
2319 Cost += 2;
2320
2321 } else {
2322 llvm_unreachable("unsupported instruction type during rematerialization");
2323 }
2324 }
2325
2326 return Cost;
2327}
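// Editor's note (worked example under assumed TTI costs, not from the
// source): a chain of one noop bitcast plus one GEP with a variable index
// totals cast-cost + address-computation-cost + 2. Callers compare this
// total against the rematerialization threshold to decide whether
// recomputing the derived pointer beats relocating it.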
2328
2329static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
2330 unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
2331 if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
2332 OrigRootPhi.getParent() != AlternateRootPhi.getParent())
2333 return false;
2334 // Map of incoming values and their corresponding basic blocks of
2335 // OrigRootPhi.
2336 SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
2337 for (unsigned i = 0; i < PhiNum; i++)
2338 CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
2339 OrigRootPhi.getIncomingBlock(i);
2340
2341 // Both current and base PHIs should have same incoming values and
2342 // the same basic blocks corresponding to the incoming values.
2343 for (unsigned i = 0; i < PhiNum; i++) {
2344 auto CIVI =
2345 CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
2346 if (CIVI == CurrentIncomingValues.end())
2347 return false;
2348 BasicBlock *CurrentIncomingBB = CIVI->second;
2349 if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
2350 return false;
2351 }
2352 return true;
2353}
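// Editor's illustration (assumed example, not from the source): the phis
//
//   %phi      = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
//   %phi.base = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
//
// are equivalent in the sense above: same parent block, same number of
// incoming values, and identical (value, incoming block) pairs.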
2354
2355 // Find derived pointers that can be recomputed cheaply enough and fill
2356// RematerizationCandidates with such candidates.
2357static void
2358findRematerializationCandidates(PointerToBaseTy PointerToBase,
2359 RematCandTy &RematerizationCandidates,
2360                                 TargetTransformInfo &TTI) {
2361   const unsigned int ChainLengthThreshold = 10;
2362
2363 for (auto P2B : PointerToBase) {
2364 auto *Derived = P2B.first;
2365 auto *Base = P2B.second;
2366 // Consider only derived pointers.
2367 if (Derived == Base)
2368 continue;
2369
2370 // For each live pointer find its defining chain.
2371     SmallVector<Instruction *, 3> ChainToBase;
2372     Value *RootOfChain =
2373 findRematerializableChainToBasePointer(ChainToBase, Derived);
2374
2375 // Nothing to do, or chain is too long
2376     if (ChainToBase.size() == 0 ||
2377         ChainToBase.size() > ChainLengthThreshold)
2378 continue;
2379
2380 // Handle the scenario where the RootOfChain is not equal to the
2381 // Base Value, but they are essentially the same phi values.
2382 if (RootOfChain != PointerToBase[Derived]) {
2383 PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
2384 PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
2385 if (!OrigRootPhi || !AlternateRootPhi)
2386 continue;
2387       // PHI nodes that have the same incoming values and belong to the same
2388       // basic block are essentially the same SSA value. When the original phi
2389 // has incoming values with different base pointers, the original phi is
2390 // marked as conflict, and an additional `AlternateRootPhi` with the same
2391 // incoming values get generated by the findBasePointer function. We need
2392 // to identify the newly generated AlternateRootPhi (.base version of phi)
2393 // and RootOfChain (the original phi node itself) are the same, so that we
2394 // can rematerialize the gep and casts. This is a workaround for the
2395 // deficiency in the findBasePointer algorithm.
2396 if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
2397 continue;
2398 }
2399 // Compute cost of this chain.
2400     InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
2401     // TODO: We can also account for cases when we will be able to remove some
2402 // of the rematerialized values by later optimization passes. I.e if
2403 // we rematerialized several intersecting chains. Or if original values
2404 // don't have any uses besides this statepoint.
2405
2406 // Ok, there is a candidate.
2407 RematerizlizationCandidateRecord Record;
2408 Record.ChainToBase = ChainToBase;
2409 Record.RootOfChain = RootOfChain;
2410 Record.Cost = Cost;
2411 RematerizationCandidates.insert({ Derived, Record });
2412 }
2413}
2414
2415// Try to rematerialize derived pointers immediately before their uses
2416// (instead of rematerializing after every statepoint it is live through).
2417// This can be beneficial when derived pointer is live across many
2418// statepoints, but uses are rare.
2420 RematCandTy &RematerizationCandidates,
2422 PointerToBaseTy &PointerToBase) {
2423 if (!RematDerivedAtUses)
2424 return;
2425
2426 SmallVector<Instruction *, 32> LiveValuesToBeDeleted;
2427
2428 LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, "
2429 << "Num statepoints: " << Records.size() << '\n');
2430
2431 for (auto &It : RematerizationCandidates) {
2432 Instruction *Cand = cast<Instruction>(It.first);
2433 auto &Record = It.second;
2434
2435     if (Record.Cost >= RematerializationThreshold)
2436       continue;
2437
2438 if (Cand->user_empty())
2439 continue;
2440
2441 if (Cand->hasOneUse())
2442 if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser()))
2443 if (U->getParent() == Cand->getParent())
2444 continue;
2445
2446 // Rematerialization before PHI nodes is not implemented.
2447 if (llvm::any_of(Cand->users(),
2448 [](const auto *U) { return isa<PHINode>(U); }))
2449 continue;
2450
2451 LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... ");
2452
2453     // The count of rematerialization instructions we introduce is equal to the
2454     // number of candidate uses.
2455     // The count of rematerialization instructions we eliminate is equal to the
2456     // number of statepoints it is live through.
2457     // Consider the transformation profitable if the latter is greater than the
2458     // former (in other words, we create fewer than we eliminate).
2459 unsigned NumLiveStatepoints = llvm::count_if(
2460 Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
2461 unsigned NumUses = Cand->getNumUses();
2462
2463 LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
2464 << NumLiveStatepoints << " ");
2465
2466 if (NumLiveStatepoints < NumUses) {
2467 LLVM_DEBUG(dbgs() << "not profitable\n");
2468 continue;
2469 }
2470
2471 // If rematerialization is 'free', then favor rematerialization at
2472 // uses as it generally shortens live ranges.
2473 // TODO: Short (size ==1) chains only?
2474 if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
2475 LLVM_DEBUG(dbgs() << "not profitable\n");
2476 continue;
2477 }
2478
2479 LLVM_DEBUG(dbgs() << "looks profitable\n");
2480
2481 // ChainToBase may contain another remat candidate (as a sub chain) which
2482 // has been rewritten by now. Need to recollect chain to have up to date
2483 // value.
2484 // TODO: sort records in findRematerializationCandidates() in
2485 // decreasing chain size order?
2486 if (Record.ChainToBase.size() > 1) {
2487 Record.ChainToBase.clear();
2488       findRematerializableChainToBasePointer(Record.ChainToBase, Cand);
2489     }
2490
2491 // Current rematerialization algorithm is very simple: we rematerialize
2492 // immediately before EVERY use, even if there are several uses in same
2493 // block or if use is local to Cand Def. The reason is that this allows
2494 // us to avoid recomputing liveness without complicated analysis:
2495 // - If we did not eliminate all uses of original Candidate, we do not
2496     //   know exactly in what BBs it is still live.
2497 // - If we rematerialize once per BB, we need to find proper insertion
2498 // place (first use in block, but after Def) and analyze if there is
2499 // statepoint between uses in the block.
2500 while (!Cand->user_empty()) {
2501 Instruction *UserI = cast<Instruction>(*Cand->user_begin());
2502 Instruction *RematChain = rematerializeChain(
2503 Record.ChainToBase, UserI, Record.RootOfChain, PointerToBase[Cand]);
2504 UserI->replaceUsesOfWith(Cand, RematChain);
2505 PointerToBase[RematChain] = PointerToBase[Cand];
2506 }
2507 LiveValuesToBeDeleted.push_back(Cand);
2508 }
2509
2510 LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
2511 << " derived pointers\n");
2512 for (auto *Cand : LiveValuesToBeDeleted) {
2513 assert(Cand->use_empty() && "Unexpected user remain");
2514 RematerizationCandidates.erase(Cand);
2515 for (auto &R : Records) {
2516 assert(!R.LiveSet.contains(Cand) ||
2517 R.LiveSet.contains(PointerToBase[Cand]));
2518 R.LiveSet.remove(Cand);
2519 }
2520 }
2521
2522   // Recollect chains that were not rematerialized - we might have rewritten
2523   // their sub-chains.
2524 if (!LiveValuesToBeDeleted.empty()) {
2525 for (auto &P : RematerizationCandidates) {
2526 auto &R = P.second;
2527 if (R.ChainToBase.size() > 1) {
2528 R.ChainToBase.clear();
2529 findRematerializableChainToBasePointer(R.ChainToBase, P.first);
2530 }
2531 }
2532 }
2533}
2534
2535 // From the statepoint live set pick values that are cheaper to recompute than
2536 // to relocate. Remove these values from the live set, rematerialize them after
2537// statepoint and record them in "Info" structure. Note that similar to
2538// relocated values we don't do any user adjustments here.
2540 PartiallyConstructedSafepointRecord &Info,
2541 PointerToBaseTy &PointerToBase,
2542 RematCandTy &RematerizationCandidates,
2544 // Record values we are going to delete from this statepoint live set.
2545 // We can not di this in following loop due to iterator invalidation.
2546 SmallVector<Value *, 32> LiveValuesToBeDeleted;
2547
2548 for (Value *LiveValue : Info.LiveSet) {
2549 auto It = RematerizationCandidates.find(LiveValue);
2550 if (It == RematerizationCandidates.end())
2551 continue;
2552
2553 RematerizlizationCandidateRecord &Record = It->second;
2554
2555     InstructionCost Cost = Record.Cost;
2556     // For invokes we need to rematerialize each chain twice - for normal and
2557 // for unwind basic blocks. Model this by multiplying cost by two.
2558 if (isa<InvokeInst>(Call))
2559 Cost *= 2;
2560
2561 // If it's too expensive - skip it.
2562     if (Cost >= RematerializationThreshold)
2563       continue;
2564
2565 // Remove value from the live set
2566 LiveValuesToBeDeleted.push_back(LiveValue);
2567
2568 // Clone instructions and record them inside "Info" structure.
2569
2570 // Different cases for calls and invokes. For invokes we need to clone
2571 // instructions both on normal and unwind path.
2572 if (isa<CallInst>(Call)) {
2573 Instruction *InsertBefore = Call->getNextNode();
2574 assert(InsertBefore);
2575 Instruction *RematerializedValue =
2576 rematerializeChain(Record.ChainToBase, InsertBefore,
2577 Record.RootOfChain, PointerToBase[LiveValue]);
2578 Info.RematerializedValues[RematerializedValue] = LiveValue;
2579 } else {
2580 auto *Invoke = cast<InvokeInst>(Call);
2581
2582 Instruction *NormalInsertBefore =
2583 &*Invoke->getNormalDest()->getFirstInsertionPt();
2584 Instruction *UnwindInsertBefore =
2585 &*Invoke->getUnwindDest()->getFirstInsertionPt();
2586
2587 Instruction *NormalRematerializedValue =
2588 rematerializeChain(Record.ChainToBase, NormalInsertBefore,
2589 Record.RootOfChain, PointerToBase[LiveValue]);
2590 Instruction *UnwindRematerializedValue =
2591 rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
2592 Record.RootOfChain, PointerToBase[LiveValue]);
2593
2594 Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2595 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2596 }
2597 }
2598
2599 // Remove rematerialized values from the live set.
2600 for (auto *LiveValue: LiveValuesToBeDeleted) {
2601 Info.LiveSet.remove(LiveValue);
2602 }
2603}
2604
2606 SmallVectorImpl<CallInst *> &Intrinsics,
2607 DefiningValueMapTy &DVCache,
2608 IsKnownBaseMapTy &KnownBases) {
2609 auto &Context = F.getContext();
2610 auto &DL = F.getDataLayout();
2611 bool Changed = false;
2612
2613 for (auto *Callsite : Intrinsics)
2614 switch (Callsite->getIntrinsicID()) {
2615 case Intrinsic::experimental_gc_get_pointer_base: {
2616 Changed = true;
2617 Value *Base =
2618 findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
2619 assert(!DVCache.count(Callsite));
2620 Callsite->replaceAllUsesWith(Base);
2621 if (!Base->hasName())
2622 Base->takeName(Callsite);
2623 Callsite->eraseFromParent();
2624 break;
2625 }
2626 case Intrinsic::experimental_gc_get_pointer_offset: {
2627 Changed = true;
2628 Value *Derived = Callsite->getOperand(0);
2629 Value *Base = findBasePointer(Derived, DVCache, KnownBases);
2630 assert(!DVCache.count(Callsite));
2631 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
2632 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
2633 IRBuilder<> Builder(Callsite);
2634 Value *BaseInt =
2635 Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
2636 suffixed_name_or(Base, ".int", ""));
2637 Value *DerivedInt =
2638 Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
2639 suffixed_name_or(Derived, ".int", ""));
2640 Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
2641 Callsite->replaceAllUsesWith(Offset);
2642 Offset->takeName(Callsite);
2643 Callsite->eraseFromParent();
2644 break;
2645 }
2646 default:
2647 llvm_unreachable("Unknown intrinsic");
2648 }
2649
2650 return Changed;
2651}
2652
2656 DefiningValueMapTy &DVCache,
2657 IsKnownBaseMapTy &KnownBases) {
2658 std::unique_ptr<GCStrategy> GC = findGCStrategy(F);
2659
2660#ifndef NDEBUG
2661 // Validate the input
2662 std::set<CallBase *> Uniqued;
2663 Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2664 assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2665
2666 for (CallBase *Call : ToUpdate)
2667 assert(Call->getFunction() == &F);
2668#endif
2669
2670 // When inserting gc.relocates for invokes, we need to be able to insert at
2671 // the top of the successor blocks. See the comment on
2672 // normalForInvokeSafepoint on exactly what is needed. Note that this step
2673 // may restructure the CFG.
2674 for (CallBase *Call : ToUpdate) {
2675 auto *II = dyn_cast<InvokeInst>(Call);
2676 if (!II)
2677 continue;
2678 normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
2679 normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
2680 }
2681
2682 // A list of dummy calls added to the IR to keep various values obviously
2683 // live in the IR. We'll remove all of these when done.
2684   SmallVector<CallInst *, 64> Holders;
2685
2686 // Insert a dummy call with all of the deopt operands we'll need for the
2687 // actual safepoint insertion as arguments. This ensures reference operands
2688 // in the deopt argument list are considered live through the safepoint (and
2689 // thus makes sure they get relocated.)
2690 for (CallBase *Call : ToUpdate) {
2691 SmallVector<Value *, 64> DeoptValues;
2692
2693 for (Value *Arg : GetDeoptBundleOperands(Call)) {
2694 assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) &&
2695 "support for FCA unimplemented");
2696 if (isHandledGCPointerType(Arg->getType(), GC.get()))
2697 DeoptValues.push_back(Arg);
2698 }
2699
2700 insertUseHolderAfter(Call, DeoptValues, Holders);
2701 }
2702
2703   SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
2704
2705 // A) Identify all gc pointers which are statically live at the given call
2706 // site.
2707 findLiveReferences(F, DT, ToUpdate, Records, GC.get());
2708
2709 /// Global mapping from live pointers to a base-defining-value.
2710 PointerToBaseTy PointerToBase;
2711
2712 // B) Find the base pointers for each live pointer
2713 for (size_t i = 0; i < Records.size(); i++) {
2714 PartiallyConstructedSafepointRecord &info = Records[i];
2715 findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
2716 }
2717 if (PrintBasePointers) {
2718 errs() << "Base Pairs (w/o Relocation):\n";
2719 for (auto &Pair : PointerToBase) {
2720 errs() << " derived ";
2721 Pair.first->printAsOperand(errs(), false);
2722 errs() << " base ";
2723 Pair.second->printAsOperand(errs(), false);
2724 errs() << "\n";
2725 ;
2726 }
2727 }
2728
2729 // The base phi insertion logic (for any safepoint) may have inserted new
2730 // instructions which are now live at some safepoint. The simplest such
2731 // example is:
2732 // loop:
2733 // phi a <-- will be a new base_phi here
2734 // safepoint 1 <-- that needs to be live here
2735 // gep a + 1
2736 // safepoint 2
2737 // br loop
2738 // We insert some dummy calls after each safepoint to definitely hold live
2739 // the base pointers which were identified for that safepoint. We'll then
2740 // ask liveness for _every_ base inserted to see what is now live. Then we
2741 // remove the dummy calls.
2742 Holders.reserve(Holders.size() + Records.size());
2743 for (size_t i = 0; i < Records.size(); i++) {
2744 PartiallyConstructedSafepointRecord &Info = Records[i];
2745
2746     SmallVector<Value *, 128> Bases;
2747     for (auto *Derived : Info.LiveSet) {
2748 assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
2749 Bases.push_back(PointerToBase[Derived]);
2750 }
2751
2752 insertUseHolderAfter(ToUpdate[i], Bases, Holders);
2753 }
2754
2755 // By selecting base pointers, we've effectively inserted new uses. Thus, we
2756 // need to rerun liveness. We may *also* have inserted new defs, but that's
2757 // not the key issue.
2758 recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get());
2759
2760 if (PrintBasePointers) {
2761 errs() << "Base Pairs: (w/Relocation)\n";
2762 for (auto Pair : PointerToBase) {
2763 errs() << " derived ";
2764 Pair.first->printAsOperand(errs(), false);
2765 errs() << " base ";
2766 Pair.second->printAsOperand(errs(), false);
2767 errs() << "\n";
2768 }
2769 }
2770
2771 // It is possible that non-constant live variables have a constant base. For
2772 // example, a GEP with a variable offset from a global. In this case we can
2773 // remove it from the liveset. We already don't add constants to the liveset
2774 // because we assume they won't move at runtime and the GC doesn't need to be
2775 // informed about them. The same reasoning applies if the base is constant.
2776 // Note that the relocation placement code relies on this filtering for
2777 // correctness as it expects the base to be in the liveset, which isn't true
2778 // if the base is constant.
2779 for (auto &Info : Records) {
2780 Info.LiveSet.remove_if([&](Value *LiveV) {
2781 assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
2782 return isa<Constant>(PointerToBase[LiveV]);
2783 });
2784 }
2785
2786 for (CallInst *CI : Holders)
2787 CI->eraseFromParent();
2788
2789 Holders.clear();
2790
2791 // Compute the cost of possible re-materialization of derived pointers.
2792 RematCandTy RematerizationCandidates;
2793 findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
2794
2795 // In order to reduce live set of statepoint we might choose to rematerialize
2796 // some values instead of relocating them. This is purely an optimization and
2797 // does not influence correctness.
2798 // First try rematerialization at uses, then after statepoints.
2799 rematerializeLiveValuesAtUses(RematerizationCandidates, Records,
2800 PointerToBase);
2801 for (size_t i = 0; i < Records.size(); i++)
2802 rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
2803 RematerizationCandidates, TTI);
2804
2805 // We need this to safely RAUW and delete call or invoke return values that
2806 // may themselves be live over a statepoint. For details, please see usage in
2807 // makeStatepointExplicitImpl.
2808 std::vector<DeferredReplacement> Replacements;
2809
2810 // Now run through and replace the existing statepoints with new ones with
2811 // the live variables listed. We do not yet update uses of the values being
2812 // relocated. We have references to live variables that need to
2813 // survive to the last iteration of this loop. (By construction, the
2814   // previous statepoint can not be a live variable, thus we can and do remove
2815 // the old statepoint calls as we go.)
2816 for (size_t i = 0; i < Records.size(); i++)
2817 makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
2818 PointerToBase, GC.get());
2819
2820   ToUpdate.clear(); // prevent accidental use of invalid calls.
2821
2822 for (auto &PR : Replacements)
2823 PR.doReplacement();
2824
2825 Replacements.clear();
2826
2827 for (auto &Info : Records) {
2828     // These live sets may contain stale Value pointers, since we replaced calls
2829 // with operand bundles with calls wrapped in gc.statepoint, and some of
2830 // those calls may have been def'ing live gc pointers. Clear these out to
2831 // avoid accidentally using them.
2832 //
2833 // TODO: We should create a separate data structure that does not contain
2834 // these live sets, and migrate to using that data structure from this point
2835 // onward.
2836 Info.LiveSet.clear();
2837 }
2838 PointerToBase.clear();
2839
2840 // Do all the fixups of the original live variables to their relocated selves
2842 for (const PartiallyConstructedSafepointRecord &Info : Records) {
2843 // We can't simply save the live set from the original insertion. One of
2844 // the live values might be the result of a call which needs a safepoint.
2845 // That Value* no longer exists and we need to use the new gc_result.
2846 // Thankfully, the live set is embedded in the statepoint (and updated), so
2847 // we just grab that.
2848 llvm::append_range(Live, Info.StatepointToken->gc_live());
2849#ifndef NDEBUG
2850 // Do some basic validation checking on our liveness results before
2851 // performing relocation. Relocation can and will turn mistakes in liveness
2852     // results into non-sensical code which is much harder to debug.
2853 // TODO: It would be nice to test consistency as well
2854 assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
2855 "statepoint must be reachable or liveness is meaningless");
2856 for (Value *V : Info.StatepointToken->gc_live()) {
2857 if (!isa<Instruction>(V))
2858         // Non-instruction values trivially dominate all possible uses
2859 continue;
2860 auto *LiveInst = cast<Instruction>(V);
2861 assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
2862 "unreachable values should never be live");
2863 assert(DT.dominates(LiveInst, Info.StatepointToken) &&
2864 "basic SSA liveness expectation violated by liveness analysis");
2865 }
2866#endif
2867 }
2868   unique_unsorted(Live);
2869
2870#ifndef NDEBUG
2871 // Validation check
2872 for (auto *Ptr : Live)
2873 assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
2874 "must be a gc pointer type");
2875#endif
2876
2877 relocationViaAlloca(F, DT, Live, Records);
2878 return !Records.empty();
2879}
2880
2881// List of all parameter and return attributes which must be stripped when
2882// lowering from the abstract machine model. Note that we list attributes
2883// here which aren't valid as return attributes, that is okay.
2884 static AttributeMask getParamAndReturnAttributesToRemove() {
2885   AttributeMask R;
2886 R.addAttribute(Attribute::Dereferenceable);
2887 R.addAttribute(Attribute::DereferenceableOrNull);
2888 R.addAttribute(Attribute::ReadNone);
2889 R.addAttribute(Attribute::ReadOnly);
2890 R.addAttribute(Attribute::WriteOnly);
2891 R.addAttribute(Attribute::NoAlias);
2892 R.addAttribute(Attribute::NoFree);
2893 return R;
2894}
2895
2897 LLVMContext &Ctx = F.getContext();
2898
2899   // Intrinsics are very delicate. Lowering sometimes depends on the presence
2900   // of certain attributes for correctness, but we may have also inferred
2901   // additional ones in the abstract machine model which need to be stripped. This
2902 // assumes that the attributes defined in Intrinsic.td are conservatively
2903 // correct for both physical and abstract model.
2904 if (Intrinsic::ID id = F.getIntrinsicID()) {
2905 F.setAttributes(Intrinsic::getAttributes(Ctx, id));
2906 return;
2907 }
2908
2910 for (Argument &A : F.args())
2911 if (isa<PointerType>(A.getType()))
2912 F.removeParamAttrs(A.getArgNo(), R);
2913
2914 if (isa<PointerType>(F.getReturnType()))
2915 F.removeRetAttrs(R);
2916
2917 for (auto Attr : FnAttrsToStrip)
2918 F.removeFnAttr(Attr);
2919}
2920
2921/// Certain metadata on instructions are invalid after running RS4GC.
2922/// Optimizations that run after RS4GC can incorrectly use this metadata to
2923/// optimize functions. We drop such metadata on the instruction.
2925 if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
2926 return;
2927   // These are the kinds of metadata that are still valid on loads and stores
2928   // after RS4GC.
2929 // The metadata implying dereferenceability and noalias are (conservatively)
2930 // dropped. This is because semantically, after RewriteStatepointsForGC runs,
2931 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2932 // touch the entire heap including noalias objects. Note: The reasoning is
2933   // the same as stripping the dereferenceability and noalias attributes that are
2934 // analogous to the metadata counterparts.
2935 // We also drop the invariant.load metadata on the load because that metadata
2936 // implies the address operand to the load points to memory that is never
2937 // changed once it became dereferenceable. This is no longer true after RS4GC.
2938 // Similar reasoning applies to invariant.group metadata, which applies to
2939 // loads within a group.
2940 unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
2941 LLVMContext::MD_range,
2942 LLVMContext::MD_alias_scope,
2943 LLVMContext::MD_nontemporal,
2944 LLVMContext::MD_nonnull,
2945 LLVMContext::MD_align,
2946 LLVMContext::MD_type};
2947
2948 // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
2949 I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
2950}
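// Editor's illustration (assumed IR, not from the source): a load such as
//
//   %v = load ptr addrspace(1), ptr %p, !tbaa !1, !invariant.load !2
//
// keeps !tbaa (whitelisted above) but loses !invariant.load, since after
// RS4GC the pointed-to memory can no longer be assumed unchanging.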
2951
2952 static void stripNonValidDataFromBody(Function &F) {
2953   if (F.empty())
2954 return;
2955
2956 LLVMContext &Ctx = F.getContext();
2957 MDBuilder Builder(Ctx);
2958
2959   // Set of invariant.start instructions that we need to remove.
2960 // Use this to avoid invalidating the instruction iterator.
2961 SmallVector<IntrinsicInst*, 12> InvariantStartInstructions;
2962
2963 for (Instruction &I : instructions(F)) {
2964 // invariant.start on a memory location implies that the referenced memory
2965 // location is constant and unchanging. This is no longer true after
2966 // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
2967 // which "free" the entire heap, and the presence of invariant.start would
2968 // allow the optimizer to sink the load of a memory location past a
2969 // statepoint, which is incorrect.
2970 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2971 if (II->getIntrinsicID() == Intrinsic::invariant_start) {
2972 InvariantStartInstructions.push_back(II);
2973 continue;
2974 }
2975
2976 if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
2977 MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
2978 I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2979 }
2980
2981 stripInvalidMetadataFromInstruction(I);
2982
2983 AttributeMask R = getParamAndReturnAttributesToRemove();
2984 if (auto *Call = dyn_cast<CallBase>(&I)) {
2985 for (int i = 0, e = Call->arg_size(); i != e; i++)
2986 if (isa<PointerType>(Call->getArgOperand(i)->getType()))
2987 Call->removeParamAttrs(i, R);
2988 if (isa<PointerType>(Call->getType()))
2989 Call->removeRetAttrs(R);
2990 }
2991 }
2992
2993 // Delete the invariant.start instructions and RAUW poison.
2994 for (auto *II : InvariantStartInstructions) {
2995 II->replaceAllUsesWith(PoisonValue::get(II->getType()));
2996 II->eraseFromParent();
2997 }
2998}
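// A minimal illustration (assumed IR, not from this file):
//
//   %inv = call ptr @llvm.invariant.start.p0(i64 8, ptr %obj)
//
// is erased and every use of %inv becomes poison, so later passes cannot
// treat *%obj as unchanging across an inserted statepoint.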
2999
3000/// Looks up the GC strategy for a given function, returning null if the
3001/// function doesn't have a GC tag.
3002static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
3003 if (!F.hasGC())
3004 return nullptr;
3005
3006 return getGCStrategy(F.getGC());
3007}
3008
3009/// Returns true if this function should be rewritten by this pass. The main
3010/// point of this function is as an extension point for custom logic.
3011static bool shouldRewriteStatepointsIn(Function &F) {
3012 if (!F.hasGC())
3013 return false;
3014
3015 std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);
3016
3017 assert(Strategy && "GC strategy is required by function, but was not found");
3018
3019 return Strategy->useRS4GC();
3020}
3021
3022static void stripNonValidData(Module &M) {
3023#ifndef NDEBUG
3024 assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
3025#endif
3026
3027 for (Function &F : M)
3028 stripNonValidAttributesFromPrototype(F);
3029
3030 for (Function &F : M)
3031 stripNonValidDataFromBody(F);
3032}
3033
3034bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
3035 TargetTransformInfo &TTI,
3036 const TargetLibraryInfo &TLI) {
3037 assert(!F.isDeclaration() && !F.empty() &&
3038 "need function body to rewrite statepoints in");
3039 assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
3040
3041 auto NeedsRewrite = [&TLI](Instruction &I) {
3042 if (const auto *Call = dyn_cast<CallBase>(&I)) {
3043 if (isa<GCStatepointInst>(Call))
3044 return false;
3045 if (callsGCLeafFunction(Call, TLI))
3046 return false;
3047
3048 // Normally it's up to the frontend to make sure that non-leaf calls also
3049 // have proper deopt state if it is required. We make an exception for
3050 // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics,
3051 // these are non-leaf by default. They might be generated by the optimizer,
3052 // which doesn't know how to produce a proper deopt state. So if we see a
3053 // non-leaf memcpy/memmove without deopt state, we just treat it as a leaf
3054 // copy and don't produce a statepoint.
3055 if (!AllowStatepointWithNoDeoptInfo && !Call->hasDeoptState()) {
3056 assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
3057 "Don't expect any other calls here!");
3058 return false;
3059 }
3060 return true;
3061 }
3062 return false;
3063 };
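  // A sketch of the decision above (hypothetical callees):
  //   call void @runtime_helper() [ "deopt"(i32 0) ]  ; non-leaf -> rewrite
  //   call double @llvm.sqrt.f64(double %x)           ; GC leaf  -> skip
  //   call token @llvm.experimental.gc.statepoint...  ; already wrapped -> skip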
3064
3065 // Delete any unreachable statepoints so that we don't have unrewritten
3066 // statepoints surviving this pass. This makes testing easier and the
3067 // resulting IR less confusing to human readers.
3068 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
3069 bool MadeChange = removeUnreachableBlocks(F, &DTU);
3070 // Flush the Dominator Tree.
3071 DTU.getDomTree();
3072
3073 // Gather all the statepoints which need to be rewritten. Be careful to only
3074 // consider those in reachable code since we need to ask dominance queries
3075 // when rewriting. We'll delete the unreachable ones in a moment.
3076 SmallVector<CallBase *, 64> ParsePointNeeded;
3077 SmallVector<CallInst *, 64> Intrinsics;
3078 for (Instruction &I : instructions(F)) {
3079 // TODO: only the ones with the flag set!
3080 if (NeedsRewrite(I)) {
3081 // NOTE removeUnreachableBlocks() is stronger than
3082 // DominatorTree::isReachableFromEntry(). In other words
3083 // removeUnreachableBlocks can remove some blocks for which
3084 // isReachableFromEntry() returns true.
3085 assert(DT.isReachableFromEntry(I.getParent()) &&
3086 "no unreachable blocks expected");
3087 ParsePointNeeded.push_back(cast<CallBase>(&I));
3088 }
3089 if (auto *CI = dyn_cast<CallInst>(&I))
3090 if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
3091 CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
3092 Intrinsics.emplace_back(CI);
3093 }
3094
3095 // Return early if no work to do.
3096 if (ParsePointNeeded.empty() && Intrinsics.empty())
3097 return MadeChange;
3098
3099 // As a prepass, go ahead and aggressively destroy single entry phi nodes.
3100 // These are created by LCSSA. They have the effect of increasing the size
3101 // of liveness sets for no good reason. It may be harder to do this
3102 // post-insertion since relocations and base phis can confuse things.
3103 for (BasicBlock &BB : F)
3104 if (BB.getUniquePredecessor())
3105 MadeChange |= FoldSingleEntryPHINodes(&BB);
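  // For instance (assumed IR), an LCSSA phi in a single-predecessor exit
  // block such as
  //   exit:                              ; preds = %loop
  //     %p.lcssa = phi ptr [ %p, %loop ]
  // folds to a plain use of %p, so %p.lcssa never inflates a live set.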
3106
3107 // Before we start introducing relocations, we want to tweak the IR a bit to
3108 // avoid unfortunate code generation effects. The main example is that we
3109 // want to try to make sure the comparison feeding a branch is after any
3110 // safepoints. Otherwise, we end up with a comparison of pre-relocation
3111 // values feeding a branch after relocation. This is semantically correct,
3112 // but results in extra register pressure since both the pre-relocation and
3113 // post-relocation copies must be available in registers. For code without
3114 // relocations this is handled elsewhere, but teaching the scheduler to
3115 // reverse the transform we're about to do would be slightly complex.
3116 // Note: This may extend the live range of the inputs to the icmp and thus
3117 // increase the liveset of any statepoint we move over. This is profitable
3118 // as long as all statepoints are in rare blocks. If we had in-register
3119 // lowering for live values this would be a much safer transform.
3120 auto getConditionInst = [](Instruction *TI) -> Instruction * {
3121 if (auto *BI = dyn_cast<BranchInst>(TI))
3122 if (BI->isConditional())
3123 return dyn_cast<Instruction>(BI->getCondition());
3124 // TODO: Extend this to handle switches
3125 return nullptr;
3126 };
3127 for (BasicBlock &BB : F) {
3128 Instruction *TI = BB.getTerminator();
3129 if (auto *Cond = getConditionInst(TI))
3130 // TODO: Handle more than just ICmps here. We should be able to move
3131 // most instructions without side effects or memory access.
3132 if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
3133 MadeChange = true;
3134 Cond->moveBefore(TI);
3135 }
3136 }
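  // A sketch of the effect (assumed IR): given
  //   %c = icmp eq ptr %a, %b
  //   call void @foo() [ "deopt"() ]   ; will become a statepoint
  //   br i1 %c, label %t, label %f
  // moving %c down to just before the branch lets it consume the relocated
  // %a and %b, instead of keeping pre-relocation copies live past the call.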
3137
3138 // Nasty workaround - The base computation code in the main algorithm doesn't
3139 // consider the fact that a GEP can be used to convert a scalar to a vector.
3140 // The right fix for this is to integrate GEPs into the base rewriting
3141 // algorithm properly; this is just a short-term workaround to prevent
3142 // crashes by canonicalizing such GEPs into fully vector GEPs.
3143 for (Instruction &I : instructions(F)) {
3144 if (!isa<GetElementPtrInst>(I))
3145 continue;
3146
3147 unsigned VF = 0;
3148 for (unsigned i = 0; i < I.getNumOperands(); i++)
3149 if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
3150 assert(VF == 0 ||
3151 VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
3152 VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
3153 }
3154
3155 // It's the vector to scalar traversal through the pointer operand which
3156 // confuses base pointer rewriting, so limit ourselves to that case.
3157 if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
3158 IRBuilder<> B(&I);
3159 auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
3160 I.setOperand(0, Splat);
3161 MadeChange = true;
3162 }
3163 }
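  // A sketch of the canonicalization (assumed IR):
  //   %g = getelementptr i32, ptr %base, <4 x i64> %idxs
  // becomes
  //   %base.splat = shufflevector ...   ; splat %base to <4 x ptr>
  //   %g = getelementptr i32, <4 x ptr> %base.splat, <4 x i64> %idxs
  // so base rewriting only ever traverses vector-typed operands here.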
3164
3165 // Cache the 'defining value' relation used in the computation and
3166 // insertion of base phis and selects. This ensures that we don't insert
3167 // large numbers of duplicate base_phis. Use one cache for both
3168 // inlineGetBaseAndOffset() and insertParsePoints().
3169 DefiningValueMapTy DVCache;
3170
3171 // Mapping between base values and a flag indicating whether each is a known
3172 // base or not.
3173 IsKnownBaseMapTy KnownBases;
3174
3175 if (!Intrinsics.empty())
3176 // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
3177 // live references.
3178 MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);
3179
3180 if (!ParsePointNeeded.empty())
3181 MadeChange |=
3182 insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);
3183
3184 return MadeChange;
3185}
3186
3187// liveness computation via standard dataflow
3188// -------------------------------------------------------------------
3189
3190// TODO: Consider using bitvectors for liveness, the set of potentially
3191// interesting values should be small and easy to pre-compute.
3192
3193/// Compute the live-in set for the range [Begin, End) starting from
3194/// the live-out set of the basic block.
3195static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
3196 BasicBlock::reverse_iterator End,
3197 SetVector<Value *> &LiveTmp, GCStrategy *GC) {
3198 for (auto &I : make_range(Begin, End)) {
3199 // KILL/Def - Remove this definition from LiveIn
3200 LiveTmp.remove(&I);
3201
3202 // Don't consider *uses* in PHI nodes, we handle their contribution to
3203 // predecessor blocks when we seed the LiveOut sets
3204 if (isa<PHINode>(I))
3205 continue;
3206
3207 // USE - Add to the LiveIn set for this instruction
3208 for (Value *V : I.operands()) {
3209 assert(!isUnhandledGCPointerType(V->getType(), GC) &&
3210 "support for FCA unimplemented");
3211 if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
3212 // The choice to exclude all things constant here is slightly subtle.
3213 // There are two independent reasons:
3214 // - First, we assume that things which are constant (from LLVM's
3215 // definition) do not move at runtime. For example, the address of a
3216 // global variable is fixed, even though its contents may not be.
3217 // - Second, we can't disallow arbitrary inttoptr constants even
3218 // if the language frontend does. Optimization passes are free to
3219 // locally exploit facts without respect to global reachability. This
3220 // can create sections of code which are dynamically unreachable and
3221 // contain just about anything. (see constants.ll in tests)
3222 LiveTmp.insert(V);
3223 }
3224 }
3225 }
3226}
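// For example (assumed IR), walking this range bottom-up:
//   %d = getelementptr i8, ptr %s, i64 8
// first removes %d from LiveTmp (it is defined here) and then inserts %s
// (it is used here), which is exactly the classic use/def liveness rule.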
3227
3228static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp,
3229 GCStrategy *GC) {
3230 for (BasicBlock *Succ : successors(BB)) {
3231 for (auto &I : *Succ) {
3232 PHINode *PN = dyn_cast<PHINode>(&I);
3233 if (!PN)
3234 break;
3235
3236 Value *V = PN->getIncomingValueForBlock(BB);
3237 assert(!isUnhandledGCPointerType(V->getType(), GC) &&
3238 "support for FCA unimplemented");
3239 if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
3240 LiveTmp.insert(V);
3241 }
3242 }
3243}
3244
3245static SetVector<Value *> computeKillSet(BasicBlock *BB, GCStrategy *GC) {
3246 SetVector<Value *> KillSet;
3247 for (Instruction &I : *BB)
3248 if (isHandledGCPointerType(I.getType(), GC))
3249 KillSet.insert(&I);
3250 return KillSet;
3251}
3252
3253#ifndef NDEBUG
3254/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
3255/// validation check for the liveness computation.
3256static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
3257 Instruction *TI, bool TermOkay = false) {
3258 for (Value *V : Live) {
3259 if (auto *I = dyn_cast<Instruction>(V)) {
3260 // The terminator can be a member of the LiveOut set. LLVM's definition
3261 // of instruction dominance states that V does not dominate itself. As
3262 // such, we need to special case this to allow it.
3263 if (TermOkay && TI == I)
3264 continue;
3265 assert(DT.dominates(I, TI) &&
3266 "basic SSA liveness expectation violated by liveness analysis");
3267 }
3268 }
3269}
3270
3271/// Check that all the liveness sets used during the computation of liveness
3272/// obey basic SSA properties. This is useful for finding cases where we miss
3273/// a def.
3274static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
3275 BasicBlock &BB) {
3276 checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
3277 checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
3278 checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
3279}
3280#endif
3281
3282static void computeLiveInValues(DominatorTree &DT, Function &F,
3283 GCPtrLivenessData &Data, GCStrategy *GC) {
3284 SmallSetVector<BasicBlock *, 32> Worklist;
3285
3286 // Seed the liveness for each individual block
3287 for (BasicBlock &BB : F) {
3288 Data.KillSet[&BB] = computeKillSet(&BB, GC);
3289 Data.LiveSet[&BB].clear();
3290 computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);
3291
3292#ifndef NDEBUG
3293 for (Value *Kill : Data.KillSet[&BB])
3294 assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
3295#endif
3296
3297 Data.LiveOut[&BB] = SetVector<Value *>();
3298 computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
3299 Data.LiveIn[&BB] = Data.LiveSet[&BB];
3300 Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
3301 Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
3302 if (!Data.LiveIn[&BB].empty())
3303 Worklist.insert(pred_begin(&BB), pred_end(&BB));
3304 }
3305
3306 // Propagate that liveness until stable
3307 while (!Worklist.empty()) {
3308 BasicBlock *BB = Worklist.pop_back_val();
3309
3310 // Compute our new liveout set, then exit early if it hasn't changed despite
3311 // the contribution of our successors.
3312 SetVector<Value *> LiveOut = Data.LiveOut[BB];
3313 const auto OldLiveOutSize = LiveOut.size();
3314 for (BasicBlock *Succ : successors(BB)) {
3315 assert(Data.LiveIn.count(Succ));
3316 LiveOut.set_union(Data.LiveIn[Succ]);
3317 }
3318 // assert: OldLiveOut is a subset of LiveOut
3319 if (OldLiveOutSize == LiveOut.size()) {
3320 // If the sets are the same size, then we didn't actually add anything
3321 // when unioning our successors' LiveIn. Thus, the LiveIn of this block
3322 // hasn't changed.
3323 continue;
3324 }
3325 Data.LiveOut[BB] = LiveOut;
3326
3327 // Apply the effects of this basic block
3328 SetVector<Value *> LiveTmp = LiveOut;
3329 LiveTmp.set_union(Data.LiveSet[BB]);
3330 LiveTmp.set_subtract(Data.KillSet[BB]);
3331
3332 assert(Data.LiveIn.count(BB));
3333 const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
3334 // assert: OldLiveIn is a subset of LiveTmp
3335 if (OldLiveIn.size() != LiveTmp.size()) {
3336 Data.LiveIn[BB] = LiveTmp;
3337 Worklist.insert(pred_begin(BB), pred_end(BB));
3338 }
3339 } // while (!Worklist.empty())
3340
3341#ifndef NDEBUG
3342 // Verify our output against SSA properties. This helps catch any
3343 // missing kills during the above iteration.
3344 for (BasicBlock &BB : F)
3345 checkBasicSSA(DT, Data, BB);
3346#endif
3347}
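// Restated as the usual backward dataflow equations (for reference only):
//   LiveOut[BB] = union over successors S of LiveIn[S]
//   LiveIn[BB]  = (LiveOut[BB] U LiveSet[BB]) - KillSet[BB]
// A block's predecessors are re-queued whenever its LiveIn grows, and the
// sets only ever grow, so the worklist loop terminates at a fixed point.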
3348
3349static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
3350 StatepointLiveSetTy &Out, GCStrategy *GC) {
3351 BasicBlock *BB = Inst->getParent();
3352
3353 // Note: The copy is intentional and required
3354 assert(Data.LiveOut.count(BB));
3355 SetVector<Value *> LiveOut = Data.LiveOut[BB];
3356
3357 // We want to handle the statepoint itself oddly. Its
3358 // call result is not live (normal), nor are its arguments
3359 // (unless they're used again later). This adjustment is
3360 // specifically what we need to relocate.
3361 computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
3362 GC);
3363 LiveOut.remove(Inst);
3364 Out.insert(LiveOut.begin(), LiveOut.end());
3365}
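// For example (assumed IR), at a site
//   call void @foo(ptr %a)          ; the call being made parseable
// where %a has no later uses, %a does not appear in Out: only values still
// live after the call itself are reported for relocation.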
3366
3367static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
3368 CallBase *Call,
3369 PartiallyConstructedSafepointRecord &Info,
3370 PointerToBaseTy &PointerToBase,
3371 GCStrategy *GC) {
3372 StatepointLiveSetTy Updated;
3373 findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);
3374
3375 // We may have base pointers which are now live that weren't before. We need
3376 // to update the PointerToBase structure to reflect this.
3377 for (auto *V : Updated)
3378 PointerToBase.insert({ V, V });
3379
3380 Info.LiveSet = Updated;
3381}