1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
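///
/// As an illustrative sketch (not drawn from a specific test), an alloca such
/// as:
///
///   %pair = alloca { i32, i32 }
///   %a = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 0
///   store i32 1, ptr %a
///   %v = load i32, ptr %a
///
/// can be sliced into per-field allocas and then promoted, so that %v is
/// simply replaced by the stored value and no memory traffic remains.
///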
23//===----------------------------------------------------------------------===//
24
25#include "llvm/Transforms/Scalar/SROA.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DIBuilder.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/Metadata.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
69#include "llvm/IR/PassManager.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
76#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
88#include <algorithm>
89#include <cassert>
90#include <cstddef>
91#include <cstdint>
92#include <cstring>
93#include <iterator>
94#include <string>
95#include <tuple>
96#include <utility>
97#include <variant>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "sroa"
103
104STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
105STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
106STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
107STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
108STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
109STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
110STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
111STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
112STATISTIC(NumLoadsPredicated,
113 "Number of loads rewritten into predicated loads to allow promotion");
114STATISTIC(
115    NumStoresPredicated,
116    "Number of stores rewritten into predicated stores to allow promotion");
117STATISTIC(NumDeleted, "Number of instructions deleted");
118STATISTIC(NumVectorized, "Number of vectorized aggregates");
119
120/// Disable running mem2reg during SROA in order to test or debug SROA.
121static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
122 cl::Hidden);
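// For example (assuming a standard `opt` build), this flag can be exercised
// with:
//   opt -passes=sroa -sroa-skip-mem2reg -S input.ll
// which leaves the rewritten, but unpromoted, allocas visible in the output.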
123namespace {
124
125class AllocaSliceRewriter;
126class AllocaSlices;
127class Partition;
128
129class SelectHandSpeculativity {
130 unsigned char Storage = 0; // None are speculatable by default.
131 using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
132 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
133public:
134 SelectHandSpeculativity() = default;
135 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
136 bool isSpeculatable(bool isTrueVal) const;
137 bool areAllSpeculatable() const;
138 bool areAnySpeculatable() const;
139 bool areNoneSpeculatable() const;
140 // For interop as int half of PointerIntPair.
141 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
142 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
143};
144static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
145
146using PossiblySpeculatableLoad =
147    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
148using UnspeculatableStore = StoreInst *;
149using RewriteableMemOp =
150 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
151using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
152
153/// An optimization pass providing Scalar Replacement of Aggregates.
154///
155/// This pass takes allocations which can be completely analyzed (that is, they
156/// don't escape) and tries to turn them into scalar SSA values. There are
157/// a few steps to this process.
158///
159/// 1) It takes allocations of aggregates and analyzes the ways in which they
160/// are used to try to split them into smaller allocations, ideally of
161/// a single scalar data type. It will split up memcpy and memset accesses
162/// as necessary and try to isolate individual scalar accesses.
163/// 2) It will transform accesses into forms which are suitable for SSA value
164/// promotion. This can be replacing a memset with a scalar store of an
165/// integer value, or it can involve speculating operations on a PHI or
166/// select to be a PHI or select of the results.
167/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
168/// onto insert and extract operations on a vector value, and convert them to
169/// this form. By doing so, it will enable promotion of vector aggregates to
170/// SSA vector values.
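///
/// As a rough sketch of step (2) (illustrative IR, not from a specific test),
/// a memset that exactly covers an i32-sized slice:
///
///   call void @llvm.memset.p0.i64(ptr %slice, i8 0, i64 4, i1 false)
///
/// can be rewritten as a plain scalar store that mem2reg can then promote:
///
///   store i32 0, ptr %slice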
171class SROA {
172 LLVMContext *const C;
173 DomTreeUpdater *const DTU;
174 AssumptionCache *const AC;
175 const bool PreserveCFG;
176
177 /// Worklist of alloca instructions to simplify.
178 ///
179 /// Each alloca in the function is added to this. Each new alloca formed gets
180 /// added to it as well to recursively simplify unless that alloca can be
181 /// directly promoted. Finally, each time we rewrite a use of an alloca other
182 /// than the one being actively rewritten, we add it back onto the list if not
183 /// already present to ensure it is re-visited.
184 SmallSetVector<AllocaInst *, 16> Worklist;
185
186 /// A collection of instructions to delete.
187 /// We try to batch deletions to simplify code and make things a bit more
188 /// efficient. We also make sure there are no dangling pointers.
189 SmallVector<WeakVH, 8> DeadInsts;
190
191 /// Post-promotion worklist.
192 ///
193 /// Sometimes we discover an alloca which has a high probability of becoming
194 /// viable for SROA after a round of promotion takes place. In those cases,
195 /// the alloca is enqueued here for re-processing.
196 ///
197 /// Note that we have to be very careful to clear allocas out of this list in
198 /// the event they are deleted.
199 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
200
201 /// A collection of alloca instructions we can directly promote.
204 PromotableAllocas;
205
206 /// A worklist of PHIs to speculate prior to promoting allocas.
207 ///
208 /// All of these PHIs have been checked for the safety of speculation and by
209 /// being speculated will allow promoting allocas currently in the promotable
210 /// queue.
211 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
212
213 /// A worklist of select instructions to rewrite prior to promoting
214 /// allocas.
216
217 /// Select instructions that use an alloca and are subsequently loaded can be
218 /// rewritten to load both input pointers and then select between the result,
219 /// allowing the load of the alloca to be promoted.
220 /// From this:
221 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
222 /// %V = load <type>, ptr %P2
223 /// to:
224 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
225 /// %V2 = load <type>, ptr %Other
226 /// %V = select i1 %cond, <type> %V1, <type> %V2
227 ///
228 /// We can do this to a select if its only uses are loads
229 /// and if either the operands to the select can be loaded unconditionally
230 /// or we are allowed to perform CFG modifications.
231 /// If an intervening bitcast with a single use of the load is found, the
232 /// promotion is still allowed.
233 static std::optional<RewriteableMemOps>
234 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
235
236public:
237 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
238 SROAOptions PreserveCFG_)
239 : C(C), DTU(DTU), AC(AC),
240 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
241
242 /// Main run method used by both the SROAPass and by the legacy pass.
243 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
244
245private:
246 friend class AllocaSliceRewriter;
247
248 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
249 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
250 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
251 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
252 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
253 void clobberUse(Use &U);
254 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
255 bool promoteAllocas(Function &F);
256};
257
258} // end anonymous namespace
259
260/// Calculate the fragment of a variable to use when slicing a store
261/// based on the slice dimensions, existing fragment, and base storage
262/// fragment.
263/// Results:
264/// UseFrag - Use Target as the new fragment.
265/// UseNoFrag - The new slice already covers the whole variable.
266/// Skip - The new alloca slice doesn't include this variable.
267/// FIXME: Can we use calculateFragmentIntersect instead?
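/// A worked example (hypothetical numbers): for a 64-bit variable with no base
/// storage fragment, a 32-bit slice at offset 32 yields Target = bits [32, 64)
/// and the result UseFrag; a slice covering bits [0, 64) instead matches the
/// whole variable and yields UseNoFrag.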
268namespace {
269enum FragCalcResult { UseFrag, UseNoFrag, Skip };
270}
271static FragCalcResult
272calculateFragment(DILocalVariable *Variable,
273                  uint64_t NewStorageSliceOffsetInBits,
274                  uint64_t NewStorageSliceSizeInBits,
275                  std::optional<DIExpression::FragmentInfo> StorageFragment,
276                  std::optional<DIExpression::FragmentInfo> CurrentFragment,
277                  DIExpression::FragmentInfo &Target) {
278 // If the base storage describes part of the variable apply the offset and
279 // the size constraint.
280 if (StorageFragment) {
281 Target.SizeInBits =
282 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
283 Target.OffsetInBits =
284 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
285 } else {
286 Target.SizeInBits = NewStorageSliceSizeInBits;
287 Target.OffsetInBits = NewStorageSliceOffsetInBits;
288 }
289
290 // If this slice extracts the entirety of an independent variable from a
291 // larger alloca, do not produce a fragment expression, as the variable is
292 // not fragmented.
293 if (!CurrentFragment) {
294 if (auto Size = Variable->getSizeInBits()) {
295 // Treat the current fragment as covering the whole variable.
296 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
297 if (Target == CurrentFragment)
298 return UseNoFrag;
299 }
300 }
301
302 // No additional work to do if there isn't a fragment already, or there is
303 // but it already exactly describes the new assignment.
304 if (!CurrentFragment || *CurrentFragment == Target)
305 return UseFrag;
306
307 // Reject the target fragment if it doesn't fit wholly within the current
308 // fragment. TODO: We could instead chop up the target to fit in the case of
309 // a partial overlap.
310 if (Target.startInBits() < CurrentFragment->startInBits() ||
311 Target.endInBits() > CurrentFragment->endInBits())
312 return Skip;
313
314 // Target fits within the current fragment, return it.
315 return UseFrag;
316}
317
318static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) {
319  return DebugVariable(DVI->getVariable(), std::nullopt,
320 DVI->getDebugLoc().getInlinedAt());
321}
322static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
323  return DebugVariable(DVR->getVariable(), std::nullopt,
324 DVR->getDebugLoc().getInlinedAt());
325}
326
327/// Helpers for handling new and old debug info modes in migrateDebugInfo.
328/// These overloads unwrap a DbgInstPtr {Instruction* | DbgRecord*} union based
329/// on the \p Unused parameter type.
330static DbgVariableRecord *UnwrapDbgInstPtr(DbgInstPtr P, DbgVariableRecord *Unused) {
331  (void)Unused;
332 return static_cast<DbgVariableRecord *>(cast<DbgRecord *>(P));
333}
334static DbgAssignIntrinsic *UnwrapDbgInstPtr(DbgInstPtr P, DbgAssignIntrinsic *Unused) {
335  (void)Unused;
336 return static_cast<DbgAssignIntrinsic *>(cast<Instruction *>(P));
337}
338
339/// Find linked dbg.assign and generate a new one with the correct
340/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
341/// value component is copied from the old dbg.assign to the new.
342/// \param OldAlloca Alloca for the variable before splitting.
343/// \param IsSplit True if the store (not necessarily alloca)
344/// is being split.
345/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
346/// \param SliceSizeInBits New number of bits being written to.
347/// \param OldInst Instruction that is being split.
348/// \param Inst New instruction performing this part of the
349/// split store.
350/// \param Dest Store destination.
351/// \param Value Stored value.
352/// \param DL Datalayout.
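///
/// As a rough illustration (hypothetical sizes): when a 64-bit store linked to
/// a dbg.assign is split into two 32-bit stores, each new store receives a
/// fresh distinct !DIAssignID and a cloned dbg.assign whose expression gains
/// DW_OP_LLVM_fragment 0 32 or DW_OP_LLVM_fragment 32 32 respectively, so the
/// two halves of the variable remain tracked independently.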
353static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
354 uint64_t OldAllocaOffsetInBits,
355 uint64_t SliceSizeInBits, Instruction *OldInst,
356 Instruction *Inst, Value *Dest, Value *Value,
357 const DataLayout &DL) {
358 auto MarkerRange = at::getAssignmentMarkers(OldInst);
359 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
360 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
361 if (MarkerRange.empty() && DVRAssignMarkerRange.empty())
362 return;
363
364 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
365 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
366 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
367 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
368 << "\n");
369 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
370 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
371 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
372 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
373 if (Value)
374 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
375
376 /// Map of aggregate variables to their fragment associated with OldAlloca.
377  SmallDenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
378      BaseFragments;
379 for (auto *DAI : at::getAssignmentMarkers(OldAlloca))
380 BaseFragments[getAggregateVariable(DAI)] =
381 DAI->getExpression()->getFragmentInfo();
382 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
383 BaseFragments[getAggregateVariable(DVR)] =
384 DVR->getExpression()->getFragmentInfo();
385
386 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
387 // one). It shouldn't already have one: assert this assumption.
388 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
389 DIAssignID *NewID = nullptr;
390 auto &Ctx = Inst->getContext();
391 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
392 assert(OldAlloca->isStaticAlloca());
393
394 auto MigrateDbgAssign = [&](auto *DbgAssign) {
395 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
396 << "\n");
397 auto *Expr = DbgAssign->getExpression();
398 bool SetKillLocation = false;
399
400 if (IsSplit) {
401 std::optional<DIExpression::FragmentInfo> BaseFragment;
402 {
403 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
404 if (R == BaseFragments.end())
405 return;
406 BaseFragment = R->second;
407 }
408 std::optional<DIExpression::FragmentInfo> CurrentFragment =
409 Expr->getFragmentInfo();
410 DIExpression::FragmentInfo NewFragment;
411 FragCalcResult Result = calculateFragment(
412 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
413 BaseFragment, CurrentFragment, NewFragment);
414
415 if (Result == Skip)
416 return;
417 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
418 if (CurrentFragment) {
419 // Rewrite NewFragment to be relative to the existing one (this is
420 // what createFragmentExpression wants). CalculateFragment has
421 // already resolved the size for us. FIXME: Should it return the
422 // relative fragment too?
423 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
424 }
425 // Add the new fragment info to the existing expression if possible.
426        if (auto E = DIExpression::createFragmentExpression(
427                Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
428 Expr = *E;
429 } else {
430 // Otherwise, add the new fragment info to an empty expression and
431 // discard the value component of this dbg.assign as the value cannot
432 // be computed with the new fragment.
433          Expr = *DIExpression::createFragmentExpression(
434              DIExpression::get(Expr->getContext(), {}),
435 NewFragment.OffsetInBits, NewFragment.SizeInBits);
436 SetKillLocation = true;
437 }
438 }
439 }
440
441 // If we haven't created a DIAssignID ID do that now and attach it to Inst.
442 if (!NewID) {
443 NewID = DIAssignID::getDistinct(Ctx);
444 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
445 }
446
447 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
448 auto *NewAssign = UnwrapDbgInstPtr(
449 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
450 Dest, DIExpression::get(Expr->getContext(), {}),
451 DbgAssign->getDebugLoc()),
452 DbgAssign);
453
454 // If we've updated the value but the original dbg.assign has an arglist
455 // then kill it now - we can't use the requested new value.
456 // We can't replace the DIArgList with the new value as it'd leave
457 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
458 // an arglist). And we can't keep the DIArgList in case the linked store
459 // is being split - in which case the DIArgList + expression may no longer
460 // be computing the correct value.
461 // This should be a very rare situation as it requires the value being
462 // stored to differ from the dbg.assign (i.e., the value has been
463 // represented differently in the debug intrinsic for some reason).
464 SetKillLocation |=
465 Value && (DbgAssign->hasArgList() ||
466 !DbgAssign->getExpression()->isSingleLocationExpression());
467 if (SetKillLocation)
468 NewAssign->setKillLocation();
469
470 // We could use more precision here at the cost of some additional (code)
471 // complexity - if the original dbg.assign was adjacent to its store, we
472 // could position this new dbg.assign adjacent to its store rather than the
473    // old dbg.assign. That would result in interleaved dbg.assigns rather than
474 // what we get now:
475 // split store !1
476 // split store !2
477 // dbg.assign !1
478 // dbg.assign !2
479    // This (current behaviour) results in debug assignments being
480 // noted as slightly offset (in code) from the store. In practice this
481 // should have little effect on the debugging experience due to the fact
482 // that all the split stores should get the same line number.
483 NewAssign->moveBefore(DbgAssign);
484
485 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
486 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
487 };
488
489 for_each(MarkerRange, MigrateDbgAssign);
490 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
491}
492
493namespace {
494
495/// A custom IRBuilder inserter which prefixes all names, but only in
496/// Assert builds.
497class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
498 std::string Prefix;
499
500 Twine getNameWithPrefix(const Twine &Name) const {
501 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
502 }
503
504public:
505 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
506
507 void InsertHelper(Instruction *I, const Twine &Name,
508 BasicBlock::iterator InsertPt) const override {
509    IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
510                                           InsertPt);
511 }
512};
513
514/// Provide a type for IRBuilder that drops names in release builds.
515using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
516
517/// A used slice of an alloca.
518///
519/// This structure represents a slice of an alloca used by some instruction. It
520/// stores both the begin and end offsets of this use, a pointer to the use
521/// itself, and a flag indicating whether we can classify the use as splittable
522/// or not when forming partitions of the alloca.
523class Slice {
524 /// The beginning offset of the range.
525 uint64_t BeginOffset = 0;
526
527 /// The ending offset, not included in the range.
528 uint64_t EndOffset = 0;
529
530 /// Storage for both the use of this slice and whether it can be
531 /// split.
532 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
533
534public:
535 Slice() = default;
536
537 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
538 : BeginOffset(BeginOffset), EndOffset(EndOffset),
539 UseAndIsSplittable(U, IsSplittable) {}
540
541 uint64_t beginOffset() const { return BeginOffset; }
542 uint64_t endOffset() const { return EndOffset; }
543
544 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
545 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
546
547 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
548
549 bool isDead() const { return getUse() == nullptr; }
550 void kill() { UseAndIsSplittable.setPointer(nullptr); }
551
552 /// Support for ordering ranges.
553 ///
554 /// This provides an ordering over ranges such that start offsets are
555 /// always increasing, and within equal start offsets, the end offsets are
556 /// decreasing. Thus the spanning range comes first in a cluster with the
557 /// same start position.
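  /// For example, with the same start offset an unsplittable slice orders
  /// before a splittable one, and between two equally-splittable slices
  /// [0,16) orders before [0,8) because larger end offsets come first.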
558 bool operator<(const Slice &RHS) const {
559 if (beginOffset() < RHS.beginOffset())
560 return true;
561 if (beginOffset() > RHS.beginOffset())
562 return false;
563 if (isSplittable() != RHS.isSplittable())
564 return !isSplittable();
565 if (endOffset() > RHS.endOffset())
566 return true;
567 return false;
568 }
569
570 /// Support comparison with a single offset to allow binary searches.
571 friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
572 uint64_t RHSOffset) {
573 return LHS.beginOffset() < RHSOffset;
574 }
575 friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
576 const Slice &RHS) {
577 return LHSOffset < RHS.beginOffset();
578 }
579
580 bool operator==(const Slice &RHS) const {
581 return isSplittable() == RHS.isSplittable() &&
582 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
583 }
584 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
585};
586
587/// Representation of the alloca slices.
588///
589/// This class represents the slices of an alloca which are formed by its
590/// various uses. If a pointer escapes, we can't fully build a representation
591/// for the slices used and we reflect that in this structure. The uses are
592/// stored, sorted by increasing beginning offset and with unsplittable slices
593/// starting at a particular offset before splittable slices.
594class AllocaSlices {
595public:
596 /// Construct the slices of a particular alloca.
597 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
598
599 /// Test whether a pointer to the allocation escapes our analysis.
600 ///
601 /// If this is true, the slices are never fully built and should be
602 /// ignored.
603 bool isEscaped() const { return PointerEscapingInstr; }
604 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
605
606 /// Support for iterating over the slices.
607 /// @{
608 using iterator = SmallVectorImpl<Slice>::iterator;
609 using range = iterator_range<iterator>;
610
611 iterator begin() { return Slices.begin(); }
612 iterator end() { return Slices.end(); }
613
614  using const_iterator = SmallVectorImpl<Slice>::const_iterator;
615  using const_range = iterator_range<const_iterator>;
616
617 const_iterator begin() const { return Slices.begin(); }
618 const_iterator end() const { return Slices.end(); }
619 /// @}
620
621 /// Erase a range of slices.
622 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
623
624 /// Insert new slices for this alloca.
625 ///
626 /// This moves the slices into the alloca's slices collection, and re-sorts
627 /// everything so that the usual ordering properties of the alloca's slices
628 /// hold.
629 void insert(ArrayRef<Slice> NewSlices) {
630 int OldSize = Slices.size();
631 Slices.append(NewSlices.begin(), NewSlices.end());
632 auto SliceI = Slices.begin() + OldSize;
633 std::stable_sort(SliceI, Slices.end());
634 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
635 }
636
637 // Forward declare the iterator and range accessor for walking the
638 // partitions.
639 class partition_iterator;
640  iterator_range<partition_iterator> partitions();
641
642 /// Access the dead users for this alloca.
643 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
644
645 /// Access Uses that should be dropped if the alloca is promotable.
646 ArrayRef<Use *> getDeadUsesIfPromotable() const {
647 return DeadUseIfPromotable;
648 }
649
650 /// Access the dead operands referring to this alloca.
651 ///
652  /// These are operands which cannot actually be used to refer to the
653 /// alloca as they are outside its range and the user doesn't correct for
654 /// that. These mostly consist of PHI node inputs and the like which we just
655 /// need to replace with undef.
656 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
657
658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
659 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
660 void printSlice(raw_ostream &OS, const_iterator I,
661 StringRef Indent = " ") const;
662 void printUse(raw_ostream &OS, const_iterator I,
663 StringRef Indent = " ") const;
664 void print(raw_ostream &OS) const;
665 void dump(const_iterator I) const;
666 void dump() const;
667#endif
668
669private:
670 template <typename DerivedT, typename RetT = void> class BuilderBase;
671 class SliceBuilder;
672
673 friend class AllocaSlices::SliceBuilder;
674
675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
676 /// Handle to alloca instruction to simplify method interfaces.
677 AllocaInst &AI;
678#endif
679
680 /// The instruction responsible for this alloca not having a known set
681 /// of slices.
682 ///
683 /// When an instruction (potentially) escapes the pointer to the alloca, we
684 /// store a pointer to that here and abort trying to form slices of the
685 /// alloca. This will be null if the alloca slices are analyzed successfully.
686 Instruction *PointerEscapingInstr;
687 Instruction *PointerEscapingInstrReadOnly;
688
689 /// The slices of the alloca.
690 ///
691 /// We store a vector of the slices formed by uses of the alloca here. This
692 /// vector is sorted by increasing begin offset, and then the unsplittable
693 /// slices before the splittable ones. See the Slice inner class for more
694 /// details.
695  SmallVector<Slice, 8> Slices;
696
697 /// Instructions which will become dead if we rewrite the alloca.
698 ///
699 /// Note that these are not separated by slice. This is because we expect an
700 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
701 /// all these instructions can simply be removed and replaced with poison as
702 /// they come from outside of the allocated space.
703  SmallVector<Instruction *, 8> DeadUsers;
704
705  /// Uses which will become dead if we can promote the alloca.
706 SmallVector<Use *, 8> DeadUseIfPromotable;
707
708 /// Operands which will become dead if we rewrite the alloca.
709 ///
710 /// These are operands that in their particular use can be replaced with
711 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
712 /// to PHI nodes and the like. They aren't entirely dead (there might be
713 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
714 /// want to swap this particular input for poison to simplify the use lists of
715 /// the alloca.
716 SmallVector<Use *, 8> DeadOperands;
717};
718
719/// A partition of the slices.
720///
721/// An ephemeral representation for a range of slices which can be viewed as
722/// a partition of the alloca. This range represents a span of the alloca's
723/// memory which cannot be split, and provides access to all of the slices
724/// overlapping some part of the partition.
725///
726/// Objects of this type are produced by traversing the alloca's slices, but
727/// are only ephemeral and not persistent.
728class Partition {
729private:
730 friend class AllocaSlices;
731  friend class AllocaSlices::partition_iterator;
732
733 using iterator = AllocaSlices::iterator;
734
735 /// The beginning and ending offsets of the alloca for this
736 /// partition.
737 uint64_t BeginOffset = 0, EndOffset = 0;
738
739 /// The start and end iterators of this partition.
740 iterator SI, SJ;
741
742 /// A collection of split slice tails overlapping the partition.
743 SmallVector<Slice *, 4> SplitTails;
744
745 /// Raw constructor builds an empty partition starting and ending at
746 /// the given iterator.
747 Partition(iterator SI) : SI(SI), SJ(SI) {}
748
749public:
750 /// The start offset of this partition.
751 ///
752 /// All of the contained slices start at or after this offset.
753 uint64_t beginOffset() const { return BeginOffset; }
754
755 /// The end offset of this partition.
756 ///
757 /// All of the contained slices end at or before this offset.
758 uint64_t endOffset() const { return EndOffset; }
759
760 /// The size of the partition.
761 ///
762 /// Note that this can never be zero.
763 uint64_t size() const {
764 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
765 return EndOffset - BeginOffset;
766 }
767
768 /// Test whether this partition contains no slices, and merely spans
769 /// a region occupied by split slices.
770 bool empty() const { return SI == SJ; }
771
772 /// \name Iterate slices that start within the partition.
773 /// These may be splittable or unsplittable. They have a begin offset >= the
774 /// partition begin offset.
775 /// @{
776 // FIXME: We should probably define a "concat_iterator" helper and use that
777 // to stitch together pointee_iterators over the split tails and the
778 // contiguous iterators of the partition. That would give a much nicer
779 // interface here. We could then additionally expose filtered iterators for
780  // split, unsplit, and unsplittable slices based on the usage patterns.
781 iterator begin() const { return SI; }
782 iterator end() const { return SJ; }
783 /// @}
784
785 /// Get the sequence of split slice tails.
786 ///
787 /// These tails are of slices which start before this partition but are
788 /// split and overlap into the partition. We accumulate these while forming
789 /// partitions.
790 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
791};
792
793} // end anonymous namespace
794
795/// An iterator over partitions of the alloca's slices.
796///
797/// This iterator implements the core algorithm for partitioning the alloca's
798/// slices. It is a forward iterator as we don't support backtracking for
799/// efficiency reasons, and re-use a single storage area to maintain the
800/// current set of split slices.
801///
802/// It is templated on the slice iterator type to use so that it can operate
803/// with either const or non-const slice iterators.
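///
/// As an illustrative walk (hypothetical offsets): given slices [0,8)
/// unsplittable, [0,16) splittable, and [24,32) unsplittable, this yields a
/// partition covering [0,8) first, then an "empty" partition [8,24) holding
/// only the split tail of the [0,16) slice, and finally a partition covering
/// [24,32).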
804class AllocaSlices::partition_iterator
805    : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
806 Partition> {
807 friend class AllocaSlices;
808
809 /// Most of the state for walking the partitions is held in a class
810 /// with a nice interface for examining them.
811 Partition P;
812
813 /// We need to keep the end of the slices to know when to stop.
814 AllocaSlices::iterator SE;
815
816 /// We also need to keep track of the maximum split end offset seen.
817 /// FIXME: Do we really?
818 uint64_t MaxSplitSliceEndOffset = 0;
819
820 /// Sets the partition to be empty at given iterator, and sets the
821 /// end iterator.
822 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
823 : P(SI), SE(SE) {
824 // If not already at the end, advance our state to form the initial
825 // partition.
826 if (SI != SE)
827 advance();
828 }
829
830 /// Advance the iterator to the next partition.
831 ///
832 /// Requires that the iterator not be at the end of the slices.
833 void advance() {
834 assert((P.SI != SE || !P.SplitTails.empty()) &&
835 "Cannot advance past the end of the slices!");
836
837 // Clear out any split uses which have ended.
838 if (!P.SplitTails.empty()) {
839 if (P.EndOffset >= MaxSplitSliceEndOffset) {
840 // If we've finished all splits, this is easy.
841 P.SplitTails.clear();
842 MaxSplitSliceEndOffset = 0;
843 } else {
844 // Remove the uses which have ended in the prior partition. This
845 // cannot change the max split slice end because we just checked that
846 // the prior partition ended prior to that max.
847 llvm::erase_if(P.SplitTails,
848 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
849 assert(llvm::any_of(P.SplitTails,
850 [&](Slice *S) {
851 return S->endOffset() == MaxSplitSliceEndOffset;
852 }) &&
853 "Could not find the current max split slice offset!");
854 assert(llvm::all_of(P.SplitTails,
855 [&](Slice *S) {
856 return S->endOffset() <= MaxSplitSliceEndOffset;
857 }) &&
858 "Max split slice end offset is not actually the max!");
859 }
860 }
861
862 // If P.SI is already at the end, then we've cleared the split tail and
863 // now have an end iterator.
864 if (P.SI == SE) {
865 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
866 return;
867 }
868
869 // If we had a non-empty partition previously, set up the state for
870 // subsequent partitions.
871 if (P.SI != P.SJ) {
872 // Accumulate all the splittable slices which started in the old
873 // partition into the split list.
874 for (Slice &S : P)
875 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
876 P.SplitTails.push_back(&S);
877 MaxSplitSliceEndOffset =
878 std::max(S.endOffset(), MaxSplitSliceEndOffset);
879 }
880
881 // Start from the end of the previous partition.
882 P.SI = P.SJ;
883
884 // If P.SI is now at the end, we at most have a tail of split slices.
885 if (P.SI == SE) {
886 P.BeginOffset = P.EndOffset;
887 P.EndOffset = MaxSplitSliceEndOffset;
888 return;
889 }
890
891      // If we have split slices and the next slice is after a gap and is
892      // not splittable, immediately form an empty partition for the split
893 // slices up until the next slice begins.
894 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
895 !P.SI->isSplittable()) {
896 P.BeginOffset = P.EndOffset;
897 P.EndOffset = P.SI->beginOffset();
898 return;
899 }
900 }
901
902 // OK, we need to consume new slices. Set the end offset based on the
903 // current slice, and step SJ past it. The beginning offset of the
904 // partition is the beginning offset of the next slice unless we have
905 // pre-existing split slices that are continuing, in which case we begin
906 // at the prior end offset.
907 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
908 P.EndOffset = P.SI->endOffset();
909 ++P.SJ;
910
911 // There are two strategies to form a partition based on whether the
912 // partition starts with an unsplittable slice or a splittable slice.
913 if (!P.SI->isSplittable()) {
914 // When we're forming an unsplittable region, it must always start at
915 // the first slice and will extend through its end.
916 assert(P.BeginOffset == P.SI->beginOffset());
917
918 // Form a partition including all of the overlapping slices with this
919 // unsplittable slice.
920 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
921 if (!P.SJ->isSplittable())
922 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
923 ++P.SJ;
924 }
925
926 // We have a partition across a set of overlapping unsplittable
927      // slices.
928 return;
929 }
930
931 // If we're starting with a splittable slice, then we need to form
932 // a synthetic partition spanning it and any other overlapping splittable
933    // slices.
934 assert(P.SI->isSplittable() && "Forming a splittable partition!");
935
936 // Collect all of the overlapping splittable slices.
937 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
938 P.SJ->isSplittable()) {
939 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
940 ++P.SJ;
941 }
942
943    // Back up P.EndOffset if we ended the span early when encountering an
944 // unsplittable slice. This synthesizes the early end offset of
945 // a partition spanning only splittable slices.
946 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
947 assert(!P.SJ->isSplittable());
948 P.EndOffset = P.SJ->beginOffset();
949 }
950 }
951
952public:
953 bool operator==(const partition_iterator &RHS) const {
954 assert(SE == RHS.SE &&
955 "End iterators don't match between compared partition iterators!");
956
957    // The observed positions of partitions are marked by the P.SI iterator and
958 // the emptiness of the split slices. The latter is only relevant when
959 // P.SI == SE, as the end iterator will additionally have an empty split
960 // slices list, but the prior may have the same P.SI and a tail of split
961 // slices.
962 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
963 assert(P.SJ == RHS.P.SJ &&
964 "Same set of slices formed two different sized partitions!");
965 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
966 "Same slice position with differently sized non-empty split "
967 "slice tails!");
968 return true;
969 }
970 return false;
971 }
972
973  partition_iterator &operator++() {
974    advance();
975 return *this;
976 }
977
978 Partition &operator*() { return P; }
979};
980
981/// A forward range over the partitions of the alloca's slices.
982///
983/// This accesses an iterator range over the partitions of the alloca's
984/// slices. It computes these partitions on the fly based on the overlapping
985/// offsets of the slices and the ability to split them. It will visit "empty"
986/// partitions to cover regions of the alloca only accessed via split
987/// slices.
988iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
989 return make_range(partition_iterator(begin(), end()),
990 partition_iterator(end(), end()));
991}
992
993static Value *foldSelectInst(SelectInst &SI) {
994  // If the condition being selected on is a constant or the same value is
995 // being selected between, fold the select. Yes this does (rarely) happen
996 // early on.
997 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
998 return SI.getOperand(1 + CI->isZero());
999 if (SI.getOperand(1) == SI.getOperand(2))
1000 return SI.getOperand(1);
1001
1002 return nullptr;
1003}
1004
1005/// A helper that folds a PHI node or a select.
1006static Value *foldPHINodeOrSelectInst(Instruction &I) {
1007  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1008 // If PN merges together the same value, return that value.
1009 return PN->hasConstantValue();
1010 }
1011 return foldSelectInst(cast<SelectInst>(I));
1012}
1013
1014/// Builder for the alloca slices.
1015///
1016/// This class builds a set of alloca slices by recursively visiting the uses
1017/// of an alloca and making a slice for each load and store at each offset.
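///
/// For instance (a hypothetical alloca of type i64): a store of an i32 at
/// offset 0 and a load of the full i64 produce the slices [0,4) and [0,8),
/// both splittable because they are simple, non-volatile integer accesses.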
1018class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1019 friend class PtrUseVisitor<SliceBuilder>;
1020 friend class InstVisitor<SliceBuilder>;
1021
1022  using Base = PtrUseVisitor<SliceBuilder>;
1023
1024 const uint64_t AllocSize;
1025 AllocaSlices &AS;
1026
1027 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1028  SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1029
1030 /// Set to de-duplicate dead instructions found in the use walk.
1031 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1032
1033public:
1034 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1035      : PtrUseVisitor<SliceBuilder>(DL),
1036        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1037 AS(AS) {}
1038
1039private:
1040 void markAsDead(Instruction &I) {
1041 if (VisitedDeadInsts.insert(&I).second)
1042 AS.DeadUsers.push_back(&I);
1043 }
1044
1045 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1046 bool IsSplittable = false) {
1047 // Completely skip uses which have a zero size or start either before or
1048 // past the end of the allocation.
1049 if (Size == 0 || Offset.uge(AllocSize)) {
1050 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1051 << Offset
1052 << " which has zero size or starts outside of the "
1053 << AllocSize << " byte alloca:\n"
1054 << " alloca: " << AS.AI << "\n"
1055 << " use: " << I << "\n");
1056 return markAsDead(I);
1057 }
1058
1059 uint64_t BeginOffset = Offset.getZExtValue();
1060 uint64_t EndOffset = BeginOffset + Size;
1061
1062 // Clamp the end offset to the end of the allocation. Note that this is
1063 // formulated to handle even the case where "BeginOffset + Size" overflows.
1064 // This may appear superficially to be something we could ignore entirely,
1065 // but that is not so! There may be widened loads or PHI-node uses where
1066 // some instructions are dead but not others. We can't completely ignore
1067 // them, and so have to record at least the information here.
1068 assert(AllocSize >= BeginOffset); // Established above.
1069 if (Size > AllocSize - BeginOffset) {
1070 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1071 << Offset << " to remain within the " << AllocSize
1072 << " byte alloca:\n"
1073 << " alloca: " << AS.AI << "\n"
1074 << " use: " << I << "\n");
1075 EndOffset = AllocSize;
1076 }
1077
1078 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1079 }
1080
1081 void visitBitCastInst(BitCastInst &BC) {
1082 if (BC.use_empty())
1083 return markAsDead(BC);
1084
1085 return Base::visitBitCastInst(BC);
1086 }
1087
1088 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1089 if (ASC.use_empty())
1090 return markAsDead(ASC);
1091
1092 return Base::visitAddrSpaceCastInst(ASC);
1093 }
1094
1095 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1096 if (GEPI.use_empty())
1097 return markAsDead(GEPI);
1098
1099 return Base::visitGetElementPtrInst(GEPI);
1100 }
1101
1102 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1103 uint64_t Size, bool IsVolatile) {
1104 // We allow splitting of non-volatile loads and stores where the type is an
1105 // integer type. These may be used to implement 'memcpy' or other "transfer
1106 // of bits" patterns.
1107 bool IsSplittable =
1108        Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1109
1110 insertUse(I, Offset, Size, IsSplittable);
1111 }
1112
1113 void visitLoadInst(LoadInst &LI) {
1114 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1115 "All simple FCA loads should have been pre-split");
1116
1117 if (!IsOffsetKnown)
1118 return PI.setAborted(&LI);
1119
1120    TypeSize Size = DL.getTypeStoreSize(LI.getType());
1121    if (Size.isScalable())
1122 return PI.setAborted(&LI);
1123
1124 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1125 LI.isVolatile());
1126 }
1127
1128 void visitStoreInst(StoreInst &SI) {
1129 Value *ValOp = SI.getValueOperand();
1130 if (ValOp == *U)
1131 return PI.setEscapedAndAborted(&SI);
1132 if (!IsOffsetKnown)
1133 return PI.setAborted(&SI);
1134
1135 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1136 if (StoreSize.isScalable())
1137 return PI.setAborted(&SI);
1138
1139 uint64_t Size = StoreSize.getFixedValue();
1140
1141 // If this memory access can be shown to *statically* extend outside the
1142    // bounds of the allocation, its behavior is undefined, so simply
1143 // ignore it. Note that this is more strict than the generic clamping
1144 // behavior of insertUse. We also try to handle cases which might run the
1145 // risk of overflow.
1146 // FIXME: We should instead consider the pointer to have escaped if this
1147 // function is being instrumented for addressing bugs or race conditions.
1148 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1149 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1150 << Offset << " which extends past the end of the "
1151 << AllocSize << " byte alloca:\n"
1152 << " alloca: " << AS.AI << "\n"
1153 << " use: " << SI << "\n");
1154 return markAsDead(SI);
1155 }
1156
1157 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1158 "All simple FCA stores should have been pre-split");
1159 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1160 }
1161
1162 void visitMemSetInst(MemSetInst &II) {
1163 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1164 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1165 if ((Length && Length->getValue() == 0) ||
1166 (IsOffsetKnown && Offset.uge(AllocSize)))
1167 // Zero-length mem transfer intrinsics can be ignored entirely.
1168 return markAsDead(II);
1169
1170 if (!IsOffsetKnown)
1171 return PI.setAborted(&II);
1172
1173 insertUse(II, Offset,
1174 Length ? Length->getLimitedValue()
1175 : AllocSize - Offset.getLimitedValue(),
1176 (bool)Length);
1177 }
1178
1179 void visitMemTransferInst(MemTransferInst &II) {
1180 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1181 if (Length && Length->getValue() == 0)
1182 // Zero-length mem transfer intrinsics can be ignored entirely.
1183 return markAsDead(II);
1184
1185 // Because we can visit these intrinsics twice, also check to see if the
1186 // first time marked this instruction as dead. If so, skip it.
1187 if (VisitedDeadInsts.count(&II))
1188 return;
1189
1190 if (!IsOffsetKnown)
1191 return PI.setAborted(&II);
1192
1193 // This side of the transfer is completely out-of-bounds, and so we can
1194 // nuke the entire transfer. However, we also need to nuke the other side
1195 // if already added to our partitions.
1196 // FIXME: Yet another place we really should bypass this when
1197 // instrumenting for ASan.
1198 if (Offset.uge(AllocSize)) {
1199      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1200          MemTransferSliceMap.find(&II);
1201 if (MTPI != MemTransferSliceMap.end())
1202 AS.Slices[MTPI->second].kill();
1203 return markAsDead(II);
1204 }
1205
1206 uint64_t RawOffset = Offset.getLimitedValue();
1207 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1208
1209 // Check for the special case where the same exact value is used for both
1210 // source and dest.
1211 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1212 // For non-volatile transfers this is a no-op.
1213 if (!II.isVolatile())
1214 return markAsDead(II);
1215
1216 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1217 }
1218
1219 // If we have seen both source and destination for a mem transfer, then
1220 // they both point to the same alloca.
1221 bool Inserted;
1222    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1223    std::tie(MTPI, Inserted) =
1224 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1225 unsigned PrevIdx = MTPI->second;
1226 if (!Inserted) {
1227 Slice &PrevP = AS.Slices[PrevIdx];
1228
1229 // Check if the begin offsets match and this is a non-volatile transfer.
1230 // In that case, we can completely elide the transfer.
1231 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1232 PrevP.kill();
1233 return markAsDead(II);
1234 }
1235
1236 // Otherwise we have an offset transfer within the same alloca. We can't
1237 // split those.
1238 PrevP.makeUnsplittable();
1239 }
1240
1241 // Insert the use now that we've fixed up the splittable nature.
1242 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1243
1244 // Check that we ended up with a valid index in the map.
1245 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1246 "Map index doesn't point back to a slice with this user.");
1247 }
1248
1249 // Disable SRoA for any intrinsics except for lifetime invariants and
1250 // invariant group.
1251 // FIXME: What about debug intrinsics? This matches old behavior, but
1252 // doesn't make sense.
1253 void visitIntrinsicInst(IntrinsicInst &II) {
1254 if (II.isDroppable()) {
1255 AS.DeadUseIfPromotable.push_back(U);
1256 return;
1257 }
1258
1259 if (!IsOffsetKnown)
1260 return PI.setAborted(&II);
1261
1262 if (II.isLifetimeStartOrEnd()) {
1263 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
1264 uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
1265 Length->getLimitedValue());
1266 insertUse(II, Offset, Size, true);
1267 return;
1268 }
1269
1270 if (II.isLaunderOrStripInvariantGroup()) {
1271 insertUse(II, Offset, AllocSize, true);
1272      enqueueUsers(II);
1273      return;
1274 }
1275
1276    Base::visitIntrinsicInst(II);
1277  }
1278
1279 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1280 // We consider any PHI or select that results in a direct load or store of
1281 // the same offset to be a viable use for slicing purposes. These uses
1282 // are considered unsplittable and the size is the maximum loaded or stored
1283 // size.
1284    SmallPtrSet<Instruction *, 4> Visited;
1285    SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1286    Visited.insert(Root);
1287 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1288 const DataLayout &DL = Root->getDataLayout();
1289 // If there are no loads or stores, the access is dead. We mark that as
1290 // a size zero access.
1291 Size = 0;
1292 do {
1293 Instruction *I, *UsedI;
1294 std::tie(UsedI, I) = Uses.pop_back_val();
1295
1296 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1297 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1298 if (LoadSize.isScalable()) {
1299 PI.setAborted(LI);
1300 return nullptr;
1301 }
1302 Size = std::max(Size, LoadSize.getFixedValue());
1303 continue;
1304 }
1305 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1306 Value *Op = SI->getOperand(0);
1307 if (Op == UsedI)
1308 return SI;
1309 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1310 if (StoreSize.isScalable()) {
1311 PI.setAborted(SI);
1312 return nullptr;
1313 }
1314 Size = std::max(Size, StoreSize.getFixedValue());
1315 continue;
1316 }
1317
1318 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1319 if (!GEP->hasAllZeroIndices())
1320 return GEP;
1321 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1322 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1323 return I;
1324 }
1325
1326 for (User *U : I->users())
1327 if (Visited.insert(cast<Instruction>(U)).second)
1328 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1329 } while (!Uses.empty());
1330
1331 return nullptr;
1332 }
1333
1334 void visitPHINodeOrSelectInst(Instruction &I) {
1335 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1336 if (I.use_empty())
1337 return markAsDead(I);
1338
1339 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1340 // instructions in this BB, which may be required during rewriting. Bail out
1341 // on these cases.
1342 if (isa<PHINode>(I) &&
1343 I.getParent()->getFirstInsertionPt() == I.getParent()->end())
1344 return PI.setAborted(&I);
1345
1346 // TODO: We could use simplifyInstruction here to fold PHINodes and
1347 // SelectInsts. However, doing so requires to change the current
1348 // dead-operand-tracking mechanism. For instance, suppose neither loading
1349 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1350 // trap either. However, if we simply replace %U with undef using the
1351 // current dead-operand-tracking mechanism, "load (select undef, undef,
1352 // %other)" may trap because the select may return the first operand
1353 // "undef".
1354 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1355 if (Result == *U)
1356 // If the result of the constant fold will be the pointer, recurse
1357 // through the PHI/select as if we had RAUW'ed it.
1358 enqueueUsers(I);
1359 else
1360 // Otherwise the operand to the PHI/select is dead, and we can replace
1361 // it with poison.
1362 AS.DeadOperands.push_back(U);
1363
1364 return;
1365 }
1366
1367 if (!IsOffsetKnown)
1368 return PI.setAborted(&I);
1369
1370 // See if we already have computed info on this node.
1371 uint64_t &Size = PHIOrSelectSizes[&I];
1372 if (!Size) {
1373 // This is a new PHI/Select, check for an unsafe use of it.
1374 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1375 return PI.setAborted(UnsafeI);
1376 }
1377
1378 // For PHI and select operands outside the alloca, we can't nuke the entire
1379 // phi or select -- the other side might still be relevant, so we special
1380 // case them here and use a separate structure to track the operands
1381 // themselves which should be replaced with poison.
1382 // FIXME: This should instead be escaped in the event we're instrumenting
1383 // for address sanitization.
1384 if (Offset.uge(AllocSize)) {
1385 AS.DeadOperands.push_back(U);
1386 return;
1387 }
1388
1389 insertUse(I, Offset, Size);
1390 }
1391
1392 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1393
1394 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1395
1396 /// Disable SROA entirely if there are unhandled users of the alloca.
1397 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1398
1399 void visitCallBase(CallBase &CB) {
1400 // If the call operand is NoCapture ReadOnly, then we mark it as
1401 // EscapedReadOnly.
1402 if (CB.isDataOperand(U) &&
1403 CB.doesNotCapture(U->getOperandNo()) &&
1404 CB.onlyReadsMemory(U->getOperandNo())) {
1405      PI.setEscapedReadOnly(&CB);
1406      return;
1407 }
1408
1409    Base::visitCallBase(CB);
1410  }
1411};
1412
1413AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1414 :
1415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1416 AI(AI),
1417#endif
1418 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1419 SliceBuilder PB(DL, AI, *this);
1420 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1421 if (PtrI.isEscaped() || PtrI.isAborted()) {
1422 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1423 // possibly by just storing the PtrInfo in the AllocaSlices.
1424 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1425 : PtrI.getAbortingInst();
1426 assert(PointerEscapingInstr && "Did not track a bad instruction");
1427 return;
1428 }
1429 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1430
1431 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1432
1433 // Sort the uses. This arranges for the offsets to be in ascending order,
1434 // and the sizes to be in descending order.
1435 llvm::stable_sort(Slices);
1436}
1437
1438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1439
1440void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1441 StringRef Indent) const {
1442 printSlice(OS, I, Indent);
1443 OS << "\n";
1444 printUse(OS, I, Indent);
1445}
1446
1447void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1448 StringRef Indent) const {
1449 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1450 << " slice #" << (I - begin())
1451 << (I->isSplittable() ? " (splittable)" : "");
1452}
1453
1454void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1455 StringRef Indent) const {
1456 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1457}
1458
1459void AllocaSlices::print(raw_ostream &OS) const {
1460 if (PointerEscapingInstr) {
1461 OS << "Can't analyze slices for alloca: " << AI << "\n"
1462 << " A pointer to this alloca escaped by:\n"
1463 << " " << *PointerEscapingInstr << "\n";
1464 return;
1465 }
1466
1467 if (PointerEscapingInstrReadOnly)
1468 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1469
1470 OS << "Slices of alloca: " << AI << "\n";
1471 for (const_iterator I = begin(), E = end(); I != E; ++I)
1472 print(OS, I);
1473}
1474
1475LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1476 print(dbgs(), I);
1477}
1478LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1479
1480#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1481
1482/// Walk the range of a partitioning looking for a common type to cover this
1483/// sequence of slices.
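/// For example (hypothetical uses): if one slice spanning the whole partition
/// is loaded as an i64 and another is stored as a double, there is no common
/// type, but i64 is still returned as the widest byte-width integer type used;
/// if every spanning use were an i64, i64 would also be the common type.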
1484static std::pair<Type *, IntegerType *>
1485findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1486 uint64_t EndOffset) {
1487 Type *Ty = nullptr;
1488 bool TyIsCommon = true;
1489 IntegerType *ITy = nullptr;
1490
1491 // Note that we need to look at *every* alloca slice's Use to ensure we
1492 // always get consistent results regardless of the order of slices.
1493 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1494 Use *U = I->getUse();
1495 if (isa<IntrinsicInst>(*U->getUser()))
1496 continue;
1497 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1498 continue;
1499
1500 Type *UserTy = nullptr;
1501 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1502 UserTy = LI->getType();
1503 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1504 UserTy = SI->getValueOperand()->getType();
1505 }
1506
1507 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1508 // If the type is larger than the partition, skip it. We only encounter
1509 // this for split integer operations where we want to use the type of the
1510 // entity causing the split. Also skip if the type is not a byte width
1511 // multiple.
1512 if (UserITy->getBitWidth() % 8 != 0 ||
1513 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1514 continue;
1515
1516 // Track the largest bitwidth integer type used in this way in case there
1517 // is no common type.
1518 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1519 ITy = UserITy;
1520 }
1521
1522 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1523 // depend on types skipped above.
1524 if (!UserTy || (Ty && Ty != UserTy))
1525 TyIsCommon = false; // Give up on anything but an iN type.
1526 else
1527 Ty = UserTy;
1528 }
1529
1530 return {TyIsCommon ? Ty : nullptr, ITy};
1531}
1532
1533/// PHI instructions that use an alloca and are subsequently loaded can be
1534/// rewritten to load both input pointers in the pred blocks and then PHI the
1535/// results, allowing the load of the alloca to be promoted.
1536/// From this:
1537/// %P2 = phi [i32* %Alloca, i32* %Other]
1538/// %V = load i32* %P2
1539/// to:
1540/// %V1 = load i32* %Alloca -> will be mem2reg'd
1541/// ...
1542/// %V2 = load i32* %Other
1543/// ...
1544/// %V = phi [i32 %V1, i32 %V2]
1545///
1546/// We can do this to a select if its only uses are loads and if the operands
1547/// to the select can be loaded unconditionally.
1548///
1549/// FIXME: This should be hoisted into a generic utility, likely in
1550/// Transforms/Util/Local.h
1551 static bool isSafePHIToSpeculate(PHINode &PN) {
1552 const DataLayout &DL = PN.getDataLayout();
1553
1554 // For now, we can only do this promotion if the load is in the same block
1555 // as the PHI, and if there are no stores between the phi and load.
1556 // TODO: Allow recursive phi users.
1557 // TODO: Allow stores.
1558 BasicBlock *BB = PN.getParent();
1559 Align MaxAlign;
1560 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1561 Type *LoadType = nullptr;
1562 for (User *U : PN.users()) {
1563 LoadInst *LI = dyn_cast<LoadInst>(U);
1564 if (!LI || !LI->isSimple())
1565 return false;
1566
1567 // For now we only allow loads in the same block as the PHI. This is
1568 // a common case that happens when instcombine merges two loads through
1569 // a PHI.
1570 if (LI->getParent() != BB)
1571 return false;
1572
1573 if (LoadType) {
1574 if (LoadType != LI->getType())
1575 return false;
1576 } else {
1577 LoadType = LI->getType();
1578 }
1579
1580 // Ensure that there are no instructions between the PHI and the load that
1581 // could store.
1582 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1583 if (BBI->mayWriteToMemory())
1584 return false;
1585
1586 MaxAlign = std::max(MaxAlign, LI->getAlign());
1587 }
1588
1589 if (!LoadType)
1590 return false;
1591
1592 APInt LoadSize =
1593 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1594
1595 // We can only transform this if it is safe to push the loads into the
1596 // predecessor blocks. The only thing to watch out for is that we can't put
1597 // a possibly trapping load in the predecessor if it is a critical edge.
1598 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1599 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1600 Value *InVal = PN.getIncomingValue(Idx);
1601
1602 // If the value is produced by the terminator of the predecessor (an
1603 // invoke) or it has side-effects, there is no valid place to put a load
1604 // in the predecessor.
1605 if (TI == InVal || TI->mayHaveSideEffects())
1606 return false;
1607
1608 // If the predecessor has a single successor, then the edge isn't
1609 // critical.
1610 if (TI->getNumSuccessors() == 1)
1611 continue;
1612
1613 // If this pointer is always safe to load, or if we can prove that there
1614 // is already a load in the block, then we can move the load to the pred
1615 // block.
1616 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1617 continue;
1618
1619 return false;
1620 }
1621
1622 return true;
1623}
1624
1625static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1626 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1627
1628 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1629 Type *LoadTy = SomeLoad->getType();
1630 IRB.SetInsertPoint(&PN);
1631 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1632 PN.getName() + ".sroa.speculated");
1633
1634 // Get the AA tags and alignment to use from one of the loads. It does not
1635 // matter which one we use or whether any of them differ.
1636 AAMDNodes AATags = SomeLoad->getAAMetadata();
1637 Align Alignment = SomeLoad->getAlign();
1638
1639 // Rewrite all loads of the PN to use the new PHI.
1640 while (!PN.use_empty()) {
1641 LoadInst *LI = cast<LoadInst>(PN.user_back());
1642 LI->replaceAllUsesWith(NewPN);
1643 LI->eraseFromParent();
1644 }
1645
1646 // Inject loads into all of the pred blocks.
1647 DenseMap<BasicBlock *, Value *> InjectedLoads;
1648 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1649 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1650 Value *InVal = PN.getIncomingValue(Idx);
1651
1652 // A PHI node is allowed to have multiple (duplicated) entries for the same
1653 // basic block, as long as the value is the same. So if we already injected
1654 // a load in the predecessor, then we should reuse the same load for all
1655 // duplicated entries.
1656 if (Value *V = InjectedLoads.lookup(Pred)) {
1657 NewPN->addIncoming(V, Pred);
1658 continue;
1659 }
1660
1661 Instruction *TI = Pred->getTerminator();
1662 IRB.SetInsertPoint(TI);
1663
1664 LoadInst *Load = IRB.CreateAlignedLoad(
1665 LoadTy, InVal, Alignment,
1666 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1667 ++NumLoadsSpeculated;
1668 if (AATags)
1669 Load->setAAMetadata(AATags);
1670 NewPN->addIncoming(Load, Pred);
1671 InjectedLoads[Pred] = Load;
1672 }
1673
1674 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1675 PN.eraseFromParent();
1676}
1677
1678SelectHandSpeculativity &
1679SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1680 if (isTrueVal)
1681 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1682 else
1683 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1684 return *this;
1685}
1686
1687bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1688 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1689 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1690}
1691
1692bool SelectHandSpeculativity::areAllSpeculatable() const {
1693 return isSpeculatable(/*isTrueVal=*/true) &&
1694 isSpeculatable(/*isTrueVal=*/false);
1695}
1696
1697bool SelectHandSpeculativity::areAnySpeculatable() const {
1698 return isSpeculatable(/*isTrueVal=*/true) ||
1699 isSpeculatable(/*isTrueVal=*/false);
1700}
1701bool SelectHandSpeculativity::areNoneSpeculatable() const {
1702 return !areAnySpeculatable();
1703}
1704
1705static SelectHandSpeculativity
1706 isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1707 assert(LI.isSimple() && "Only for simple loads");
1708 SelectHandSpeculativity Spec;
1709
1710 const DataLayout &DL = SI.getDataLayout();
1711 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1712 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1713 &LI))
1714 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1715 else if (PreserveCFG)
1716 return Spec;
1717
1718 return Spec;
1719}
1720
1721std::optional<RewriteableMemOps>
1722SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1723 RewriteableMemOps Ops;
1724
1725 for (User *U : SI.users()) {
1726 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1727 U = *BC->user_begin();
1728
1729 if (auto *Store = dyn_cast<StoreInst>(U)) {
1730 // Note that atomic stores can be transformed; atomic semantics do not
1731 // have any meaning for a local alloca. Stores are not speculatable,
1732 // however, so if we can't turn it into a predicated store, we are done.
1733 if (Store->isVolatile() || PreserveCFG)
1734 return {}; // Give up on this `select`.
1735 Ops.emplace_back(Store);
1736 continue;
1737 }
1738
1739 auto *LI = dyn_cast<LoadInst>(U);
1740
1741 // Note that atomic loads can be transformed;
1742 // atomic semantics do not have any meaning for a local alloca.
1743 if (!LI || LI->isVolatile())
1744 return {}; // Give up on this `select`.
1745
1746 PossiblySpeculatableLoad Load(LI);
1747 if (!LI->isSimple()) {
1748 // If the `load` is not simple, we can't speculatively execute it,
1749 // but we could handle this via a CFG modification. But can we?
1750 if (PreserveCFG)
1751 return {}; // Give up on this `select`.
1752 Ops.emplace_back(Load);
1753 continue;
1754 }
1755
1756 SelectHandSpeculativity Spec =
1757 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1758 if (PreserveCFG && !Spec.areAllSpeculatable())
1759 return {}; // Give up on this `select`.
1760
1761 Load.setInt(Spec);
1762 Ops.emplace_back(Load);
1763 }
1764
1765 return Ops;
1766}
1767
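/// Illustrative sketch of the rewrite performed below (assuming a simple i32
/// load through the select):
///   %p = select i1 %c, ptr %a, ptr %b
///   %v = load i32, ptr %p
/// becomes
///   %v.true  = load i32, ptr %a
///   %v.false = load i32, ptr %b
///   %v       = select i1 %c, i32 %v.true, i32 %v.false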
1768 static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1769 IRBuilderTy &IRB) {
1770 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1771
1772 Value *TV = SI.getTrueValue();
1773 Value *FV = SI.getFalseValue();
1774 // Replace the given load of the select with a select of two loads.
1775
1776 assert(LI.isSimple() && "We only speculate simple loads");
1777
1778 IRB.SetInsertPoint(&LI);
1779
1780 LoadInst *TL =
1781 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1782 LI.getName() + ".sroa.speculate.load.true");
1783 LoadInst *FL =
1784 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1785 LI.getName() + ".sroa.speculate.load.false");
1786 NumLoadsSpeculated += 2;
1787
1788 // Transfer alignment and AA info if present.
1789 TL->setAlignment(LI.getAlign());
1790 FL->setAlignment(LI.getAlign());
1791
1792 AAMDNodes Tags = LI.getAAMetadata();
1793 if (Tags) {
1794 TL->setAAMetadata(Tags);
1795 FL->setAAMetadata(Tags);
1796 }
1797
1798 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1799 LI.getName() + ".sroa.speculated");
1800
1801 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1802 LI.replaceAllUsesWith(V);
1803}
1804
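/// Rough sketch of what the rewrite below does (not an exhaustive
/// description): the memory operation is cloned once per hand of the select;
/// a clone whose hand is known speculatable stays ahead of the newly inserted
/// branch, the other clones move into fresh conditional blocks, and for loads
/// the per-hand results are merged back together with a PHI in the
/// continuation block.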
1805template <typename T>
1806 static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1807 SelectHandSpeculativity Spec,
1808 DomTreeUpdater &DTU) {
1809 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1810 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1811 BasicBlock *Head = I.getParent();
1812 Instruction *ThenTerm = nullptr;
1813 Instruction *ElseTerm = nullptr;
1814 if (Spec.areNoneSpeculatable())
1815 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1816 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1817 else {
1818 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1819 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1820 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1821 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1822 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1823 }
1824 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1825 Spec = {}; // Do not use `Spec` beyond this point.
1826 BasicBlock *Tail = I.getParent();
1827 Tail->setName(Head->getName() + ".cont");
1828 PHINode *PN;
1829 if (isa<LoadInst>(I))
1830 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1831 for (BasicBlock *SuccBB : successors(Head)) {
1832 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1833 int SuccIdx = IsThen ? 0 : 1;
1834 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1835 auto &CondMemOp = cast<T>(*I.clone());
1836 if (NewMemOpBB != Head) {
1837 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1838 if (isa<LoadInst>(I))
1839 ++NumLoadsPredicated;
1840 else
1841 ++NumStoresPredicated;
1842 } else {
1843 CondMemOp.dropUBImplyingAttrsAndMetadata();
1844 ++NumLoadsSpeculated;
1845 }
1846 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1847 Value *Ptr = SI.getOperand(1 + SuccIdx);
1848 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1849 if (isa<LoadInst>(I)) {
1850 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1851 PN->addIncoming(&CondMemOp, NewMemOpBB);
1852 } else
1853 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1854 }
1855 if (isa<LoadInst>(I)) {
1856 PN->takeName(&I);
1857 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1858 I.replaceAllUsesWith(PN);
1859 }
1860}
1861
1862 static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1863 SelectHandSpeculativity Spec,
1864 DomTreeUpdater &DTU) {
1865 if (auto *LI = dyn_cast<LoadInst>(&I))
1866 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1867 else if (auto *SI = dyn_cast<StoreInst>(&I))
1868 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1869 else
1870 llvm_unreachable_internal("Only for load and store.");
1871}
1872
1873 static bool rewriteSelectInstMemOps(SelectInst &SI,
1874 const RewriteableMemOps &Ops,
1875 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1876 bool CFGChanged = false;
1877 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1878
1879 for (const RewriteableMemOp &Op : Ops) {
1880 SelectHandSpeculativity Spec;
1881 Instruction *I;
1882 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1883 I = *US;
1884 } else {
1885 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1886 I = PSL.getPointer();
1887 Spec = PSL.getInt();
1888 }
1889 if (Spec.areAllSpeculatable()) {
1890 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1891 } else {
1892 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1893 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1894 CFGChanged = true;
1895 }
1896 I->eraseFromParent();
1897 }
1898
1899 for (User *U : make_early_inc_range(SI.users()))
1900 cast<BitCastInst>(U)->eraseFromParent();
1901 SI.eraseFromParent();
1902 return CFGChanged;
1903}
1904
1905/// Compute an adjusted pointer from Ptr by Offset bytes where the
1906/// resulting pointer has PointerTy.
1907static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1908 APInt Offset, Type *PointerTy,
1909 const Twine &NamePrefix) {
1910 if (Offset != 0)
1911 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1912 NamePrefix + "sroa_idx");
1913 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1914 NamePrefix + "sroa_cast");
1915}
1916
1917/// Compute the adjusted alignment for a load or store from an offset.
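///
/// For example, an access known to be 16-byte aligned that is rebased to a
/// byte offset of 4 can only be assumed 4-byte aligned, since
/// commonAlignment(Align(16), 4) == Align(4).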
1918 static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1919 return commonAlignment(getLoadStoreAlignment(I), Offset);
1920 }
1921
1922/// Test whether we can convert a value from the old to the new type.
1923///
1924/// This predicate should be used to guard calls to convertValue in order to
1925/// ensure that we only try to convert viable values. The strategy is that we
1926/// will peel off single element struct and array wrappings to get to an
1927/// underlying value, and convert that value.
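///
/// For example, an i64 and a pointer in an integral address space with
/// 64-bit pointers are mutually convertible, while an i32 and an i64, or any
/// integer and a pointer in a non-integral address space, are not.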
1928static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
1929 if (OldTy == NewTy)
1930 return true;
1931
1932 // For integer types, we can't handle any bit-width differences. This would
1933 // break vector conversions that require extension and would introduce
1934 // endianness issues when used in conjunction with loads and stores.
1935 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1936 assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1937 cast<IntegerType>(NewTy)->getBitWidth() &&
1938 "We can't have the same bitwidth for different int types");
1939 return false;
1940 }
1941
1942 if (DL.getTypeSizeInBits(NewTy).getFixedValue() !=
1943 DL.getTypeSizeInBits(OldTy).getFixedValue())
1944 return false;
1945 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1946 return false;
1947
1948 // We can convert pointers to integers and vice-versa. Same for vectors
1949 // of pointers and integers.
1950 OldTy = OldTy->getScalarType();
1951 NewTy = NewTy->getScalarType();
1952 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1953 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1954 unsigned OldAS = OldTy->getPointerAddressSpace();
1955 unsigned NewAS = NewTy->getPointerAddressSpace();
1956 // Convert pointers if they are pointers from the same address space or
1957 // different integral (not non-integral) address spaces with the same
1958 // pointer size.
1959 return OldAS == NewAS ||
1960 (!DL.isNonIntegralAddressSpace(OldAS) &&
1961 !DL.isNonIntegralAddressSpace(NewAS) &&
1962 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1963 }
1964
1965 // We can convert integers to integral pointers, but not to non-integral
1966 // pointers.
1967 if (OldTy->isIntegerTy())
1968 return !DL.isNonIntegralPointerType(NewTy);
1969
1970 // We can convert integral pointers to integers, but non-integral pointers
1971 // need to remain pointers.
1972 if (!DL.isNonIntegralPointerType(OldTy))
1973 return NewTy->isIntegerTy();
1974
1975 return false;
1976 }
1977
1978 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
1979 return false;
1980
1981 return true;
1982}
1983
1984/// Generic routine to convert an SSA value to a value of a different
1985/// type.
1986///
1987/// This will try various different casting techniques, such as bitcasts,
1988/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
1989/// two types for viability with this routine.
1990static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
1991 Type *NewTy) {
1992 Type *OldTy = V->getType();
1993 assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
1994
1995 if (OldTy == NewTy)
1996 return V;
1997
1998 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1999 "Integer types must be the exact same to convert.");
2000
2001 // See if we need inttoptr for this type pair. May require additional bitcast.
2002 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2003 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
2004 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
2005 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
2006 // Directly handle i64 to i8*
2007 return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
2008 NewTy);
2009 }
2010
2011 // See if we need ptrtoint for this type pair. May require additional bitcast.
2012 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2013 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
2014 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
2015 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
2016 // Expand i8* to i64 --> i8* to i64 to i64
2017 return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2018 NewTy);
2019 }
2020
2021 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2022 unsigned OldAS = OldTy->getPointerAddressSpace();
2023 unsigned NewAS = NewTy->getPointerAddressSpace();
2024 // To convert pointers between different address spaces (they have already
2025 // been checked as convertible, i.e. they have the same pointer size), we
2026 // cannot use `bitcast` (which requires the source and destination address
2027 // spaces to match) or `addrspacecast` (which is not always a no-op cast).
2028 // Instead, use a pair of no-op `ptrtoint`/`inttoptr` casts through an
2029 // integer with the same bit width.
2030 if (OldAS != NewAS) {
2031 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2032 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2033 NewTy);
2034 }
2035 }
2036
2037 return IRB.CreateBitCast(V, NewTy);
2038}
2039
2040/// Test whether the given slice use can be promoted to a vector.
2041///
2042/// This function is called to test each entry in a partition which is slated
2043/// for a single slice.
2044static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2045 VectorType *Ty,
2046 uint64_t ElementSize,
2047 const DataLayout &DL) {
2048 // First validate the slice offsets.
2049 uint64_t BeginOffset =
2050 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2051 uint64_t BeginIndex = BeginOffset / ElementSize;
2052 if (BeginIndex * ElementSize != BeginOffset ||
2053 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2054 return false;
2055 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2056 uint64_t EndIndex = EndOffset / ElementSize;
2057 if (EndIndex * ElementSize != EndOffset ||
2058 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2059 return false;
2060
2061 assert(EndIndex > BeginIndex && "Empty vector!");
2062 uint64_t NumElements = EndIndex - BeginIndex;
2063 Type *SliceTy = (NumElements == 1)
2064 ? Ty->getElementType()
2065 : FixedVectorType::get(Ty->getElementType(), NumElements);
2066
2067 Type *SplitIntTy =
2068 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2069
2070 Use *U = S.getUse();
2071
2072 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2073 if (MI->isVolatile())
2074 return false;
2075 if (!S.isSplittable())
2076 return false; // Skip any unsplittable intrinsics.
2077 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2078 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2079 return false;
2080 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2081 if (LI->isVolatile())
2082 return false;
2083 Type *LTy = LI->getType();
2084 // Disable vector promotion when there are loads or stores of an FCA.
2085 if (LTy->isStructTy())
2086 return false;
2087 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2088 assert(LTy->isIntegerTy());
2089 LTy = SplitIntTy;
2090 }
2091 if (!canConvertValue(DL, SliceTy, LTy))
2092 return false;
2093 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2094 if (SI->isVolatile())
2095 return false;
2096 Type *STy = SI->getValueOperand()->getType();
2097 // Disable vector promotion when there are loads or stores of an FCA.
2098 if (STy->isStructTy())
2099 return false;
2100 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2101 assert(STy->isIntegerTy());
2102 STy = SplitIntTy;
2103 }
2104 if (!canConvertValue(DL, STy, SliceTy))
2105 return false;
2106 } else {
2107 return false;
2108 }
2109
2110 return true;
2111}
2112
2113/// Test whether a vector type is viable for promotion.
2114///
2115/// This implements the necessary checking for \c checkVectorTypesForPromotion
2116/// (and thus isVectorPromotionViable) over all slices of the alloca for the
2117/// given VectorType.
2118static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
2119 const DataLayout &DL) {
2120 uint64_t ElementSize =
2121 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2122
2123 // While the definition of LLVM vectors is bitpacked, we don't support sizes
2124 // that aren't byte sized.
2125 if (ElementSize % 8)
2126 return false;
2127 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2128 "vector size not a multiple of element size?");
2129 ElementSize /= 8;
2130
2131 for (const Slice &S : P)
2132 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
2133 return false;
2134
2135 for (const Slice *S : P.splitSliceTails())
2136 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
2137 return false;
2138
2139 return true;
2140}
2141
2142/// Test whether any vector type in \p CandidateTys is viable for promotion.
2143///
2144/// This implements the necessary checking for \c isVectorPromotionViable over
2145/// all slices of the alloca for the given VectorType.
2146static VectorType *
2147 checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
2148 SmallVectorImpl<VectorType *> &CandidateTys,
2149 bool HaveCommonEltTy, Type *CommonEltTy,
2150 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2151 VectorType *CommonVecPtrTy) {
2152 // If we didn't find a vector type, nothing to do here.
2153 if (CandidateTys.empty())
2154 return nullptr;
2155
2156 // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
2157 // then we should choose it, not some other alternative.
2158 // But, we can't perform a no-op pointer address space change via bitcast,
2159 // so if we didn't have a common pointer element type, bail.
2160 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2161 return nullptr;
2162
2163 // Try to pick the "best" element type out of the choices.
2164 if (!HaveCommonEltTy && HaveVecPtrTy) {
2165 // If there was a pointer element type, there's really only one choice.
2166 CandidateTys.clear();
2167 CandidateTys.push_back(CommonVecPtrTy);
2168 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2169 // Integer-ify vector types.
2170 for (VectorType *&VTy : CandidateTys) {
2171 if (!VTy->getElementType()->isIntegerTy())
2172 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2173 VTy->getContext(), VTy->getScalarSizeInBits())));
2174 }
2175
2176 // Rank the remaining candidate vector types. This is easy because we know
2177 // they're all integer vectors. We sort by ascending number of elements.
2178 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2179 (void)DL;
2180 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2181 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2182 "Cannot have vector types of different sizes!");
2183 assert(RHSTy->getElementType()->isIntegerTy() &&
2184 "All non-integer types eliminated!");
2185 assert(LHSTy->getElementType()->isIntegerTy() &&
2186 "All non-integer types eliminated!");
2187 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2188 cast<FixedVectorType>(LHSTy)->getNumElements();
2189 };
2190 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2191 (void)DL;
2192 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2193 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2194 "Cannot have vector types of different sizes!");
2195 assert(RHSTy->getElementType()->isIntegerTy() &&
2196 "All non-integer types eliminated!");
2197 assert(LHSTy->getElementType()->isIntegerTy() &&
2198 "All non-integer types eliminated!");
2199 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2200 cast<FixedVectorType>(LHSTy)->getNumElements();
2201 };
2202 llvm::sort(CandidateTys, RankVectorTypesComp);
2203 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2204 CandidateTys.end());
2205 } else {
2206// The only way to have the same element type in every vector type is to
2207// have the same vector type. Check that and remove all but one.
2208#ifndef NDEBUG
2209 for (VectorType *VTy : CandidateTys) {
2210 assert(VTy->getElementType() == CommonEltTy &&
2211 "Unaccounted for element type!");
2212 assert(VTy == CandidateTys[0] &&
2213 "Different vector types with the same element type!");
2214 }
2215#endif
2216 CandidateTys.resize(1);
2217 }
2218
2219 // FIXME: hack. Do we have a named constant for this?
2220 // SDAG SDNode can't have more than 65535 operands.
2221 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2222 return cast<FixedVectorType>(VTy)->getNumElements() >
2223 std::numeric_limits<unsigned short>::max();
2224 });
2225
2226 for (VectorType *VTy : CandidateTys)
2227 if (checkVectorTypeForPromotion(P, VTy, DL))
2228 return VTy;
2229
2230 return nullptr;
2231}
2232
2234 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2235 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2236 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2237 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2238 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
2239 [[maybe_unused]] VectorType *OriginalElt =
2240 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2241 // Consider additional vector types where the element type size is a
2242 // multiple of load/store element size.
2243 for (Type *Ty : OtherTys) {
2244 if (!VectorType::isValidElementType(Ty))
2245 continue;
2246 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2247 // Make a copy of CandidateTys and iterate through it, because we
2248 // might append to CandidateTys in the loop.
2249 for (VectorType *const VTy : CandidateTysCopy) {
2250 // The elements in the copy should remain invariant throughout the loop
2251 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2252 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2253 unsigned ElementSize =
2254 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2255 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2256 VectorSize % TypeSize == 0) {
2257 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2258 CheckCandidateType(NewVTy);
2259 }
2260 }
2261 }
2262
2263 return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2264 CommonEltTy, HaveVecPtrTy,
2265 HaveCommonVecPtrTy, CommonVecPtrTy);
2266}
2267
2268/// Test whether the given alloca partitioning and range of slices can be
2269/// promoted to a vector.
2270///
2271/// This is a quick test to check whether we can rewrite a particular alloca
2272/// partition (and its newly formed alloca) into a vector alloca with only
2273/// whole-vector loads and stores such that it could be promoted to a vector
2274/// SSA value. We only can ensure this for a limited set of operations, and we
2275/// don't want to do the rewrites unless we are confident that the result will
2276/// be promotable, so we have an early test here.
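///
/// As a rough example of a partition that passes: a 16-byte partition
/// accessed by whole-partition <4 x float> loads and stores plus a lone
/// float store at offset 4 is viable; the scalar store is later rewritten as
/// an element insert into the vector value.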
2277static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
2278 // Collect the candidate types for vector-based promotion. Also track whether
2279 // we have different element types.
2280 SmallVector<VectorType *, 4> CandidateTys;
2281 SetVector<Type *> LoadStoreTys;
2282 SetVector<Type *> DeferredTys;
2283 Type *CommonEltTy = nullptr;
2284 VectorType *CommonVecPtrTy = nullptr;
2285 bool HaveVecPtrTy = false;
2286 bool HaveCommonEltTy = true;
2287 bool HaveCommonVecPtrTy = true;
2288 auto CheckCandidateType = [&](Type *Ty) {
2289 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2290 // Bail out if this vector type differs in total bit width from the candidates already collected.
2291 if (!CandidateTys.empty()) {
2292 VectorType *V = CandidateTys[0];
2293 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2294 DL.getTypeSizeInBits(V).getFixedValue()) {
2295 CandidateTys.clear();
2296 return;
2297 }
2298 }
2299 CandidateTys.push_back(VTy);
2300 Type *EltTy = VTy->getElementType();
2301
2302 if (!CommonEltTy)
2303 CommonEltTy = EltTy;
2304 else if (CommonEltTy != EltTy)
2305 HaveCommonEltTy = false;
2306
2307 if (EltTy->isPointerTy()) {
2308 HaveVecPtrTy = true;
2309 if (!CommonVecPtrTy)
2310 CommonVecPtrTy = VTy;
2311 else if (CommonVecPtrTy != VTy)
2312 HaveCommonVecPtrTy = false;
2313 }
2314 }
2315 };
2316
2317 // Put load and store types into a set for de-duplication.
2318 for (const Slice &S : P) {
2319 Type *Ty;
2320 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2321 Ty = LI->getType();
2322 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2323 Ty = SI->getValueOperand()->getType();
2324 else
2325 continue;
2326
2327 auto CandTy = Ty->getScalarType();
2328 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2329 S.endOffset() != P.endOffset())) {
2330 DeferredTys.insert(Ty);
2331 continue;
2332 }
2333
2334 LoadStoreTys.insert(Ty);
2335 // Consider any loads or stores whose slice exactly spans the partition.
2336 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2337 CheckCandidateType(Ty);
2338 }
2339
2340 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2341 if (auto *VTy = createAndCheckVectorTypesForPromotion(
2342 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2343 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2344 HaveCommonVecPtrTy, CommonVecPtrTy))
2345 return VTy;
2346
2347 CandidateTys.clear();
2348 return createAndCheckVectorTypesForPromotion(
2349 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2350 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2351 CommonVecPtrTy);
2352}
2353
2354/// Test whether a slice of an alloca is valid for integer widening.
2355///
2356/// This implements the necessary checking for the \c isIntegerWideningViable
2357/// test below on a single slice of the alloca.
2358static bool isIntegerWideningViableForSlice(const Slice &S,
2359 uint64_t AllocBeginOffset,
2360 Type *AllocaTy,
2361 const DataLayout &DL,
2362 bool &WholeAllocaOp) {
2363 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2364
2365 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2366 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2367
2368 Use *U = S.getUse();
2369
2370 // Lifetime intrinsics operate over the whole alloca, whose size is usually
2371 // larger than other load/store slices (RelEnd > Size). But lifetime
2372 // intrinsics are always promotable and should not impact the promotability
2373 // of the partition's other slices.
2374 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2375 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2376 return true;
2377 }
2378
2379 // We can't reasonably handle cases where the load or store extends past
2380 // the end of the alloca's type and into its padding.
2381 if (RelEnd > Size)
2382 return false;
2383
2384 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2385 if (LI->isVolatile())
2386 return false;
2387 // We can't handle loads that extend past the allocated memory.
2388 if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)
2389 return false;
2390 // So far, AllocaSliceRewriter does not support widening split slice tails
2391 // in rewriteIntegerLoad.
2392 if (S.beginOffset() < AllocBeginOffset)
2393 return false;
2394 // Note that we don't count vector loads or stores as whole-alloca
2395 // operations which enable integer widening because we would prefer to use
2396 // vector widening instead.
2397 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2398 WholeAllocaOp = true;
2399 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2400 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2401 return false;
2402 } else if (RelBegin != 0 || RelEnd != Size ||
2403 !canConvertValue(DL, AllocaTy, LI->getType())) {
2404 // Non-integer loads need to be convertible from the alloca type so that
2405 // they are promotable.
2406 return false;
2407 }
2408 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2409 Type *ValueTy = SI->getValueOperand()->getType();
2410 if (SI->isVolatile())
2411 return false;
2412 // We can't handle stores that extend past the allocated memory.
2413 if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size)
2414 return false;
2415 // So far, AllocaSliceRewriter does not support widening split slice tails
2416 // in rewriteIntegerStore.
2417 if (S.beginOffset() < AllocBeginOffset)
2418 return false;
2419 // Note that we don't count vector loads or stores as whole-alloca
2420 // operations which enable integer widening because we would prefer to use
2421 // vector widening instead.
2422 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2423 WholeAllocaOp = true;
2424 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2425 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2426 return false;
2427 } else if (RelBegin != 0 || RelEnd != Size ||
2428 !canConvertValue(DL, ValueTy, AllocaTy)) {
2429 // Non-integer stores need to be convertible to the alloca type so that
2430 // they are promotable.
2431 return false;
2432 }
2433 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2434 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2435 return false;
2436 if (!S.isSplittable())
2437 return false; // Skip any unsplittable intrinsics.
2438 } else {
2439 return false;
2440 }
2441
2442 return true;
2443}
2444
2445/// Test whether the given alloca partition's integer operations can be
2446/// widened to promotable ones.
2447///
2448/// This is a quick test to check whether we can rewrite the integer loads and
2449/// stores to a particular alloca into wider loads and stores and be able to
2450/// promote the resulting alloca.
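///
/// As a rough example: an i64-sized alloca covered by an i64 store and also
/// read through i32 loads at offsets 0 and 4 can be widened; each narrow
/// load is later rewritten as a shift (where needed) and truncate of the
/// single i64 value.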
2451static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2452 const DataLayout &DL) {
2453 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2454 // Don't create integer types larger than the maximum bitwidth.
2455 if (SizeInBits > IntegerType::MAX_INT_BITS)
2456 return false;
2457
2458 // Don't try to handle allocas with bit-padding.
2459 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2460 return false;
2461
2462 // We need to ensure that an integer type with the appropriate bitwidth can
2463 // be converted to the alloca type, whatever that is. We don't want to force
2464 // the alloca itself to have an integer type if there is a more suitable one.
2465 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2466 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2467 !canConvertValue(DL, IntTy, AllocaTy))
2468 return false;
2469
2470 // While examining uses, we ensure that the alloca has a covering load or
2471 // store. We don't want to widen the integer operations only to fail to
2472 // promote due to some other unsplittable entry (which we may make splittable
2473 // later). However, if there are only splittable uses, go ahead and assume
2474 // that we cover the alloca.
2475 // FIXME: We shouldn't consider split slices that happen to start in the
2476 // partition here...
2477 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2478
2479 for (const Slice &S : P)
2480 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2481 WholeAllocaOp))
2482 return false;
2483
2484 for (const Slice *S : P.splitSliceTails())
2485 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2486 WholeAllocaOp))
2487 return false;
2488
2489 return WholeAllocaOp;
2490}
2491
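// Worked example of the shift math below, assuming a little-endian target:
// extracting an i16 at byte offset 2 out of an i64 shifts the wide value
// right by 8 * 2 = 16 bits and truncates; on a big-endian target the shift
// amount is instead 8 * (8 - 2 - 2) = 32 bits.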
2492static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2493 IntegerType *Ty, uint64_t Offset,
2494 const Twine &Name) {
2495 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2496 IntegerType *IntTy = cast<IntegerType>(V->getType());
2497 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2498 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2499 "Element extends past full value");
2500 uint64_t ShAmt = 8 * Offset;
2501 if (DL.isBigEndian())
2502 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2503 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2504 if (ShAmt) {
2505 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2506 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2507 }
2508 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2509 "Cannot extract to a larger integer!");
2510 if (Ty != IntTy) {
2511 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2512 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2513 }
2514 return V;
2515}
2516
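// Worked example, assuming a little-endian target: inserting an i16 at byte
// offset 2 of an i64 zero-extends the i16 to i64, shifts it left by 16,
// clears bits [16, 32) of the old value with a mask, and ORs the results
// together.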
2517static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2518 Value *V, uint64_t Offset, const Twine &Name) {
2519 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2520 IntegerType *Ty = cast<IntegerType>(V->getType());
2521 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2522 "Cannot insert a larger integer!");
2523 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2524 if (Ty != IntTy) {
2525 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2526 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2527 }
2528 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2529 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2530 "Element store outside of alloca store");
2531 uint64_t ShAmt = 8 * Offset;
2532 if (DL.isBigEndian())
2533 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2534 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2535 if (ShAmt) {
2536 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2537 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2538 }
2539
2540 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2541 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2542 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2543 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2544 V = IRB.CreateOr(Old, V, Name + ".insert");
2545 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2546 }
2547 return V;
2548}
2549
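// For example, extracting elements [1, 3) of a <4 x i32> value below emits a
// shufflevector with mask <1, 2>, producing a <2 x i32> result.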
2550static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2551 unsigned EndIndex, const Twine &Name) {
2552 auto *VecTy = cast<FixedVectorType>(V->getType());
2553 unsigned NumElements = EndIndex - BeginIndex;
2554 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2555
2556 if (NumElements == VecTy->getNumElements())
2557 return V;
2558
2559 if (NumElements == 1) {
2560 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2561 Name + ".extract");
2562 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2563 return V;
2564 }
2565
2566 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2567 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2568 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2569 return V;
2570}
2571
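// For example, inserting a <2 x i32> value at BeginIndex 1 of a <4 x i32>
// alloca widens it with the shuffle mask <-1, 0, 1, -1> (poison in the
// untouched lanes) and then selects with the lane mask <0, 1, 1, 0>, keeping
// lanes 0 and 3 of the old value.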
2572static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2573 unsigned BeginIndex, const Twine &Name) {
2574 VectorType *VecTy = cast<VectorType>(Old->getType());
2575 assert(VecTy && "Can only insert a vector into a vector");
2576
2577 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2578 if (!Ty) {
2579 // Single element to insert.
2580 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2581 Name + ".insert");
2582 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2583 return V;
2584 }
2585
2586 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2587 cast<FixedVectorType>(VecTy)->getNumElements() &&
2588 "Too many elements!");
2589 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2590 cast<FixedVectorType>(VecTy)->getNumElements()) {
2591 assert(V->getType() == VecTy && "Vector type mismatch");
2592 return V;
2593 }
2594 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2595
2596 // When inserting a smaller vector into the larger one for the store, we
2597 // first use a shufflevector to widen it with poison elements, and then a
2598 // select with a constant lane mask to choose between the loaded vector and
2599 // the incoming vector.
2600 SmallVector<int, 8> Mask;
2601 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2602 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2603 if (i >= BeginIndex && i < EndIndex)
2604 Mask.push_back(i - BeginIndex);
2605 else
2606 Mask.push_back(-1);
2607 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2608 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2609
2610 SmallVector<Constant *, 8> Mask2;
2611 Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2612 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2613 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2614
2615 V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
2616
2617 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2618 return V;
2619}
2620
2621namespace {
2622
2623 /// Visitor to rewrite instructions using a particular slice of an alloca
2624/// to use a new alloca.
2625///
2626/// Also implements the rewriting to vector-based accesses when the partition
2627/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2628/// lives here.
2629class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2630 // Befriend the base class so it can delegate to private visit methods.
2631 friend class InstVisitor<AllocaSliceRewriter, bool>;
2632
2633 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2634
2635 const DataLayout &DL;
2636 AllocaSlices &AS;
2637 SROA &Pass;
2638 AllocaInst &OldAI, &NewAI;
2639 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2640 Type *NewAllocaTy;
2641
2642 // This is a convenience and flag variable that will be null unless the new
2643 // alloca's integer operations should be widened to this integer type due to
2644 // passing isIntegerWideningViable above. If it is non-null, the desired
2645 // integer type will be stored here for easy access during rewriting.
2646 IntegerType *IntTy;
2647
2648 // If we are rewriting an alloca partition which can be written as pure
2649 // vector operations, we stash extra information here. When VecTy is
2650 // non-null, we have some strict guarantees about the rewritten alloca:
2651 // - The new alloca is exactly the size of the vector type here.
2652 // - The accesses all either map to the entire vector or to a single
2653 // element.
2654 // - The set of accessing instructions is only one of those handled above
2655 // in isVectorPromotionViable. Generally these are the same access kinds
2656 // which are promotable via mem2reg.
2657 VectorType *VecTy;
2658 Type *ElementTy;
2659 uint64_t ElementSize;
2660
2661 // The original offset of the slice currently being rewritten relative to
2662 // the original alloca.
2663 uint64_t BeginOffset = 0;
2664 uint64_t EndOffset = 0;
2665
2666 // The new offsets of the slice currently being rewritten relative to the
2667 // original alloca.
2668 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2669
2670 uint64_t SliceSize = 0;
2671 bool IsSplittable = false;
2672 bool IsSplit = false;
2673 Use *OldUse = nullptr;
2674 Instruction *OldPtr = nullptr;
2675
2676 // Track post-rewrite users which are PHI nodes and Selects.
2677 SmallSetVector<PHINode *, 8> &PHIUsers;
2678 SmallSetVector<SelectInst *, 8> &SelectUsers;
2679
2680 // Utility IR builder, whose name prefix is setup for each visited use, and
2681 // the insertion point is set to point to the user.
2682 IRBuilderTy IRB;
2683
2684 // Return the new alloca, addrspacecasted if required to avoid changing the
2685 // addrspace of a volatile access.
2686 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2687 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2688 return &NewAI;
2689
2690 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2691 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2692 }
2693
2694public:
2695 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2696 AllocaInst &OldAI, AllocaInst &NewAI,
2697 uint64_t NewAllocaBeginOffset,
2698 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2699 VectorType *PromotableVecTy,
2700 SmallSetVector<PHINode *, 8> &PHIUsers,
2701 SmallSetVector<SelectInst *, 8> &SelectUsers)
2702 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2703 NewAllocaBeginOffset(NewAllocaBeginOffset),
2704 NewAllocaEndOffset(NewAllocaEndOffset),
2705 NewAllocaTy(NewAI.getAllocatedType()),
2706 IntTy(
2707 IsIntegerPromotable
2708 ? Type::getIntNTy(NewAI.getContext(),
2709 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2710 .getFixedValue())
2711 : nullptr),
2712 VecTy(PromotableVecTy),
2713 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2714 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2715 : 0),
2716 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2717 IRB(NewAI.getContext(), ConstantFolder()) {
2718 if (VecTy) {
2719 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2720 "Only multiple-of-8 sized vector elements are viable");
2721 ++NumVectorized;
2722 }
2723 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2724 }
2725
2726 bool visit(AllocaSlices::const_iterator I) {
2727 bool CanSROA = true;
2728 BeginOffset = I->beginOffset();
2729 EndOffset = I->endOffset();
2730 IsSplittable = I->isSplittable();
2731 IsSplit =
2732 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2733 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2734 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2735 LLVM_DEBUG(dbgs() << "\n");
2736
2737 // Compute the intersecting offset range.
2738 assert(BeginOffset < NewAllocaEndOffset);
2739 assert(EndOffset > NewAllocaBeginOffset);
2740 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2741 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2742
2743 SliceSize = NewEndOffset - NewBeginOffset;
2744 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2745 << ") NewBegin:(" << NewBeginOffset << ", "
2746 << NewEndOffset << ") NewAllocaBegin:("
2747 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2748 << ")\n");
2749 assert(IsSplit || NewBeginOffset == BeginOffset);
2750 OldUse = I->getUse();
2751 OldPtr = cast<Instruction>(OldUse->get());
2752
2753 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2754 IRB.SetInsertPoint(OldUserI);
2755 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2756 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2757 Twine(BeginOffset) + ".");
2758
2759 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2760 if (VecTy || IntTy)
2761 assert(CanSROA);
2762 return CanSROA;
2763 }
2764
2765private:
2766 // Make sure the other visit overloads are visible.
2767 using Base::visit;
2768
2769 // Every instruction which can end up as a user must have a rewrite rule.
2770 bool visitInstruction(Instruction &I) {
2771 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2772 llvm_unreachable("No rewrite rule for this instruction!");
2773 }
2774
2775 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2776 // Note that the offset computation can use BeginOffset or NewBeginOffset
2777 // interchangeably for unsplit slices.
2778 assert(IsSplit || BeginOffset == NewBeginOffset);
2779 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2780
2781#ifndef NDEBUG
2782 StringRef OldName = OldPtr->getName();
2783 // Skip through the last '.sroa.' component of the name.
2784 size_t LastSROAPrefix = OldName.rfind(".sroa.");
2785 if (LastSROAPrefix != StringRef::npos) {
2786 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2787 // Look for an SROA slice index.
2788 size_t IndexEnd = OldName.find_first_not_of("0123456789");
2789 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2790 // Strip the index and look for the offset.
2791 OldName = OldName.substr(IndexEnd + 1);
2792 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2793 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2794 // Strip the offset.
2795 OldName = OldName.substr(OffsetEnd + 1);
2796 }
2797 }
2798 // Strip any SROA suffixes as well.
2799 OldName = OldName.substr(0, OldName.find(".sroa_"));
2800#endif
2801
2802 return getAdjustedPtr(IRB, DL, &NewAI,
2803 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
2804 PointerTy,
2805#ifndef NDEBUG
2806 Twine(OldName) + "."
2807#else
2808 Twine()
2809#endif
2810 );
2811 }
2812
2813 /// Compute suitable alignment to access this slice of the *new*
2814 /// alloca.
2815 ///
2816 /// This is the new alloca's alignment, reduced as necessary to account for
2817 /// the byte offset of this slice within it.
2818 Align getSliceAlign() {
2819 return commonAlignment(NewAI.getAlign(),
2820 NewBeginOffset - NewAllocaBeginOffset);
2821 }
2822
2823 unsigned getIndex(uint64_t Offset) {
2824 assert(VecTy && "Can only call getIndex when rewriting a vector");
2825 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2826 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2827 uint32_t Index = RelOffset / ElementSize;
2828 assert(Index * ElementSize == RelOffset);
2829 return Index;
2830 }
2831
2832 void deleteIfTriviallyDead(Value *V) {
2833 Instruction *I = cast<Instruction>(V);
2834 if (isInstructionTriviallyDead(I))
2835 Pass.DeadInsts.push_back(I);
2836 }
2837
2838 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
2839 unsigned BeginIndex = getIndex(NewBeginOffset);
2840 unsigned EndIndex = getIndex(NewEndOffset);
2841 assert(EndIndex > BeginIndex && "Empty vector!");
2842
2843 LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2844 NewAI.getAlign(), "load");
2845
2846 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2847 LLVMContext::MD_access_group});
2848 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
2849 }
2850
2851 Value *rewriteIntegerLoad(LoadInst &LI) {
2852 assert(IntTy && "We cannot insert an integer to the alloca");
2853 assert(!LI.isVolatile());
2854 Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2855 NewAI.getAlign(), "load");
2856 V = convertValue(DL, IRB, V, IntTy);
2857 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2858 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2859 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2860 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
2861 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
2862 }
2863 // It is possible that the extracted type is not the load type. This
2864 // happens if there is a load past the end of the alloca, and as
2865 // a consequence the slice is narrower but still a candidate for integer
2866 // lowering. To handle this case, we just zero extend the extracted
2867 // integer.
2868 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
2869 "Can only handle an extract for an overly wide load");
2870 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
2871 V = IRB.CreateZExt(V, LI.getType());
2872 return V;
2873 }
2874
2875 bool visitLoadInst(LoadInst &LI) {
2876 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
2877 Value *OldOp = LI.getOperand(0);
2878 assert(OldOp == OldPtr);
2879
2880 AAMDNodes AATags = LI.getAAMetadata();
2881
2882 unsigned AS = LI.getPointerAddressSpace();
2883
2884 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2885 : LI.getType();
2886 const bool IsLoadPastEnd =
2887 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
2888 bool IsPtrAdjusted = false;
2889 Value *V;
2890 if (VecTy) {
2891 V = rewriteVectorizedLoadInst(LI);
2892 } else if (IntTy && LI.getType()->isIntegerTy()) {
2893 V = rewriteIntegerLoad(LI);
2894 } else if (NewBeginOffset == NewAllocaBeginOffset &&
2895 NewEndOffset == NewAllocaEndOffset &&
2896 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2897 (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2898 TargetTy->isIntegerTy() && !LI.isVolatile()))) {
2899 Value *NewPtr =
2900 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
2901 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
2902 NewAI.getAlign(), LI.isVolatile(),
2903 LI.getName());
2904 if (LI.isVolatile())
2905 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2906 if (NewLI->isAtomic())
2907 NewLI->setAlignment(LI.getAlign());
2908
2909 // Copy any metadata that is valid for the new load. This may require
2910 // conversion to a different kind of metadata, e.g. !nonnull might change
2911 // to !range or vice versa.
2912 copyMetadataForLoad(*NewLI, LI);
2913
2914 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
2915 if (AATags)
2916 NewLI->setAAMetadata(AATags.adjustForAccess(
2917 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2918
2919 // Try to preserve nonnull metadata
2920 V = NewLI;
2921
2922 // If this is an integer load past the end of the slice (which means the
2923 // bytes outside the slice are undef or this load is dead) just forcibly
2924 // fix the integer size with correct handling of endianness.
2925 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2926 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2927 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2928 V = IRB.CreateZExt(V, TITy, "load.ext");
2929 if (DL.isBigEndian())
2930 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2931 "endian_shift");
2932 }
2933 } else {
2934 Type *LTy = IRB.getPtrTy(AS);
2935 LoadInst *NewLI =
2936 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2937 getSliceAlign(), LI.isVolatile(), LI.getName());
2938
2939 if (AATags)
2940 NewLI->setAAMetadata(AATags.adjustForAccess(
2941 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2942
2943 if (LI.isVolatile())
2944 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2945 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2946 LLVMContext::MD_access_group});
2947
2948 V = NewLI;
2949 IsPtrAdjusted = true;
2950 }
2951 V = convertValue(DL, IRB, V, TargetTy);
2952
2953 if (IsSplit) {
2954 assert(!LI.isVolatile());
2955 assert(LI.getType()->isIntegerTy() &&
2956 "Only integer type loads and stores are split");
2957 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
2958 "Split load isn't smaller than original load");
2959 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
2960 "Non-byte-multiple bit width");
2961 // Move the insertion point just past the load so that we can refer to it.
2962 BasicBlock::iterator LIIt = std::next(LI.getIterator());
2963 // Ensure the insertion point comes before any debug-info immediately
2964 // after the load, so that variable values referring to the load are
2965 // dominated by it.
2966 LIIt.setHeadBit(true);
2967 IRB.SetInsertPoint(LI.getParent(), LIIt);
2968 // Create a placeholder value with the same type as LI to use as the
2969 // basis for the new value. This allows us to replace the uses of LI with
2970 // the computed value, and then replace the placeholder with LI, leaving
2971 // LI only used for this computation.
2972 Value *Placeholder =
2973 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
2974 false, Align(1));
2975 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2976 "insert");
2977 LI.replaceAllUsesWith(V);
2978 Placeholder->replaceAllUsesWith(&LI);
2979 Placeholder->deleteValue();
2980 } else {
2981 LI.replaceAllUsesWith(V);
2982 }
2983
2984 Pass.DeadInsts.push_back(&LI);
2985 deleteIfTriviallyDead(OldOp);
2986 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
2987 return !LI.isVolatile() && !IsPtrAdjusted;
2988 }
2989
2990 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
2991 AAMDNodes AATags) {
2992 // Capture V for the purpose of debug-info accounting once it's converted
2993 // to a vector store.
2994 Value *OrigV = V;
2995 if (V->getType() != VecTy) {
2996 unsigned BeginIndex = getIndex(NewBeginOffset);
2997 unsigned EndIndex = getIndex(NewEndOffset);
2998 assert(EndIndex > BeginIndex && "Empty vector!");
2999 unsigned NumElements = EndIndex - BeginIndex;
3000 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3001 "Too many elements!");
3002 Type *SliceTy = (NumElements == 1)
3003 ? ElementTy
3004 : FixedVectorType::get(ElementTy, NumElements);
3005 if (V->getType() != SliceTy)
3006 V = convertValue(DL, IRB, V, SliceTy);
3007
3008 // Mix in the existing elements.
3009 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3010 NewAI.getAlign(), "load");
3011 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3012 }
3013 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3014 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3015 LLVMContext::MD_access_group});
3016 if (AATags)
3017 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3018 V->getType(), DL));
3019 Pass.DeadInsts.push_back(&SI);
3020
3021 // NOTE: Careful to use OrigV rather than V.
3022 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3023 Store, Store->getPointerOperand(), OrigV, DL);
3024 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3025 return true;
3026 }
3027
3028 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3029 assert(IntTy && "We cannot extract an integer from the alloca");
3030 assert(!SI.isVolatile());
3031 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3032 IntTy->getBitWidth()) {
3033 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3034 NewAI.getAlign(), "oldload");
3035 Old = convertValue(DL, IRB, Old, IntTy);
3036 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3037 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3038 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3039 }
3040 V = convertValue(DL, IRB, V, NewAllocaTy);
3041 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3042 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3043 LLVMContext::MD_access_group});
3044 if (AATags)
3045 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3046 V->getType(), DL));
3047
3048 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3049 Store, Store->getPointerOperand(),
3050 Store->getValueOperand(), DL);
3051
3052 Pass.DeadInsts.push_back(&SI);
3053 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3054 return true;
3055 }
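  // Illustrative sketch of the read-modify-write above, assuming the slice is
  // backed by an i32-wide integer, a 16-bit value V16 is stored at byte offset
  // 2, and the target is little-endian (insertInteger itself handles
  // endianness):
  //   uint32_t Mask = 0xFFFFu << (2 * 8);
  //   uint32_t New  = (Old & ~Mask) | (uint32_t(V16) << (2 * 8));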
3056
3057 bool visitStoreInst(StoreInst &SI) {
3058 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3059 Value *OldOp = SI.getOperand(1);
3060 assert(OldOp == OldPtr);
3061
3062 AAMDNodes AATags = SI.getAAMetadata();
3063 Value *V = SI.getValueOperand();
3064
3065 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3066 // alloca that should be re-examined after promoting this alloca.
3067 if (V->getType()->isPointerTy())
3068 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3069 Pass.PostPromotionWorklist.insert(AI);
3070
3071 if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) {
3072 assert(!SI.isVolatile());
3073 assert(V->getType()->isIntegerTy() &&
3074 "Only integer type loads and stores are split");
3075 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3076 "Non-byte-multiple bit width");
3077 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3078 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3079 "extract");
3080 }
3081
3082 if (VecTy)
3083 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3084 if (IntTy && V->getType()->isIntegerTy())
3085 return rewriteIntegerStore(V, SI, AATags);
3086
3087 StoreInst *NewSI;
3088 if (NewBeginOffset == NewAllocaBeginOffset &&
3089 NewEndOffset == NewAllocaEndOffset &&
3090 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3091 V = convertValue(DL, IRB, V, NewAllocaTy);
3092 Value *NewPtr =
3093 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3094
3095 NewSI =
3096 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3097 } else {
3098 unsigned AS = SI.getPointerAddressSpace();
3099 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3100 NewSI =
3101 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3102 }
3103 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3104 LLVMContext::MD_access_group});
3105 if (AATags)
3106 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3107 V->getType(), DL));
3108 if (SI.isVolatile())
3109 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3110 if (NewSI->isAtomic())
3111 NewSI->setAlignment(SI.getAlign());
3112
3113 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3114 NewSI, NewSI->getPointerOperand(),
3115 NewSI->getValueOperand(), DL);
3116
3117 Pass.DeadInsts.push_back(&SI);
3118 deleteIfTriviallyDead(OldOp);
3119
3120 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3121 return NewSI->getPointerOperand() == &NewAI &&
3122 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3123 !SI.isVolatile();
3124 }
3125
3126 /// Compute an integer value from splatting an i8 across the given
3127 /// number of bytes.
3128 ///
3129 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3130 /// call this routine.
3131 /// FIXME: Heed the advice above.
3132 ///
3133 /// \param V The i8 value to splat.
3134 /// \param Size The number of bytes in the output (assuming i8 is one byte)
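  ///
  /// For instance (a worked example): splatting the byte 0xAB across 4 bytes
  /// computes 0xAB * (0xFFFFFFFF / 0xFF) = 0xAB * 0x01010101 = 0xABABABAB;
  /// dividing the all-ones constant by 0xFF yields the 0x01010101 pattern that
  /// replicates the byte into every byte position of the wider integer.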
3135 Value *getIntegerSplat(Value *V, unsigned Size) {
3136 assert(Size > 0 && "Expected a positive number of bytes.");
3137 IntegerType *VTy = cast<IntegerType>(V->getType());
3138 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3139 if (Size == 1)
3140 return V;
3141
3142 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3143 V = IRB.CreateMul(
3144 IRB.CreateZExt(V, SplatIntTy, "zext"),
3145 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3146 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3147 SplatIntTy)),
3148 "isplat");
3149 return V;
3150 }
3151
3152 /// Compute a vector splat for a given element value.
3153 Value *getVectorSplat(Value *V, unsigned NumElements) {
3154 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3155 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3156 return V;
3157 }
3158
3159 bool visitMemSetInst(MemSetInst &II) {
3160 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3161 assert(II.getRawDest() == OldPtr);
3162
3163 AAMDNodes AATags = II.getAAMetadata();
3164
3165 // If the memset has a variable size, it cannot be split, just adjust the
3166 // pointer to the new alloca.
3167 if (!isa<ConstantInt>(II.getLength())) {
3168 assert(!IsSplit);
3169 assert(NewBeginOffset == BeginOffset);
3170 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3171 II.setDestAlignment(getSliceAlign());
3172 // In theory we should call migrateDebugInfo here. However, we do not
3173 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3174 // constant geps, or storing a variable number of bytes.
3175     assert(at::getAssignmentMarkers(&II).empty() &&
3176            at::getDVRAssignmentMarkers(&II).empty() &&
3177 "AT: Unexpected link to non-const GEP");
3178 deleteIfTriviallyDead(OldPtr);
3179 return false;
3180 }
3181
3182 // Record this instruction for deletion.
3183 Pass.DeadInsts.push_back(&II);
3184
3185 Type *AllocaTy = NewAI.getAllocatedType();
3186 Type *ScalarTy = AllocaTy->getScalarType();
3187
3188 const bool CanContinue = [&]() {
3189 if (VecTy || IntTy)
3190 return true;
3191 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3192 return false;
3193 // Length must be in range for FixedVectorType.
3194 auto *C = cast<ConstantInt>(II.getLength());
3195 const uint64_t Len = C->getLimitedValue();
3196 if (Len > std::numeric_limits<unsigned>::max())
3197 return false;
3198 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3199 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3200 return canConvertValue(DL, SrcTy, AllocaTy) &&
3201 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3202 }();
3203
3204 // If this doesn't map cleanly onto the alloca type, and that type isn't
3205 // a single value type, just emit a memset.
3206 if (!CanContinue) {
3207 Type *SizeTy = II.getLength()->getType();
3208 unsigned Sz = NewEndOffset - NewBeginOffset;
3209 Constant *Size = ConstantInt::get(SizeTy, Sz);
3210 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3211 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3212 MaybeAlign(getSliceAlign()), II.isVolatile()));
3213 if (AATags)
3214 New->setAAMetadata(
3215 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3216
3217 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3218 New, New->getRawDest(), nullptr, DL);
3219
3220 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3221 return false;
3222 }
3223
3224 // If we can represent this as a simple value, we have to build the actual
3225 // value to store, which requires expanding the byte present in memset to
3226 // a sensible representation for the alloca type. This is essentially
3227 // splatting the byte to a sufficiently wide integer, splatting it across
3228 // any desired vector width, and bitcasting to the final type.
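    // For example (illustrative): a memset of the byte 0x2A over a slice
    // treated as <4 x float> widens the byte to the i32 0x2A2A2A2A, bitcasts
    // that to float, splats the float across the four lanes, and then inserts
    // the splat into the loaded vector before storing it back.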
3229 Value *V;
3230
3231 if (VecTy) {
3232 // If this is a memset of a vectorized alloca, insert it.
3233 assert(ElementTy == ScalarTy);
3234
3235 unsigned BeginIndex = getIndex(NewBeginOffset);
3236 unsigned EndIndex = getIndex(NewEndOffset);
3237 assert(EndIndex > BeginIndex && "Empty vector!");
3238 unsigned NumElements = EndIndex - BeginIndex;
3239 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3240 "Too many elements!");
3241
3242 Value *Splat = getIntegerSplat(
3243 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3244 Splat = convertValue(DL, IRB, Splat, ElementTy);
3245 if (NumElements > 1)
3246 Splat = getVectorSplat(Splat, NumElements);
3247
3248 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3249 NewAI.getAlign(), "oldload");
3250 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3251 } else if (IntTy) {
3252 // If this is a memset on an alloca where we can widen stores, insert the
3253 // set integer.
3254 assert(!II.isVolatile());
3255
3256 uint64_t Size = NewEndOffset - NewBeginOffset;
3257 V = getIntegerSplat(II.getValue(), Size);
3258
3259 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3260 EndOffset != NewAllocaBeginOffset)) {
3261 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3262 NewAI.getAlign(), "oldload");
3263 Old = convertValue(DL, IRB, Old, IntTy);
3264 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3265 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3266 } else {
3267 assert(V->getType() == IntTy &&
3268 "Wrong type for an alloca wide integer!");
3269 }
3270 V = convertValue(DL, IRB, V, AllocaTy);
3271 } else {
3272 // Established these invariants above.
3273 assert(NewBeginOffset == NewAllocaBeginOffset);
3274 assert(NewEndOffset == NewAllocaEndOffset);
3275
3276 V = getIntegerSplat(II.getValue(),
3277 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3278 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3279 V = getVectorSplat(
3280 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3281
3282 V = convertValue(DL, IRB, V, AllocaTy);
3283 }
3284
3285 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3286 StoreInst *New =
3287 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3288 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3289 LLVMContext::MD_access_group});
3290 if (AATags)
3291 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3292 V->getType(), DL));
3293
3294 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3295 New, New->getPointerOperand(), V, DL);
3296
3297 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3298 return !II.isVolatile();
3299 }
3300
3301 bool visitMemTransferInst(MemTransferInst &II) {
3302 // Rewriting of memory transfer instructions can be a bit tricky. We break
3303 // them into two categories: split intrinsics and unsplit intrinsics.
3304
3305 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3306
3307 AAMDNodes AATags = II.getAAMetadata();
3308
3309 bool IsDest = &II.getRawDestUse() == OldUse;
3310 assert((IsDest && II.getRawDest() == OldPtr) ||
3311 (!IsDest && II.getRawSource() == OldPtr));
3312
3313 Align SliceAlign = getSliceAlign();
3314 // For unsplit intrinsics, we simply modify the source and destination
3315 // pointers in place. This isn't just an optimization, it is a matter of
3316 // correctness. With unsplit intrinsics we may be dealing with transfers
3317 // within a single alloca before SROA ran, or with transfers that have
3318 // a variable length. We may also be dealing with memmove instead of
3319     // memcpy, and so simply updating the pointers is all that is necessary to
3320 // update both source and dest of a single call.
3321 if (!IsSplittable) {
3322 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3323 if (IsDest) {
3324 // Update the address component of linked dbg.assigns.
3325 auto UpdateAssignAddress = [&](auto *DbgAssign) {
3326 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3327 DbgAssign->getAddress() == II.getDest())
3328 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3329 };
3330 for_each(at::getAssignmentMarkers(&II), UpdateAssignAddress);
3331 for_each(at::getDVRAssignmentMarkers(&II), UpdateAssignAddress);
3332 II.setDest(AdjustedPtr);
3333 II.setDestAlignment(SliceAlign);
3334 } else {
3335 II.setSource(AdjustedPtr);
3336 II.setSourceAlignment(SliceAlign);
3337 }
3338
3339 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3340 deleteIfTriviallyDead(OldPtr);
3341 return false;
3342 }
3343 // For split transfer intrinsics we have an incredibly useful assurance:
3344 // the source and destination do not reside within the same alloca, and at
3345 // least one of them does not escape. This means that we can replace
3346 // memmove with memcpy, and we don't need to worry about all manner of
3347 // downsides to splitting and transforming the operations.
3348
3349 // If this doesn't map cleanly onto the alloca type, and that type isn't
3350 // a single value type, just emit a memcpy.
3351 bool EmitMemCpy =
3352 !VecTy && !IntTy &&
3353 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3354 SliceSize !=
3355 DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
3356 !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
3357          !NewAI.getAllocatedType()->isSingleValueType());
3358 
3359 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3360 // size hasn't been shrunk based on analysis of the viable range, this is
3361 // a no-op.
3362 if (EmitMemCpy && &OldAI == &NewAI) {
3363 // Ensure the start lines up.
3364 assert(NewBeginOffset == BeginOffset);
3365
3366 // Rewrite the size as needed.
3367 if (NewEndOffset != EndOffset)
3368 II.setLength(ConstantInt::get(II.getLength()->getType(),
3369 NewEndOffset - NewBeginOffset));
3370 return false;
3371 }
3372 // Record this instruction for deletion.
3373 Pass.DeadInsts.push_back(&II);
3374
3375 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3376 // alloca that should be re-examined after rewriting this instruction.
3377 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3378 if (AllocaInst *AI =
3379 dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3380 assert(AI != &OldAI && AI != &NewAI &&
3381 "Splittable transfers cannot reach the same alloca on both ends.");
3382 Pass.Worklist.insert(AI);
3383 }
3384
3385 Type *OtherPtrTy = OtherPtr->getType();
3386 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3387
3388 // Compute the relative offset for the other pointer within the transfer.
3389 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3390 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3391 Align OtherAlign =
3392 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3393 OtherAlign =
3394 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3395
3396 if (EmitMemCpy) {
3397 // Compute the other pointer, folding as much as possible to produce
3398 // a single, simple GEP in most cases.
3399 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3400 OtherPtr->getName() + ".");
3401
3402 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3403 Type *SizeTy = II.getLength()->getType();
3404 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3405
3406 Value *DestPtr, *SrcPtr;
3407 MaybeAlign DestAlign, SrcAlign;
3408 // Note: IsDest is true iff we're copying into the new alloca slice
3409 if (IsDest) {
3410 DestPtr = OurPtr;
3411 DestAlign = SliceAlign;
3412 SrcPtr = OtherPtr;
3413 SrcAlign = OtherAlign;
3414 } else {
3415 DestPtr = OtherPtr;
3416 DestAlign = OtherAlign;
3417 SrcPtr = OurPtr;
3418 SrcAlign = SliceAlign;
3419 }
3420 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3421 Size, II.isVolatile());
3422 if (AATags)
3423 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3424
3425 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3426 if (IsDest) {
3427 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3428 &II, New, DestPtr, nullptr, DL);
3429 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3430                    DestPtr->stripAndAccumulateConstantOffsets(
3431                        DL, Offset, /*AllowNonInbounds*/ true))) {
3432 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3433 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3434 }
3435 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3436 return false;
3437 }
3438
3439 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3440 NewEndOffset == NewAllocaEndOffset;
3441 uint64_t Size = NewEndOffset - NewBeginOffset;
3442 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3443 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3444 unsigned NumElements = EndIndex - BeginIndex;
3445 IntegerType *SubIntTy =
3446 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3447
3448 // Reset the other pointer type to match the register type we're going to
3449 // use, but using the address space of the original other pointer.
3450 Type *OtherTy;
3451 if (VecTy && !IsWholeAlloca) {
3452 if (NumElements == 1)
3453 OtherTy = VecTy->getElementType();
3454 else
3455 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3456 } else if (IntTy && !IsWholeAlloca) {
3457 OtherTy = SubIntTy;
3458 } else {
3459 OtherTy = NewAllocaTy;
3460 }
3461
3462 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3463 OtherPtr->getName() + ".");
3464 MaybeAlign SrcAlign = OtherAlign;
3465 MaybeAlign DstAlign = SliceAlign;
3466 if (!IsDest)
3467 std::swap(SrcAlign, DstAlign);
3468
3469 Value *SrcPtr;
3470 Value *DstPtr;
3471
3472 if (IsDest) {
3473 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3474 SrcPtr = AdjPtr;
3475 } else {
3476 DstPtr = AdjPtr;
3477 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3478 }
3479
3480 Value *Src;
3481 if (VecTy && !IsWholeAlloca && !IsDest) {
3482 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3483 NewAI.getAlign(), "load");
3484 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3485 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3486 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3487 NewAI.getAlign(), "load");
3488 Src = convertValue(DL, IRB, Src, IntTy);
3489 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3490 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3491 } else {
3492 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3493 II.isVolatile(), "copyload");
3494 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3495 LLVMContext::MD_access_group});
3496 if (AATags)
3497 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3498 Load->getType(), DL));
3499 Src = Load;
3500 }
3501
3502 if (VecTy && !IsWholeAlloca && IsDest) {
3503 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3504 NewAI.getAlign(), "oldload");
3505 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3506 } else if (IntTy && !IsWholeAlloca && IsDest) {
3507 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3508 NewAI.getAlign(), "oldload");
3509 Old = convertValue(DL, IRB, Old, IntTy);
3510 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3511 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3512 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3513 }
3514
3515 StoreInst *Store = cast<StoreInst>(
3516 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3517 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3518 LLVMContext::MD_access_group});
3519 if (AATags)
3520 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3521 Src->getType(), DL));
3522
3523 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3524 if (IsDest) {
3525
3526 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3527 Store, DstPtr, Src, DL);
3528 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3529                    DstPtr->stripAndAccumulateConstantOffsets(
3530                        DL, Offset, /*AllowNonInbounds*/ true))) {
3531 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3532 &II, Store, DstPtr, Src, DL);
3533 }
3534
3535 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3536 return !II.isVolatile();
3537 }
3538
3539 bool visitIntrinsicInst(IntrinsicInst &II) {
3540 assert((II.isLifetimeStartOrEnd() || II.isLaunderOrStripInvariantGroup() ||
3541 II.isDroppable()) &&
3542 "Unexpected intrinsic!");
3543 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3544
3545 // Record this instruction for deletion.
3546 Pass.DeadInsts.push_back(&II);
3547
3548 if (II.isDroppable()) {
3549 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3550 // TODO For now we forget assumed information, this can be improved.
3551 OldPtr->dropDroppableUsesIn(II);
3552 return true;
3553 }
3554
3555 if (II.isLaunderOrStripInvariantGroup())
3556 return true;
3557
3558 assert(II.getArgOperand(1) == OldPtr);
3559 // Lifetime intrinsics are only promotable if they cover the whole alloca.
3560 // Therefore, we drop lifetime intrinsics which don't cover the whole
3561 // alloca.
3562 // (In theory, intrinsics which partially cover an alloca could be
3563 // promoted, but PromoteMemToReg doesn't handle that case.)
3564 // FIXME: Check whether the alloca is promotable before dropping the
3565 // lifetime intrinsics?
3566 if (NewBeginOffset != NewAllocaBeginOffset ||
3567 NewEndOffset != NewAllocaEndOffset)
3568 return true;
3569
3570 ConstantInt *Size =
3571 ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
3572 NewEndOffset - NewBeginOffset);
3573 // Lifetime intrinsics always expect an i8* so directly get such a pointer
3574 // for the new alloca slice.
3575 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3576 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3577 Value *New;
3578 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3579 New = IRB.CreateLifetimeStart(Ptr, Size);
3580 else
3581 New = IRB.CreateLifetimeEnd(Ptr, Size);
3582
3583 (void)New;
3584 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3585
3586 return true;
3587 }
3588
3589 void fixLoadStoreAlign(Instruction &Root) {
3590 // This algorithm implements the same visitor loop as
3591 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3592 // or store found.
3593     SmallPtrSet<Instruction *, 4> Visited;
3594     SmallVector<Instruction *, 4> Uses;
3595     Visited.insert(&Root);
3596 Uses.push_back(&Root);
3597 do {
3598 Instruction *I = Uses.pop_back_val();
3599
3600 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3601 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3602 continue;
3603 }
3604 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3605 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3606 continue;
3607 }
3608
3609 assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3610 isa<PHINode>(I) || isa<SelectInst>(I) ||
3611 isa<GetElementPtrInst>(I));
3612 for (User *U : I->users())
3613 if (Visited.insert(cast<Instruction>(U)).second)
3614 Uses.push_back(cast<Instruction>(U));
3615 } while (!Uses.empty());
3616 }
3617
3618 bool visitPHINode(PHINode &PN) {
3619 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3620 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3621 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3622
3623 // We would like to compute a new pointer in only one place, but have it be
3624 // as local as possible to the PHI. To do that, we re-use the location of
3625 // the old pointer, which necessarily must be in the right position to
3626 // dominate the PHI.
3627     IRBuilderBase::InsertPointGuard Guard(IRB);
3628     if (isa<PHINode>(OldPtr))
3629 IRB.SetInsertPoint(OldPtr->getParent(),
3630 OldPtr->getParent()->getFirstInsertionPt());
3631 else
3632 IRB.SetInsertPoint(OldPtr);
3633 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3634
3635 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3636 // Replace the operands which were using the old pointer.
3637 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3638
3639 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
3640 deleteIfTriviallyDead(OldPtr);
3641
3642 // Fix the alignment of any loads or stores using this PHI node.
3643 fixLoadStoreAlign(PN);
3644
3645 // PHIs can't be promoted on their own, but often can be speculated. We
3646 // check the speculation outside of the rewriter so that we see the
3647 // fully-rewritten alloca.
3648 PHIUsers.insert(&PN);
3649 return true;
3650 }
3651
3652 bool visitSelectInst(SelectInst &SI) {
3653 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3654 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3655 "Pointer isn't an operand!");
3656 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3657 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3658
3659 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3660 // Replace the operands which were using the old pointer.
3661 if (SI.getOperand(1) == OldPtr)
3662 SI.setOperand(1, NewPtr);
3663 if (SI.getOperand(2) == OldPtr)
3664 SI.setOperand(2, NewPtr);
3665
3666 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
3667 deleteIfTriviallyDead(OldPtr);
3668
3669 // Fix the alignment of any loads or stores using this select.
3670 fixLoadStoreAlign(SI);
3671
3672 // Selects can't be promoted on their own, but often can be speculated. We
3673 // check the speculation outside of the rewriter so that we see the
3674 // fully-rewritten alloca.
3675 SelectUsers.insert(&SI);
3676 return true;
3677 }
3678};
3679
3680/// Visitor to rewrite aggregate loads and stores as scalar.
3681///
3682/// This pass aggressively rewrites all aggregate loads and stores on
3683/// a particular pointer (or any pointer derived from it which we can identify)
3684/// with scalar loads and stores.
3685class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3686 // Befriend the base class so it can delegate to private visit methods.
3687 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3688
3689 /// Queue of pointer uses to analyze and potentially rewrite.
3690   SmallVector<Use *, 8> Queue;
3691 
3692 /// Set to prevent us from cycling with phi nodes and loops.
3693 SmallPtrSet<User *, 8> Visited;
3694
3695 /// The current pointer use being rewritten. This is used to dig up the used
3696 /// value (as opposed to the user).
3697 Use *U = nullptr;
3698
3699 /// Used to calculate offsets, and hence alignment, of subobjects.
3700 const DataLayout &DL;
3701
3702 IRBuilderTy &IRB;
3703
3704public:
3705 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
3706 : DL(DL), IRB(IRB) {}
3707
3708 /// Rewrite loads and stores through a pointer and all pointers derived from
3709 /// it.
3710 bool rewrite(Instruction &I) {
3711 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3712 enqueueUsers(I);
3713 bool Changed = false;
3714 while (!Queue.empty()) {
3715 U = Queue.pop_back_val();
3716 Changed |= visit(cast<Instruction>(U->getUser()));
3717 }
3718 return Changed;
3719 }
3720
3721private:
3722 /// Enqueue all the users of the given instruction for further processing.
3723 /// This uses a set to de-duplicate users.
3724 void enqueueUsers(Instruction &I) {
3725 for (Use &U : I.uses())
3726 if (Visited.insert(U.getUser()).second)
3727 Queue.push_back(&U);
3728 }
3729
3730 // Conservative default is to not rewrite anything.
3731 bool visitInstruction(Instruction &I) { return false; }
3732
3733 /// Generic recursive split emission class.
3734 template <typename Derived> class OpSplitter {
3735 protected:
3736 /// The builder used to form new instructions.
3737 IRBuilderTy &IRB;
3738
3739     /// The indices to be used with insert- or extractvalue to select the
3740     /// appropriate value within the aggregate.
3741     SmallVector<unsigned, 4> Indices;
3742 
3743 /// The indices to a GEP instruction which will move Ptr to the correct slot
3744 /// within the aggregate.
3745 SmallVector<Value *, 4> GEPIndices;
3746
3747 /// The base pointer of the original op, used as a base for GEPing the
3748 /// split operations.
3749 Value *Ptr;
3750
3751 /// The base pointee type being GEPed into.
3752 Type *BaseTy;
3753
3754 /// Known alignment of the base pointer.
3755 Align BaseAlign;
3756
3757 /// To calculate offset of each component so we can correctly deduce
3758 /// alignments.
3759 const DataLayout &DL;
3760
3761 /// Initialize the splitter with an insertion point, Ptr and start with a
3762 /// single zero GEP index.
3763     OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3764                Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
3765 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
3766 BaseAlign(BaseAlign), DL(DL) {
3767 IRB.SetInsertPoint(InsertionPoint);
3768 }
3769
3770 public:
3771 /// Generic recursive split emission routine.
3772 ///
3773 /// This method recursively splits an aggregate op (load or store) into
3774 /// scalar or vector ops. It splits recursively until it hits a single value
3775 /// and emits that single value operation via the template argument.
3776 ///
3777 /// The logic of this routine relies on GEPs and insertvalue and
3778 /// extractvalue all operating with the same fundamental index list, merely
3779 /// formatted differently (GEPs need actual values).
3780 ///
3781 /// \param Ty The type being split recursively into smaller ops.
3782 /// \param Agg The aggregate value being built up or stored, depending on
3783 /// whether this is splitting a load or a store respectively.
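    ///
    /// For example, splitting a load of { i32, { float } } from %p produces
    /// roughly the following (a sketch; the actual value names differ):
    ///   %a.gep  = getelementptr inbounds { i32, { float } }, ptr %p, i32 0, i32 0
    ///   %a.load = load i32, ptr %a.gep
    ///   %a.ins  = insertvalue { i32, { float } } poison, i32 %a.load, 0
    ///   %b.gep  = getelementptr inbounds { i32, { float } }, ptr %p, i32 0, i32 1, i32 0
    ///   %b.load = load float, ptr %b.gep
    ///   %b.ins  = insertvalue { i32, { float } } %a.ins, float %b.load, 1, 0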
3784 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3785 if (Ty->isSingleValueType()) {
3786 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
3787 return static_cast<Derived *>(this)->emitFunc(
3788 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
3789 }
3790
3791 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3792 unsigned OldSize = Indices.size();
3793 (void)OldSize;
3794 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3795 ++Idx) {
3796 assert(Indices.size() == OldSize && "Did not return to the old size");
3797 Indices.push_back(Idx);
3798 GEPIndices.push_back(IRB.getInt32(Idx));
3799 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3800 GEPIndices.pop_back();
3801 Indices.pop_back();
3802 }
3803 return;
3804 }
3805
3806 if (StructType *STy = dyn_cast<StructType>(Ty)) {
3807 unsigned OldSize = Indices.size();
3808 (void)OldSize;
3809 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3810 ++Idx) {
3811 assert(Indices.size() == OldSize && "Did not return to the old size");
3812 Indices.push_back(Idx);
3813 GEPIndices.push_back(IRB.getInt32(Idx));
3814 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3815 GEPIndices.pop_back();
3816 Indices.pop_back();
3817 }
3818 return;
3819 }
3820
3821 llvm_unreachable("Only arrays and structs are aggregate loadable types");
3822 }
3823 };
3824
3825 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3826 AAMDNodes AATags;
3827 // A vector to hold the split components that we want to emit
3828 // separate fake uses for.
3829 SmallVector<Value *, 4> Components;
3830 // A vector to hold all the fake uses of the struct that we are splitting.
3831 // Usually there should only be one, but we are handling the general case.
3832     SmallVector<Instruction *, 4> FakeUses;
3833 
3834 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3835 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
3836 IRBuilderTy &IRB)
3837 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
3838 IRB),
3839 AATags(AATags) {}
3840
3841 /// Emit a leaf load of a single value. This is called at the leaves of the
3842 /// recursive emission to actually load values.
3843 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3844       assert(Ty->isSingleValueType());
3845       // Load the single value and insert it using the indices.
3846 Value *GEP =
3847 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3848 LoadInst *Load =
3849 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
3850
3851 APInt Offset(
3852 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3853 if (AATags &&
3854           GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3855         Load->setAAMetadata(
3856 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
3857 // Record the load so we can generate a fake use for this aggregate
3858 // component.
3859 Components.push_back(Load);
3860
3861 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3862 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
3863 }
3864
3865 // Stash the fake uses that use the value generated by this instruction.
3866 void recordFakeUses(LoadInst &LI) {
3867 for (Use &U : LI.uses())
3868 if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
3869 if (II->getIntrinsicID() == Intrinsic::fake_use)
3870 FakeUses.push_back(II);
3871 }
3872
3873 // Replace all fake uses of the aggregate with a series of fake uses, one
3874 // for each split component.
3875 void emitFakeUses() {
3876 for (Instruction *I : FakeUses) {
3877 IRB.SetInsertPoint(I);
3878 for (auto *V : Components)
3879 IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V});
3880 I->eraseFromParent();
3881 }
3882 }
3883 };
3884
3885 bool visitLoadInst(LoadInst &LI) {
3886 assert(LI.getPointerOperand() == *U);
3887 if (!LI.isSimple() || LI.getType()->isSingleValueType())
3888 return false;
3889
3890 // We have an aggregate being loaded, split it apart.
3891 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3892 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
3893 getAdjustedAlignment(&LI, 0), DL, IRB);
3894 Splitter.recordFakeUses(LI);
3895     Value *V = PoisonValue::get(LI.getType());
3896     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3897 Splitter.emitFakeUses();
3898 Visited.erase(&LI);
3899 LI.replaceAllUsesWith(V);
3900 LI.eraseFromParent();
3901 return true;
3902 }
3903
3904 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3905 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3906 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
3907 const DataLayout &DL, IRBuilderTy &IRB)
3908 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
3909 DL, IRB),
3910 AATags(AATags), AggStore(AggStore) {}
3911 AAMDNodes AATags;
3912 StoreInst *AggStore;
3913 /// Emit a leaf store of a single value. This is called at the leaves of the
3914 /// recursive emission to actually produce stores.
3915 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3916       assert(Ty->isSingleValueType());
3917       // Extract the single value and store it using the indices.
3918 //
3919 // The gep and extractvalue values are factored out of the CreateStore
3920 // call to make the output independent of the argument evaluation order.
3921 Value *ExtractValue =
3922 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
3923 Value *InBoundsGEP =
3924 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3925 StoreInst *Store =
3926 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3927
3928 APInt Offset(
3929 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3930       GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
3931       if (AATags) {
3932 Store->setAAMetadata(AATags.adjustForAccess(
3933 Offset.getZExtValue(), ExtractValue->getType(), DL));
3934 }
3935
3936 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
3937 // If we cannot (because there's an intervening non-const or unbounded
3938 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
3939 // this instruction.
3940       Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
3941       if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
3942 uint64_t SizeInBits =
3943 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
3944 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
3945 SizeInBits, AggStore, Store,
3946 Store->getPointerOperand(), Store->getValueOperand(),
3947 DL);
3948 } else {
3949 assert(at::getAssignmentMarkers(Store).empty() &&
3950 at::getDVRAssignmentMarkers(Store).empty() &&
3951 "AT: unexpected debug.assign linked to store through "
3952 "unbounded GEP");
3953 }
3954 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3955 }
3956 };
3957
3958 bool visitStoreInst(StoreInst &SI) {
3959 if (!SI.isSimple() || SI.getPointerOperand() != *U)
3960 return false;
3961 Value *V = SI.getValueOperand();
3962 if (V->getType()->isSingleValueType())
3963 return false;
3964
3965 // We have an aggregate being stored, split it apart.
3966 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3967 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
3968 getAdjustedAlignment(&SI, 0), DL, IRB);
3969 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3970 Visited.erase(&SI);
3971 // The stores replacing SI each have markers describing fragments of the
3972 // assignment so delete the assignment markers linked to SI.
3973     at::deleteAssignmentMarkers(&SI);
3974     SI.eraseFromParent();
3975 return true;
3976 }
3977
3978 bool visitBitCastInst(BitCastInst &BC) {
3979 enqueueUsers(BC);
3980 return false;
3981 }
3982
3983 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
3984 enqueueUsers(ASC);
3985 return false;
3986 }
3987
3988 // Unfold gep (select cond, ptr1, ptr2), idx
3989 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
3990 // and gep ptr, (select cond, idx1, idx2)
3991 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
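  //
  // For instance (illustrative):
  //   %idx = select i1 %c, i64 4, i64 8
  //   %gep = getelementptr i8, ptr %p, i64 %idx
  // becomes
  //   %gep.t = getelementptr i8, ptr %p, i64 4
  //   %gep.f = getelementptr i8, ptr %p, i64 8
  //   %sel   = select i1 %c, ptr %gep.t, ptr %gep.f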
3992 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
3993 // Check whether the GEP has exactly one select operand and all indices
3994 // will become constant after the transform.
3995 SelectInst *Sel = dyn_cast<SelectInst>(GEPI.getPointerOperand());
3996 for (Value *Op : GEPI.indices()) {
3997 if (auto *SI = dyn_cast<SelectInst>(Op)) {
3998 if (Sel)
3999 return false;
4000
4001 Sel = SI;
4002 if (!isa<ConstantInt>(Sel->getTrueValue()) ||
4003 !isa<ConstantInt>(Sel->getFalseValue()))
4004 return false;
4005 continue;
4006 }
4007
4008 if (!isa<ConstantInt>(Op))
4009 return false;
4010 }
4011
4012 if (!Sel)
4013 return false;
4014
4015 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4016 dbgs() << " original: " << *Sel << "\n";
4017 dbgs() << " " << GEPI << "\n";);
4018
4019 auto GetNewOps = [&](Value *SelOp) {
4020 SmallVector<Value *> NewOps;
4021 for (Value *Op : GEPI.operands())
4022 if (Op == Sel)
4023 NewOps.push_back(SelOp);
4024 else
4025 NewOps.push_back(Op);
4026 return NewOps;
4027 };
4028
4029 Value *True = Sel->getTrueValue();
4030 Value *False = Sel->getFalseValue();
4031 SmallVector<Value *> TrueOps = GetNewOps(True);
4032 SmallVector<Value *> FalseOps = GetNewOps(False);
4033
4034 IRB.SetInsertPoint(&GEPI);
4035 GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
4036
4037 Type *Ty = GEPI.getSourceElementType();
4038 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4039 True->getName() + ".sroa.gep", NW);
4040
4041 Value *NFalse =
4042 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4043 False->getName() + ".sroa.gep", NW);
4044
4045 Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
4046 Sel->getName() + ".sroa.sel");
4047 Visited.erase(&GEPI);
4048 GEPI.replaceAllUsesWith(NSel);
4049 GEPI.eraseFromParent();
4050 Instruction *NSelI = cast<Instruction>(NSel);
4051 Visited.insert(NSelI);
4052 enqueueUsers(*NSelI);
4053
4054 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4055 dbgs() << " " << *NFalse << "\n";
4056 dbgs() << " " << *NSel << "\n";);
4057
4058 return true;
4059 }
4060
4061 // Unfold gep (phi ptr1, ptr2), idx
4062 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4063 // and gep ptr, (phi idx1, idx2)
4064 // => phi ((gep ptr, idx1), (gep ptr, idx2))
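  //
  // For instance (illustrative), where %a and %b are static allocas:
  //   %phi = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  //   %gep = getelementptr i8, ptr %phi, i64 4
  // becomes (with the GEPs emitted at the end of the entry block)
  //   %a.gep   = getelementptr i8, ptr %a, i64 4
  //   %b.gep   = getelementptr i8, ptr %b, i64 4
  //   %phi.gep = phi ptr [ %a.gep, %bb1 ], [ %b.gep, %bb2 ]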
4065 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4066 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4067 // operand (looking through the phi if it is the phi we want to unfold) is
4068 // an instruction besides a static alloca.
4069 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4070 auto IsInvalidPointerOperand = [](Value *V) {
4071 if (!isa<Instruction>(V))
4072 return false;
4073 if (auto *AI = dyn_cast<AllocaInst>(V))
4074 return !AI->isStaticAlloca();
4075 return true;
4076 };
4077 if (Phi) {
4078 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4079 return false;
4080 } else {
4081 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4082 return false;
4083 }
4084 // Check whether the GEP has exactly one phi operand (including the pointer
4085 // operand) and all indices will become constant after the transform.
4086 for (Value *Op : GEPI.indices()) {
4087 if (auto *SI = dyn_cast<PHINode>(Op)) {
4088 if (Phi)
4089 return false;
4090
4091 Phi = SI;
4092 if (!all_of(Phi->incoming_values(),
4093 [](Value *V) { return isa<ConstantInt>(V); }))
4094 return false;
4095 continue;
4096 }
4097
4098 if (!isa<ConstantInt>(Op))
4099 return false;
4100 }
4101
4102 if (!Phi)
4103 return false;
4104
4105 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4106 dbgs() << " original: " << *Phi << "\n";
4107 dbgs() << " " << GEPI << "\n";);
4108
4109 auto GetNewOps = [&](Value *PhiOp) {
4110 SmallVector<Value *> NewOps;
4111 for (Value *Op : GEPI.operands())
4112 if (Op == Phi)
4113 NewOps.push_back(PhiOp);
4114 else
4115 NewOps.push_back(Op);
4116 return NewOps;
4117 };
4118
4119 IRB.SetInsertPoint(Phi);
4120 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4121 Phi->getName() + ".sroa.phi");
4122
4123 Type *SourceTy = GEPI.getSourceElementType();
4124 // We only handle arguments, constants, and static allocas here, so we can
4125 // insert GEPs at the end of the entry block.
4126 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4127 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4128 Value *Op = Phi->getIncomingValue(I);
4129 BasicBlock *BB = Phi->getIncomingBlock(I);
4130 Value *NewGEP;
4131 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4132 NewGEP = NewPhi->getIncomingValue(NI);
4133 } else {
4134 SmallVector<Value *> NewOps = GetNewOps(Op);
4135 NewGEP =
4136 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4137 Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
4138 }
4139 NewPhi->addIncoming(NewGEP, BB);
4140 }
4141
4142 Visited.erase(&GEPI);
4143 GEPI.replaceAllUsesWith(NewPhi);
4144 GEPI.eraseFromParent();
4145 Visited.insert(NewPhi);
4146 enqueueUsers(*NewPhi);
4147
4148 LLVM_DEBUG(dbgs() << " to: ";
4149 for (Value *In
4150 : NewPhi->incoming_values()) dbgs()
4151 << "\n " << *In;
4152 dbgs() << "\n " << *NewPhi << '\n');
4153
4154 return true;
4155 }
4156
4157 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4158 if (unfoldGEPSelect(GEPI))
4159 return true;
4160
4161 if (unfoldGEPPhi(GEPI))
4162 return true;
4163
4164 enqueueUsers(GEPI);
4165 return false;
4166 }
4167
4168 bool visitPHINode(PHINode &PN) {
4169 enqueueUsers(PN);
4170 return false;
4171 }
4172
4173 bool visitSelectInst(SelectInst &SI) {
4174 enqueueUsers(SI);
4175 return false;
4176 }
4177};
4178
4179} // end anonymous namespace
4180
4181/// Strip aggregate type wrapping.
4182///
4183/// This removes no-op aggregate types wrapping an underlying type. It will
4184/// strip as many layers of types as it can without changing either the type
4185/// size or the allocated size.
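///
/// For example, [1 x float], { float } and { [1 x { float }] } all strip down
/// to float, while { float, float } and [2 x i8] are returned unchanged
/// because peeling off a layer would change the size.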
4186 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
4187   if (Ty->isSingleValueType())
4188 return Ty;
4189
4190 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4191 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4192
4193 Type *InnerTy;
4194 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4195 InnerTy = ArrTy->getElementType();
4196 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4197 const StructLayout *SL = DL.getStructLayout(STy);
4198 unsigned Index = SL->getElementContainingOffset(0);
4199 InnerTy = STy->getElementType(Index);
4200 } else {
4201 return Ty;
4202 }
4203
4204 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4205 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4206 return Ty;
4207
4208 return stripAggregateTypeWrapping(DL, InnerTy);
4209}
4210
4211/// Try to find a partition of the aggregate type passed in for a given
4212/// offset and size.
4213///
4214/// This recurses through the aggregate type and tries to compute a subtype
4215/// based on the offset and size. When the offset and size span a sub-section
4216/// of an array, it will even compute a new array type for that sub-section,
4217/// and the same for structs.
4218///
4219/// Note that this routine is very strict and tries to find a partition of the
4220/// type which produces the *exact* right offset and size. It is not forgiving
4221 /// when the size or offset causes either end of the type-based partition to be off.
4222/// Also, this is a best-effort routine. It is reasonable to give up and not
4223/// return a type if necessary.
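///
/// For instance, given { i32, i32, float } with the usual layout (fields at
/// offsets 0, 4 and 8): offset 8 with size 4 yields float, offset 0 with
/// size 8 yields the sub-struct { i32, i32 }, and offset 2 with size 4 yields
/// nullptr because no element boundary lines up.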
4224 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4225                               uint64_t Size) {
4226 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4227 return stripAggregateTypeWrapping(DL, Ty);
4228 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4229 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4230 return nullptr;
4231
4232 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4233 Type *ElementTy;
4234 uint64_t TyNumElements;
4235 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4236 ElementTy = AT->getElementType();
4237 TyNumElements = AT->getNumElements();
4238 } else {
4239 // FIXME: This isn't right for vectors with non-byte-sized or
4240 // non-power-of-two sized elements.
4241 auto *VT = cast<FixedVectorType>(Ty);
4242 ElementTy = VT->getElementType();
4243 TyNumElements = VT->getNumElements();
4244 }
4245 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4246 uint64_t NumSkippedElements = Offset / ElementSize;
4247 if (NumSkippedElements >= TyNumElements)
4248 return nullptr;
4249 Offset -= NumSkippedElements * ElementSize;
4250
4251 // First check if we need to recurse.
4252 if (Offset > 0 || Size < ElementSize) {
4253 // Bail if the partition ends in a different array element.
4254 if ((Offset + Size) > ElementSize)
4255 return nullptr;
4256 // Recurse through the element type trying to peel off offset bytes.
4257 return getTypePartition(DL, ElementTy, Offset, Size);
4258 }
4259 assert(Offset == 0);
4260
4261 if (Size == ElementSize)
4262 return stripAggregateTypeWrapping(DL, ElementTy);
4263 assert(Size > ElementSize);
4264 uint64_t NumElements = Size / ElementSize;
4265 if (NumElements * ElementSize != Size)
4266 return nullptr;
4267 return ArrayType::get(ElementTy, NumElements);
4268 }
4269
4270 StructType *STy = dyn_cast<StructType>(Ty);
4271 if (!STy)
4272 return nullptr;
4273
4274 const StructLayout *SL = DL.getStructLayout(STy);
4275
4276 if (SL->getSizeInBits().isScalable())
4277 return nullptr;
4278
4279 if (Offset >= SL->getSizeInBytes())
4280 return nullptr;
4281 uint64_t EndOffset = Offset + Size;
4282 if (EndOffset > SL->getSizeInBytes())
4283 return nullptr;
4284
4285 unsigned Index = SL->getElementContainingOffset(Offset);
4286 Offset -= SL->getElementOffset(Index);
4287
4288 Type *ElementTy = STy->getElementType(Index);
4289 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4290 if (Offset >= ElementSize)
4291 return nullptr; // The offset points into alignment padding.
4292
4293 // See if any partition must be contained by the element.
4294 if (Offset > 0 || Size < ElementSize) {
4295 if ((Offset + Size) > ElementSize)
4296 return nullptr;
4297 return getTypePartition(DL, ElementTy, Offset, Size);
4298 }
4299 assert(Offset == 0);
4300
4301 if (Size == ElementSize)
4302 return stripAggregateTypeWrapping(DL, ElementTy);
4303
4304 StructType::element_iterator EI = STy->element_begin() + Index,
4305 EE = STy->element_end();
4306 if (EndOffset < SL->getSizeInBytes()) {
4307 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4308 if (Index == EndIndex)
4309 return nullptr; // Within a single element and its padding.
4310
4311 // Don't try to form "natural" types if the elements don't line up with the
4312 // expected size.
4313 // FIXME: We could potentially recurse down through the last element in the
4314 // sub-struct to find a natural end point.
4315 if (SL->getElementOffset(EndIndex) != EndOffset)
4316 return nullptr;
4317
4318 assert(Index < EndIndex);
4319 EE = STy->element_begin() + EndIndex;
4320 }
4321
4322 // Try to build up a sub-structure.
4323 StructType *SubTy =
4324 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4325 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4326 if (Size != SubSL->getSizeInBytes())
4327 return nullptr; // The sub-struct doesn't have quite the size needed.
4328
4329 return SubTy;
4330}
4331
4332/// Pre-split loads and stores to simplify rewriting.
4333///
4334/// We want to break up the splittable load+store pairs as much as
4335/// possible. This is important to do as a preprocessing step, as once we
4336/// start rewriting the accesses to partitions of the alloca we lose the
4337/// necessary information to correctly split apart paired loads and stores
4338/// which both point into this alloca. The case to consider is something like
4339/// the following:
4340///
4341/// %a = alloca [12 x i8]
4342/// %gep1 = getelementptr i8, ptr %a, i32 0
4343/// %gep2 = getelementptr i8, ptr %a, i32 4
4344/// %gep3 = getelementptr i8, ptr %a, i32 8
4345/// store float 0.0, ptr %gep1
4346/// store float 1.0, ptr %gep2
4347/// %v = load i64, ptr %gep1
4348/// store i64 %v, ptr %gep2
4349/// %f1 = load float, ptr %gep2
4350/// %f2 = load float, ptr %gep3
4351///
4352/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4353/// promote everything so we recover the 2 SSA values that should have been
4354/// there all along.
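///
/// After pre-splitting, the paired i64 load and store above are rewritten into
/// something roughly equivalent to (fresh GEPs and value names are emitted in
/// practice):
///
///   %v1 = load i32, ptr %gep1
///   %v2 = load i32, ptr %gep2
///   store i32 %v1, ptr %gep2
///   store i32 %v2, ptr %gep3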
4355///
4356/// \returns true if any changes are made.
4357bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4358 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4359
4360 // Track the loads and stores which are candidates for pre-splitting here, in
4361 // the order they first appear during the partition scan. These give stable
4362 // iteration order and a basis for tracking which loads and stores we
4363 // actually split.
4364   SmallVector<LoadInst *, 4> Loads;
4365   SmallVector<StoreInst *, 4> Stores;
4366 
4367 // We need to accumulate the splits required of each load or store where we
4368 // can find them via a direct lookup. This is important to cross-check loads
4369 // and stores against each other. We also track the slice so that we can kill
4370 // all the slices that end up split.
4371 struct SplitOffsets {
4372 Slice *S;
4373 std::vector<uint64_t> Splits;
4374 };
4375   SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4376 
4377 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4378 // This is important as we also cannot pre-split stores of those loads!
4379 // FIXME: This is all pretty gross. It means that we can be more aggressive
4380 // in pre-splitting when the load feeding the store happens to come from
4381 // a separate alloca. Put another way, the effectiveness of SROA would be
4382 // decreased by a frontend which just concatenated all of its local allocas
4383 // into one big flat alloca. But defeating such patterns is exactly the job
4384   // SROA is tasked with! Sadly, to not have this discrepancy we would have to
4385 // change store pre-splitting to actually force pre-splitting of the load
4386 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4387 // maybe it would make it more principled?
4388 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4389
4390 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4391 for (auto &P : AS.partitions()) {
4392 for (Slice &S : P) {
4393 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4394 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4395 // If this is a load we have to track that it can't participate in any
4396 // pre-splitting. If this is a store of a load we have to track that
4397 // that load also can't participate in any pre-splitting.
4398 if (auto *LI = dyn_cast<LoadInst>(I))
4399 UnsplittableLoads.insert(LI);
4400 else if (auto *SI = dyn_cast<StoreInst>(I))
4401 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4402 UnsplittableLoads.insert(LI);
4403 continue;
4404 }
4405 assert(P.endOffset() > S.beginOffset() &&
4406 "Empty or backwards partition!");
4407
4408 // Determine if this is a pre-splittable slice.
4409 if (auto *LI = dyn_cast<LoadInst>(I)) {
4410 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4411
4412 // The load must be used exclusively to store into other pointers for
4413 // us to be able to arbitrarily pre-split it. The stores must also be
4414 // simple to avoid changing semantics.
4415 auto IsLoadSimplyStored = [](LoadInst *LI) {
4416 for (User *LU : LI->users()) {
4417 auto *SI = dyn_cast<StoreInst>(LU);
4418 if (!SI || !SI->isSimple())
4419 return false;
4420 }
4421 return true;
4422 };
4423 if (!IsLoadSimplyStored(LI)) {
4424 UnsplittableLoads.insert(LI);
4425 continue;
4426 }
4427
4428 Loads.push_back(LI);
4429 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4430 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4431 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4432 continue;
4433 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4434 if (!StoredLoad || !StoredLoad->isSimple())
4435 continue;
4436 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4437
4438 Stores.push_back(SI);
4439 } else {
4440 // Other uses cannot be pre-split.
4441 continue;
4442 }
4443
4444 // Record the initial split.
4445 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4446 auto &Offsets = SplitOffsetsMap[I];
4447 assert(Offsets.Splits.empty() &&
4448 "Should not have splits the first time we see an instruction!");
4449 Offsets.S = &S;
4450 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4451 }
4452
4453 // Now scan the already split slices, and add a split for any of them which
4454 // we're going to pre-split.
4455 for (Slice *S : P.splitSliceTails()) {
4456 auto SplitOffsetsMapI =
4457 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4458 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4459 continue;
4460 auto &Offsets = SplitOffsetsMapI->second;
4461
4462 assert(Offsets.S == S && "Found a mismatched slice!");
4463 assert(!Offsets.Splits.empty() &&
4464 "Cannot have an empty set of splits on the second partition!");
4465 assert(Offsets.Splits.back() ==
4466 P.beginOffset() - Offsets.S->beginOffset() &&
4467 "Previous split does not end where this one begins!");
4468
4469 // Record each split. The last partition's end isn't needed as the size
4470 // of the slice dictates that.
4471 if (S->endOffset() > P.endOffset())
4472 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4473 }
4474 }
4475
4476 // We may have split loads where some of their stores are split stores. For
4477 // such loads and stores, we can only pre-split them if their splits exactly
4478 // match relative to their starting offset. We have to verify this prior to
4479 // any rewriting.
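  // For example (illustrative): if a load was split at relative offsets {4}
  // but a store of that load would have to be split at {2, 4}, the two can no
  // longer be paired slice-for-slice, so the store is dropped and the load is
  // marked unsplittable below.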
4480 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4481 // Lookup the load we are storing in our map of split
4482 // offsets.
4483 auto *LI = cast<LoadInst>(SI->getValueOperand());
4484 // If it was completely unsplittable, then we're done,
4485 // and this store can't be pre-split.
4486 if (UnsplittableLoads.count(LI))
4487 return true;
4488
4489 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4490 if (LoadOffsetsI == SplitOffsetsMap.end())
4491 return false; // Unrelated loads are definitely safe.
4492 auto &LoadOffsets = LoadOffsetsI->second;
4493
4494 // Now lookup the store's offsets.
4495 auto &StoreOffsets = SplitOffsetsMap[SI];
4496
4497 // If the relative offsets of each split in the load and
4498 // store match exactly, then we can split them and we
4499 // don't need to remove them here.
4500 if (LoadOffsets.Splits == StoreOffsets.Splits)
4501 return false;
4502
4503 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4504 << " " << *LI << "\n"
4505 << " " << *SI << "\n");
4506
4507 // We've found a store and load that we need to split
4508 // with mismatched relative splits. Just give up on them
4509 // and remove both instructions from our list of
4510 // candidates.
4511 UnsplittableLoads.insert(LI);
4512 return true;
4513 });
4514 // Now we have to go *back* through all the stores, because a later store may
4515 // have caused an earlier store's load to become unsplittable and if it is
4516 // unsplittable for the later store, then we can't rely on it being split in
4517 // the earlier store either.
4518 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4519 auto *LI = cast<LoadInst>(SI->getValueOperand());
4520 return UnsplittableLoads.count(LI);
4521 });
4522 // Once we've established all the loads that can't be split for some reason,
4523 // filter any that made it into our list out.
4524 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4525 return UnsplittableLoads.count(LI);
4526 });
4527
4528 // If no loads or stores are left, there is no pre-splitting to be done for
4529 // this alloca.
4530 if (Loads.empty() && Stores.empty())
4531 return false;
4532
4533 // From here on, we can't fail and will be building new accesses, so rig up
4534 // an IR builder.
4535 IRBuilderTy IRB(&AI);
4536
4537 // Collect the new slices which we will merge into the alloca slices.
4538 SmallVector<Slice, 4> NewSlices;
4539
4540 // Track any allocas we end up splitting loads and stores for so we iterate
4541 // on them.
4542 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4543
4544 // At this point, we have collected all of the loads and stores we can
 4545 // pre-split, and the specific splits needed for them. We do the splitting
 4546 // in a specific order so that we can handle the case where one of the loads
 4547 // is the value operand of one of the stores.
4548 //
4549 // First, we rewrite all of the split loads, and just accumulate each split
4550 // load in a parallel structure. We also build the slices for them and append
4551 // them to the alloca slices.
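  // As an illustrative sketch (simplified; actual value names differ), with a
  // partition boundary at byte 4 a pattern such as:
  //   %v = load i64, ptr %src
  //   store i64 %v, ptr %dst
  // is rewritten into two narrower load/store pairs:
  //   %v.0 = load i32, ptr %src
  //   store i32 %v.0, ptr %dst
  //   %src.4 = getelementptr i8, ptr %src, i64 4
  //   %v.4 = load i32, ptr %src.4
  //   %dst.4 = getelementptr i8, ptr %dst, i64 4
  //   store i32 %v.4, ptr %dst.4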
 4552  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
 4553  std::vector<LoadInst *> SplitLoads;
4554 const DataLayout &DL = AI.getDataLayout();
4555 for (LoadInst *LI : Loads) {
4556 SplitLoads.clear();
4557
4558 auto &Offsets = SplitOffsetsMap[LI];
4559 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4560 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4561 "Load must have type size equal to store size");
4562 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4563 "Load must be >= slice size");
4564
4565 uint64_t BaseOffset = Offsets.S->beginOffset();
4566 assert(BaseOffset + SliceSize > BaseOffset &&
4567 "Cannot represent alloca access size using 64-bit integers!");
4568
4569 Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
4570 IRB.SetInsertPoint(LI);
4571
4572 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4573
4574 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4575 int Idx = 0, Size = Offsets.Splits.size();
4576 for (;;) {
4577 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4578 auto AS = LI->getPointerAddressSpace();
4579 auto *PartPtrTy = LI->getPointerOperandType();
4580 LoadInst *PLoad = IRB.CreateAlignedLoad(
4581 PartTy,
4582 getAdjustedPtr(IRB, DL, BasePtr,
4583 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4584 PartPtrTy, BasePtr->getName() + "."),
4585 getAdjustedAlignment(LI, PartOffset),
4586 /*IsVolatile*/ false, LI->getName());
4587 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4588 LLVMContext::MD_access_group});
4589
4590 // Append this load onto the list of split loads so we can find it later
4591 // to rewrite the stores.
4592 SplitLoads.push_back(PLoad);
4593
4594 // Now build a new slice for the alloca.
4595 NewSlices.push_back(
4596 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4597 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4598 /*IsSplittable*/ false));
4599 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4600 << ", " << NewSlices.back().endOffset()
4601 << "): " << *PLoad << "\n");
4602
4603 // See if we've handled all the splits.
4604 if (Idx >= Size)
4605 break;
4606
4607 // Setup the next partition.
4608 PartOffset = Offsets.Splits[Idx];
4609 ++Idx;
4610 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4611 }
4612
4613 // Now that we have the split loads, do the slow walk over all uses of the
4614 // load and rewrite them as split stores, or save the split loads to use
 4615 // below if the store is going to be split there anyway.
4616 bool DeferredStores = false;
4617 for (User *LU : LI->users()) {
4618 StoreInst *SI = cast<StoreInst>(LU);
4619 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4620 DeferredStores = true;
4621 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
4622 << "\n");
4623 continue;
4624 }
4625
4626 Value *StoreBasePtr = SI->getPointerOperand();
4627 IRB.SetInsertPoint(SI);
4628 AAMDNodes AATags = SI->getAAMetadata();
4629
4630 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4631
4632 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4633 LoadInst *PLoad = SplitLoads[Idx];
4634 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4635 auto *PartPtrTy = SI->getPointerOperandType();
4636
4637 auto AS = SI->getPointerAddressSpace();
4638 StoreInst *PStore = IRB.CreateAlignedStore(
4639 PLoad,
4640 getAdjustedPtr(IRB, DL, StoreBasePtr,
4641 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4642 PartPtrTy, StoreBasePtr->getName() + "."),
4643 getAdjustedAlignment(SI, PartOffset),
4644 /*IsVolatile*/ false);
4645 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4646 LLVMContext::MD_access_group,
4647 LLVMContext::MD_DIAssignID});
4648
4649 if (AATags)
4650 PStore->setAAMetadata(
4651 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
4652 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
4653 }
4654
4655 // We want to immediately iterate on any allocas impacted by splitting
4656 // this store, and we have to track any promotable alloca (indicated by
4657 // a direct store) as needing to be resplit because it is no longer
4658 // promotable.
4659 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4660 ResplitPromotableAllocas.insert(OtherAI);
4661 Worklist.insert(OtherAI);
4662 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4663 StoreBasePtr->stripInBoundsOffsets())) {
4664 Worklist.insert(OtherAI);
4665 }
4666
4667 // Mark the original store as dead.
4668 DeadInsts.push_back(SI);
4669 }
4670
4671 // Save the split loads if there are deferred stores among the users.
4672 if (DeferredStores)
4673 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
4674
4675 // Mark the original load as dead and kill the original slice.
4676 DeadInsts.push_back(LI);
4677 Offsets.S->kill();
4678 }
4679
4680 // Second, we rewrite all of the split stores. At this point, we know that
4681 // all loads from this alloca have been split already. For stores of such
4682 // loads, we can simply look up the pre-existing split loads. For stores of
4683 // other loads, we split those loads first and then write split stores of
4684 // them.
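  // For illustration (simplified): a 'store i64' into this alloca whose value
  // operand is an i64 load of some other pointer %other is rewritten into two
  // i32 loads of %other and two i32 stores into the new slices; if %other's
  // load was itself pre-split above, the existing split loads are reused via
  // SplitLoadsMap instead of creating new ones.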
4685 for (StoreInst *SI : Stores) {
4686 auto *LI = cast<LoadInst>(SI->getValueOperand());
4687 IntegerType *Ty = cast<IntegerType>(LI->getType());
4688 assert(Ty->getBitWidth() % 8 == 0);
4689 uint64_t StoreSize = Ty->getBitWidth() / 8;
4690 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
4691
4692 auto &Offsets = SplitOffsetsMap[SI];
4693 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
4694 "Slice size should always match load size exactly!");
4695 uint64_t BaseOffset = Offsets.S->beginOffset();
4696 assert(BaseOffset + StoreSize > BaseOffset &&
4697 "Cannot represent alloca access size using 64-bit integers!");
4698
4699 Value *LoadBasePtr = LI->getPointerOperand();
4700 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
4701
4702 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
4703
4704 // Check whether we have an already split load.
4705 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
4706 std::vector<LoadInst *> *SplitLoads = nullptr;
4707 if (SplitLoadsMapI != SplitLoadsMap.end()) {
4708 SplitLoads = &SplitLoadsMapI->second;
4709 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
4710 "Too few split loads for the number of splits in the store!");
4711 } else {
4712 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
4713 }
4714
4715 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4716 int Idx = 0, Size = Offsets.Splits.size();
4717 for (;;) {
4718 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
4719 auto *LoadPartPtrTy = LI->getPointerOperandType();
4720 auto *StorePartPtrTy = SI->getPointerOperandType();
4721
4722 // Either lookup a split load or create one.
4723 LoadInst *PLoad;
4724 if (SplitLoads) {
4725 PLoad = (*SplitLoads)[Idx];
4726 } else {
4727 IRB.SetInsertPoint(LI);
4728 auto AS = LI->getPointerAddressSpace();
4729 PLoad = IRB.CreateAlignedLoad(
4730 PartTy,
4731 getAdjustedPtr(IRB, DL, LoadBasePtr,
4732 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4733 LoadPartPtrTy, LoadBasePtr->getName() + "."),
4734 getAdjustedAlignment(LI, PartOffset),
4735 /*IsVolatile*/ false, LI->getName());
4736 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4737 LLVMContext::MD_access_group});
4738 }
4739
4740 // And store this partition.
4741 IRB.SetInsertPoint(SI);
4742 auto AS = SI->getPointerAddressSpace();
4743 StoreInst *PStore = IRB.CreateAlignedStore(
4744 PLoad,
4745 getAdjustedPtr(IRB, DL, StoreBasePtr,
4746 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4747 StorePartPtrTy, StoreBasePtr->getName() + "."),
4748 getAdjustedAlignment(SI, PartOffset),
4749 /*IsVolatile*/ false);
4750 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4751 LLVMContext::MD_access_group});
4752
4753 // Now build a new slice for the alloca.
4754 NewSlices.push_back(
4755 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4756 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
4757 /*IsSplittable*/ false));
4758 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4759 << ", " << NewSlices.back().endOffset()
4760 << "): " << *PStore << "\n");
4761 if (!SplitLoads) {
4762 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
4763 }
4764
4765 // See if we've finished all the splits.
4766 if (Idx >= Size)
4767 break;
4768
4769 // Setup the next partition.
4770 PartOffset = Offsets.Splits[Idx];
4771 ++Idx;
4772 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
4773 }
4774
4775 // We want to immediately iterate on any allocas impacted by splitting
4776 // this load, which is only relevant if it isn't a load of this alloca and
4777 // thus we didn't already split the loads above. We also have to keep track
4778 // of any promotable allocas we split loads on as they can no longer be
4779 // promoted.
4780 if (!SplitLoads) {
4781 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4782 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4783 ResplitPromotableAllocas.insert(OtherAI);
4784 Worklist.insert(OtherAI);
4785 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4786 LoadBasePtr->stripInBoundsOffsets())) {
4787 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4788 Worklist.insert(OtherAI);
4789 }
4790 }
4791
4792 // Mark the original store as dead now that we've split it up and kill its
4793 // slice. Note that we leave the original load in place unless this store
4794 // was its only use. It may in turn be split up if it is an alloca load
4795 // for some other alloca, but it may be a normal load. This may introduce
4796 // redundant loads, but where those can be merged the rest of the optimizer
4797 // should handle the merging, and this uncovers SSA splits which is more
4798 // important. In practice, the original loads will almost always be fully
4799 // split and removed eventually, and the splits will be merged by any
4800 // trivial CSE, including instcombine.
4801 if (LI->hasOneUse()) {
4802 assert(*LI->user_begin() == SI && "Single use isn't this store!");
4803 DeadInsts.push_back(LI);
4804 }
4805 DeadInsts.push_back(SI);
4806 Offsets.S->kill();
4807 }
4808
 4809 // Remove the killed slices that have been pre-split.
4810 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
4811
4812 // Insert our new slices. This will sort and merge them into the sorted
4813 // sequence.
4814 AS.insert(NewSlices);
4815
4816 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
4817#ifndef NDEBUG
4818 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
4819 LLVM_DEBUG(AS.print(dbgs(), I, " "));
4820#endif
4821
 4822 // Finally, don't try to promote any allocas that now require re-splitting.
4823 // They have already been added to the worklist above.
4824 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
4825
4826 return true;
4827}
4828
4829/// Rewrite an alloca partition's users.
4830///
4831/// This routine drives both of the rewriting goals of the SROA pass. It tries
4832/// to rewrite uses of an alloca partition to be conducive for SSA value
4833/// promotion. If the partition needs a new, more refined alloca, this will
4834/// build that new alloca, preserving as much type information as possible, and
4835/// rewrite the uses of the old alloca to point at the new one and have the
4836/// appropriate new offsets. It also evaluates how successful the rewrite was
4837/// at enabling promotion and if it was successful queues the alloca to be
4838/// promoted.
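/// As a rough example (illustrative only): for an 'alloca { i32, float }'
/// whose two fields are only ever accessed separately, the partition covering
/// bytes [4, 8) is typically rebuilt as a new 'alloca float', and the loads
/// and stores of that field are rewritten to use the new alloca directly.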
4839AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4840 Partition &P) {
4841 // Try to compute a friendly type for this partition of the alloca. This
4842 // won't always succeed, in which case we fall back to a legal integer type
4843 // or an i8 array of an appropriate size.
4844 Type *SliceTy = nullptr;
4845 VectorType *SliceVecTy = nullptr;
4846 const DataLayout &DL = AI.getDataLayout();
4847 std::pair<Type *, IntegerType *> CommonUseTy =
4848 findCommonType(P.begin(), P.end(), P.endOffset());
4849 // Do all uses operate on the same type?
4850 if (CommonUseTy.first)
4851 if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) {
4852 SliceTy = CommonUseTy.first;
4853 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4854 }
4855 // If not, can we find an appropriate subtype in the original allocated type?
4856 if (!SliceTy)
4857 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4858 P.beginOffset(), P.size()))
4859 SliceTy = TypePartitionTy;
4860
4861 // If still not, can we use the largest bitwidth integer type used?
4862 if (!SliceTy && CommonUseTy.second)
4863 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
4864 SliceTy = CommonUseTy.second;
4865 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4866 }
4867 if ((!SliceTy || (SliceTy->isArrayTy() &&
4868 SliceTy->getArrayElementType()->isIntegerTy())) &&
4869 DL.isLegalInteger(P.size() * 8)) {
4870 SliceTy = Type::getIntNTy(*C, P.size() * 8);
4871 }
4872
4873 // If the common use types are not viable for promotion then attempt to find
4874 // another type that is viable.
4875 if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL))
4876 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4877 P.beginOffset(), P.size())) {
4878 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4879 if (TypePartitionVecTy &&
4880 checkVectorTypeForPromotion(P, TypePartitionVecTy, DL))
4881 SliceTy = TypePartitionTy;
4882 }
4883
4884 if (!SliceTy)
4885 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
4886 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
4887
4888 bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
4889
4890 VectorType *VecTy =
4891 IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
4892 if (VecTy)
4893 SliceTy = VecTy;
4894
4895 // Check for the case where we're going to rewrite to a new alloca of the
4896 // exact same type as the original, and with the same access offsets. In that
4897 // case, re-use the existing alloca, but still run through the rewriter to
4898 // perform phi and select speculation.
4899 // P.beginOffset() can be non-zero even with the same type in a case with
4900 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
4901 AllocaInst *NewAI;
4902 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
4903 NewAI = &AI;
4904 // FIXME: We should be able to bail at this point with "nothing changed".
4905 // FIXME: We might want to defer PHI speculation until after here.
4906 // FIXME: return nullptr;
4907 } else {
4908 // Make sure the alignment is compatible with P.beginOffset().
4909 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
4910 // If we will get at least this much alignment from the type alone, leave
4911 // the alloca's alignment unconstrained.
4912 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
4913 NewAI = new AllocaInst(
4914 SliceTy, AI.getAddressSpace(), nullptr,
4915 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
4916 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
4917 AI.getIterator());
4918 // Copy the old AI debug location over to the new one.
4919 NewAI->setDebugLoc(AI.getDebugLoc());
4920 ++NumNewAllocas;
4921 }
4922
4923 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
4924 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
4925
4926 // Track the high watermark on the worklist as it is only relevant for
4927 // promoted allocas. We will reset it to this point if the alloca is not in
4928 // fact scheduled for promotion.
4929 unsigned PPWOldSize = PostPromotionWorklist.size();
4930 unsigned NumUses = 0;
 4931  SmallSetVector<PHINode *, 8> PHIUsers;
 4932  SmallSetVector<SelectInst *, 8> SelectUsers;
 4933
4934 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
4935 P.endOffset(), IsIntegerPromotable, VecTy,
4936 PHIUsers, SelectUsers);
4937 bool Promotable = true;
4938 for (Slice *S : P.splitSliceTails()) {
4939 Promotable &= Rewriter.visit(S);
4940 ++NumUses;
4941 }
4942 for (Slice &S : P) {
4943 Promotable &= Rewriter.visit(&S);
4944 ++NumUses;
4945 }
4946
4947 NumAllocaPartitionUses += NumUses;
4948 MaxUsesPerAllocaPartition.updateMax(NumUses);
4949
4950 // Now that we've processed all the slices in the new partition, check if any
4951 // PHIs or Selects would block promotion.
4952 for (PHINode *PHI : PHIUsers)
4953 if (!isSafePHIToSpeculate(*PHI)) {
4954 Promotable = false;
4955 PHIUsers.clear();
4956 SelectUsers.clear();
4957 break;
4958 }
4959
 4960  SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
 4961      NewSelectsToRewrite;
4962 NewSelectsToRewrite.reserve(SelectUsers.size());
4963 for (SelectInst *Sel : SelectUsers) {
4964 std::optional<RewriteableMemOps> Ops =
4965 isSafeSelectToSpeculate(*Sel, PreserveCFG);
4966 if (!Ops) {
4967 Promotable = false;
4968 PHIUsers.clear();
4969 SelectUsers.clear();
4970 NewSelectsToRewrite.clear();
4971 break;
4972 }
4973 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
4974 }
4975
4976 if (Promotable) {
4977 for (Use *U : AS.getDeadUsesIfPromotable()) {
4978 auto *OldInst = dyn_cast<Instruction>(U->get());
 4979      Value::dropDroppableUse(*U);
 4980      if (OldInst)
4981 if (isInstructionTriviallyDead(OldInst))
4982 DeadInsts.push_back(OldInst);
4983 }
4984 if (PHIUsers.empty() && SelectUsers.empty()) {
4985 // Promote the alloca.
4986 PromotableAllocas.insert(NewAI);
4987 } else {
4988 // If we have either PHIs or Selects to speculate, add them to those
 4989 // worklists and re-queue the new alloca so that we promote it on the
 4990 // next iteration.
4991 for (PHINode *PHIUser : PHIUsers)
4992 SpeculatablePHIs.insert(PHIUser);
4993 SelectsToRewrite.reserve(SelectsToRewrite.size() +
4994 NewSelectsToRewrite.size());
4995 for (auto &&KV : llvm::make_range(
4996 std::make_move_iterator(NewSelectsToRewrite.begin()),
4997 std::make_move_iterator(NewSelectsToRewrite.end())))
4998 SelectsToRewrite.insert(std::move(KV));
4999 Worklist.insert(NewAI);
5000 }
5001 } else {
5002 // Drop any post-promotion work items if promotion didn't happen.
5003 while (PostPromotionWorklist.size() > PPWOldSize)
5004 PostPromotionWorklist.pop_back();
5005
5006 // We couldn't promote and we didn't create a new partition, nothing
5007 // happened.
5008 if (NewAI == &AI)
5009 return nullptr;
5010
5011 // If we can't promote the alloca, iterate on it to check for new
5012 // refinements exposed by splitting the current alloca. Don't iterate on an
5013 // alloca which didn't actually change and didn't get promoted.
5014 Worklist.insert(NewAI);
5015 }
5016
5017 return NewAI;
5018}
5019
5020// There isn't a shared interface to get the "address" parts out of a
5021// dbg.declare and dbg.assign, so provide some wrappers now for
5022// both debug intrinsics and records.
5023const Value *getAddress(const DbgVariableIntrinsic *DVI) {
 5024  if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI))
5025 return DAI->getAddress();
5026 return cast<DbgDeclareInst>(DVI)->getAddress();
5027}
5028
5029const Value *getAddress(const DbgVariableRecord *DVR) {
 5030  return DVR->getAddress();
5031}
5032
5033bool isKillAddress(const DbgVariableIntrinsic *DVI) {
 5034  if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI))
5035 return DAI->isKillAddress();
5036 return cast<DbgDeclareInst>(DVI)->isKillLocation();
5037}
5038
5039bool isKillAddress(const DbgVariableRecord *DVR) {
 5040  if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
5041 return DVR->isKillAddress();
5042 return DVR->isKillLocation();
5043}
5044
5045const DIExpression *getAddressExpression(const DbgVariableIntrinsic *DVI) {
 5046  if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI))
5047 return DAI->getAddressExpression();
5048 return cast<DbgDeclareInst>(DVI)->getExpression();
5049}
5050
5051const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) {
 5052  if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
5053 return DVR->getAddressExpression();
5054 return DVR->getExpression();
5055}
5056
5057/// Create or replace an existing fragment in a DIExpression with \p Frag.
5058/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
5059/// operation, add \p BitExtractOffset to the offset part.
5060///
5061/// Returns the new expression, or nullptr if this fails (see details below).
5062///
5063/// This function is similar to DIExpression::createFragmentExpression except
5064/// for 3 important distinctions:
5065/// 1. The new fragment isn't relative to an existing fragment.
5066/// 2. It assumes the computed location is a memory location. This means we
5067/// don't need to perform checks that creating the fragment preserves the
5068/// expression semantics.
5069/// 3. Existing extract_bits are modified independently of fragment changes
5070/// using \p BitExtractOffset. A change to the fragment offset or size
5071/// may affect a bit extract. But a bit extract offset can change
5072/// independently of the fragment dimensions.
5073///
5074/// Returns the new expression, or nullptr if one couldn't be created.
5075/// Ideally this is only used to signal that a bit-extract has become
5076/// zero-sized (and thus the new debug record has no size and can be
5077/// dropped), however, it fails for other reasons too - see the FIXME below.
5078///
5079/// FIXME: To keep the change that introduces this function NFC it bails
5080/// in some situations unnecessarily, e.g. when fragment and bit extract
5081/// sizes differ.
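///
/// Illustrative examples (editorial, not exhaustive): with an empty address
/// expression and \p Frag = {OffsetInBits = 32, SizeInBits = 16}, the result
/// is !DIExpression(DW_OP_LLVM_fragment, 32, 16). With an expression that
/// contains DW_OP_LLVM_extract_bits_zext(offset 16, size 8), a fragment of at
/// least 8 bits, and \p BitExtractOffset = -8, the extract is rewritten to
/// start at bit 8 and no fragment operation is appended.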
5082static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
5083                                             DIExpression::FragmentInfo Frag,
5084                                             int64_t BitExtractOffset) {
 5085  SmallVector<uint64_t, 8> Ops;
 5086  bool HasFragment = false;
5087 bool HasBitExtract = false;
5088
5089 for (auto &Op : Expr->expr_ops()) {
5090 if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5091 HasFragment = true;
5092 continue;
5093 }
5094 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
 5095        Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
 5096      HasBitExtract = true;
5097 int64_t ExtractOffsetInBits = Op.getArg(0);
5098 int64_t ExtractSizeInBits = Op.getArg(1);
5099
5100 // DIExpression::createFragmentExpression doesn't know how to handle
5101 // a fragment that is smaller than the extract. Copy the behaviour
5102 // (bail) to avoid non-NFC changes.
5103 // FIXME: Don't do this.
5104 if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
5105 return nullptr;
5106
5107 assert(BitExtractOffset <= 0);
5108 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5109
5110 // DIExpression::createFragmentExpression doesn't know what to do
5111 // if the new extract starts "outside" the existing one. Copy the
5112 // behaviour (bail) to avoid non-NFC changes.
5113 // FIXME: Don't do this.
5114 if (AdjustedOffset < 0)
5115 return nullptr;
5116
5117 Ops.push_back(Op.getOp());
5118 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5119 Ops.push_back(ExtractSizeInBits);
5120 continue;
5121 }
5122 Op.appendToVector(Ops);
5123 }
5124
5125 // Unsupported by createFragmentExpression, so don't support it here yet to
5126 // preserve NFC-ness.
5127 if (HasFragment && HasBitExtract)
5128 return nullptr;
5129
5130 if (!HasBitExtract) {
 5131    Ops.push_back(dwarf::DW_OP_LLVM_fragment);
 5132    Ops.push_back(Frag.OffsetInBits);
5133 Ops.push_back(Frag.SizeInBits);
5134 }
5135 return DIExpression::get(Expr->getContext(), Ops);
5136}
5137
5138/// Insert a new dbg.declare.
5139/// \p Orig Original to copy debug loc and variable from.
5140/// \p NewAddr Location's new base address.
5141/// \p NewAddrExpr New expression to apply to address.
5142/// \p BeforeInst Insert position.
5143/// \p NewFragment New fragment (absolute, non-relative).
5144/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5145static void
5146insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig, AllocaInst *NewAddr,
 5147                 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5148 std::optional<DIExpression::FragmentInfo> NewFragment,
5149 int64_t BitExtractAdjustment) {
5150 if (NewFragment)
5151 NewAddrExpr = createOrReplaceFragment(NewAddrExpr, *NewFragment,
5152 BitExtractAdjustment);
5153 if (!NewAddrExpr)
5154 return;
5155
5156 DIB.insertDeclare(NewAddr, Orig->getVariable(), NewAddrExpr,
5157 Orig->getDebugLoc(), BeforeInst);
5158}
5159
5160/// Insert a new dbg.assign.
5161/// \p Orig Original to copy debug loc, variable, value and value expression
5162/// from.
5163/// \p NewAddr Location's new base address.
5164/// \p NewAddrExpr New expression to apply to address.
5165/// \p BeforeInst Insert position.
5166/// \p NewFragment New fragment (absolute, non-relative).
5167/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5168static void
5169insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig, AllocaInst *NewAddr,
 5170                 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5171 std::optional<DIExpression::FragmentInfo> NewFragment,
5172 int64_t BitExtractAdjustment) {
5173 // DIBuilder::insertDbgAssign will insert the #dbg_assign after NewAddr.
5174 (void)BeforeInst;
5175
5176 // A dbg.assign puts fragment info in the value expression only. The address
5177 // expression has already been built: NewAddrExpr.
5178 DIExpression *NewFragmentExpr = Orig->getExpression();
5179 if (NewFragment)
5180 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5181 BitExtractAdjustment);
5182 if (!NewFragmentExpr)
5183 return;
5184
5185 // Apply a DIAssignID to the store if it doesn't already have it.
5186 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5187 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
 5188                         DIAssignID::getDistinct(NewAddr->getContext()));
 5189  }
5190
5191 Instruction *NewAssign = cast<Instruction *>(DIB.insertDbgAssign(
5192 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5193 NewAddrExpr, Orig->getDebugLoc()));
5194 LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
5195 (void)NewAssign;
5196}
5197
5198/// Insert a new DbgRecord.
5199/// \p Orig Original to copy record type, debug loc and variable from, and
5200/// additionally value and value expression for dbg_assign records.
5201/// \p NewAddr Location's new base address.
5202/// \p NewAddrExpr New expression to apply to address.
5203/// \p BeforeInst Insert position.
5204/// \p NewFragment New fragment (absolute, non-relative).
5205/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5206static void
5207insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
 5208                 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5209 std::optional<DIExpression::FragmentInfo> NewFragment,
5210 int64_t BitExtractAdjustment) {
5211 (void)DIB;
5212
5213 // A dbg_assign puts fragment info in the value expression only. The address
5214 // expression has already been built: NewAddrExpr. A dbg_declare puts the
5215 // new fragment info into NewAddrExpr (as it only has one expression).
5216 DIExpression *NewFragmentExpr =
5217 Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5218 if (NewFragment)
5219 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5220 BitExtractAdjustment);
5221 if (!NewFragmentExpr)
5222 return;
5223
5224 if (Orig->isDbgDeclare()) {
 5225    DbgVariableRecord *DVR = DbgVariableRecord::createDVRDeclare(
 5226        NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5227 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5228 BeforeInst->getIterator());
5229 return;
5230 }
5231
5232 if (Orig->isDbgValue()) {
 5233    DbgVariableRecord *DVR = DbgVariableRecord::createDbgVariableRecord(
 5234        NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5235 // Drop debug information if the expression doesn't start with a
5236 // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
 5237    // describes the address of the alloca rather than the value inside it.
5238 if (!NewFragmentExpr->startsWithDeref())
5239 DVR->setKillAddress();
5240 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5241 BeforeInst->getIterator());
5242 return;
5243 }
5244
5245 // Apply a DIAssignID to the store if it doesn't already have it.
5246 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5247 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
 5248                         DIAssignID::getDistinct(NewAddr->getContext()));
 5249  }
5250
 5251  DbgVariableRecord *NewAssign = DbgVariableRecord::createLinkedDVRAssign(
 5252      NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5253 NewAddrExpr, Orig->getDebugLoc());
5254 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5255 (void)NewAssign;
5256}
5257
5258/// Walks the slices of an alloca and forms partitions based on them,
5259/// rewriting each of their uses.
5260bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5261 if (AS.begin() == AS.end())
5262 return false;
5263
5264 unsigned NumPartitions = 0;
5265 bool Changed = false;
5266 const DataLayout &DL = AI.getModule()->getDataLayout();
5267
5268 // First try to pre-split loads and stores.
5269 Changed |= presplitLoadsAndStores(AI, AS);
5270
5271 // Now that we have identified any pre-splitting opportunities,
5272 // mark loads and stores unsplittable except for the following case.
5273 // We leave a slice splittable if all other slices are disjoint or fully
5274 // included in the slice, such as whole-alloca loads and stores.
5275 // If we fail to split these during pre-splitting, we want to force them
5276 // to be rewritten into a partition.
5277 bool IsSorted = true;
5278
5279 uint64_t AllocaSize =
5280 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5281 const uint64_t MaxBitVectorSize = 1024;
5282 if (AllocaSize <= MaxBitVectorSize) {
5283 // If a byte boundary is included in any load or store, a slice starting or
5284 // ending at the boundary is not splittable.
5285 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5286 for (Slice &S : AS)
5287 for (unsigned O = S.beginOffset() + 1;
5288 O < S.endOffset() && O < AllocaSize; O++)
5289 SplittableOffset.reset(O);
5290
5291 for (Slice &S : AS) {
5292 if (!S.isSplittable())
5293 continue;
5294
5295 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5296 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5297 continue;
5298
5299 if (isa<LoadInst>(S.getUse()->getUser()) ||
5300 isa<StoreInst>(S.getUse()->getUser())) {
5301 S.makeUnsplittable();
5302 IsSorted = false;
5303 }
5304 }
5305 } else {
5306 // We only allow whole-alloca splittable loads and stores
 5307  // for a large alloca to avoid creating a BitVector that is too large.
5308 for (Slice &S : AS) {
5309 if (!S.isSplittable())
5310 continue;
5311
5312 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5313 continue;
5314
5315 if (isa<LoadInst>(S.getUse()->getUser()) ||
5316 isa<StoreInst>(S.getUse()->getUser())) {
5317 S.makeUnsplittable();
5318 IsSorted = false;
5319 }
5320 }
5321 }
5322
5323 if (!IsSorted)
 5324    llvm::stable_sort(AS);
 5325
5326 /// Describes the allocas introduced by rewritePartition in order to migrate
5327 /// the debug info.
5328 struct Fragment {
5329 AllocaInst *Alloca;
 5330    uint64_t Offset;
 5331    uint64_t Size;
5332 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5333 : Alloca(AI), Offset(O), Size(S) {}
5334 };
5335 SmallVector<Fragment, 4> Fragments;
5336
5337 // Rewrite each partition.
5338 for (auto &P : AS.partitions()) {
5339 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5340 Changed = true;
5341 if (NewAI != &AI) {
5342 uint64_t SizeOfByte = 8;
5343 uint64_t AllocaSize =
5344 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5345 // Don't include any padding.
5346 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5347 Fragments.push_back(
5348 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5349 }
5350 }
5351 ++NumPartitions;
5352 }
5353
5354 NumAllocaPartitions += NumPartitions;
5355 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5356
5357 // Migrate debug information from the old alloca to the new alloca(s)
5358 // and the individual partitions.
5359 auto MigrateOne = [&](auto *DbgVariable) {
5360 // Can't overlap with undef memory.
 5361    if (isKillAddress(DbgVariable))
 5362      return;
5363
5364 const Value *DbgPtr = getAddress(DbgVariable);
 5365    DIExpression::FragmentInfo VarFrag =
 5366        DbgVariable->getFragmentOrEntireVariable();
5367 // Get the address expression constant offset if one exists and the ops
5368 // that come after it.
5369 int64_t CurrentExprOffsetInBytes = 0;
5370 SmallVector<uint64_t> PostOffsetOps;
 5371    if (!getAddressExpression(DbgVariable)
 5372             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5373 return; // Couldn't interpret this DIExpression - drop the var.
5374
5375 // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
5376 int64_t ExtractOffsetInBits = 0;
5377 for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5378 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
 5379          Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
 5380        ExtractOffsetInBits = Op.getArg(0);
5381 break;
5382 }
5383 }
5384
5385 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5386 for (auto Fragment : Fragments) {
5387 int64_t OffsetFromLocationInBits;
5388 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5389 // Find the variable fragment that the new alloca slice covers.
5390 // Drop debug info for this variable fragment if we can't compute an
5391 // intersect between it and the alloca slice.
 5392      if (!DIExpression::calculateFragmentIntersect(
 5393              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5394 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5395 NewDbgFragment, OffsetFromLocationInBits))
5396 continue; // Do not migrate this fragment to this slice.
5397
5398 // Zero sized fragment indicates there's no intersect between the variable
5399 // fragment and the alloca slice. Skip this slice for this variable
5400 // fragment.
5401 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5402 continue; // Do not migrate this fragment to this slice.
5403
5404 // No fragment indicates DbgVariable's variable or fragment exactly
5405 // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5406 if (!NewDbgFragment)
5407 NewDbgFragment = DbgVariable->getFragment();
5408
5409 // Reduce the new expression offset by the bit-extract offset since
5410 // we'll be keeping that.
5411 int64_t OffestFromNewAllocaInBits =
5412 OffsetFromLocationInBits - ExtractOffsetInBits;
5413 // We need to adjust an existing bit extract if the offset expression
5414 // can't eat the slack (i.e., if the new offset would be negative).
5415 int64_t BitExtractOffset =
5416 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5417 // The magnitude of a negative value indicates the number of bits into
5418 // the existing variable fragment that the memory region begins. The new
5419 // variable fragment already excludes those bits - the new DbgPtr offset
5420 // only needs to be applied if it's positive.
5421 OffestFromNewAllocaInBits =
5422 std::max(int64_t(0), OffestFromNewAllocaInBits);
5423
5424 // Rebuild the expression:
5425 // {Offset(OffestFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5426 // Add NewDbgFragment later, because dbg.assigns don't want it in the
5427 // address expression but the value expression instead.
5428 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5429 if (OffestFromNewAllocaInBits > 0) {
5430 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5431 NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
5432 }
5433
5434 // Remove any existing intrinsics on the new alloca describing
5435 // the variable fragment.
5436 auto RemoveOne = [DbgVariable](auto *OldDII) {
5437 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5438 return LHS->getVariable() == RHS->getVariable() &&
5439 LHS->getDebugLoc()->getInlinedAt() ==
5440 RHS->getDebugLoc()->getInlinedAt();
5441 };
5442 if (SameVariableFragment(OldDII, DbgVariable))
5443 OldDII->eraseFromParent();
5444 };
5445 for_each(findDbgDeclares(Fragment.Alloca), RemoveOne);
5446 for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
5447 for_each(findDVRValues(Fragment.Alloca), RemoveOne);
5448 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5449 NewDbgFragment, BitExtractOffset);
5450 }
5451 };
5452
5453 // Migrate debug information from the old alloca to the new alloca(s)
5454 // and the individual partitions.
5455 for_each(findDbgDeclares(&AI), MigrateOne);
5456 for_each(findDVRDeclares(&AI), MigrateOne);
5457 for_each(findDVRValues(&AI), MigrateOne);
5458 for_each(at::getAssignmentMarkers(&AI), MigrateOne);
5459 for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
5460
5461 return Changed;
5462}
5463
5464/// Clobber a use with poison, deleting the used value if it becomes dead.
5465void SROA::clobberUse(Use &U) {
5466 Value *OldV = U;
 5467  // Replace the use with a poison value.
5468 U = PoisonValue::get(OldV->getType());
5469
5470 // Check for this making an instruction dead. We have to garbage collect
5471 // all the dead instructions to ensure the uses of any alloca end up being
5472 // minimal.
5473 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5474 if (isInstructionTriviallyDead(OldI)) {
5475 DeadInsts.push_back(OldI);
5476 }
5477}
5478
5479/// A basic LoadAndStorePromoter that does not remove store nodes.
5480class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
5481public:
 5482  BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
 5483                            Type *ZeroType)
5484 : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5485 bool shouldDelete(Instruction *I) const override {
5486 return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5487 }
5488
 5489  Value *getValueToUseForAlloca(Instruction *I) const override {
 5490    return UndefValue::get(ZeroType);
5491 }
5492
5493private:
5494 Type *ZeroType;
5495};
5496
5497bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5498 // Look through each "partition", looking for slices with the same start/end
5499 // that do not overlap with any before them. The slices are sorted by
5500 // increasing beginOffset. We don't use AS.partitions(), as it will use a more
5501 // sophisticated algorithm that takes splittable slices into account.
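  // For instance (simplified): if an alloca only escapes through a read-only
  // call and is otherwise accessed by matching 'store i32' / 'load i32' slices
  // over the same [begin, end) range, each load is rewritten to use the stored
  // SSA value directly while the stores are kept for the read-only reader.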
5502 auto PartitionBegin = AS.begin();
5503 auto PartitionEnd = PartitionBegin;
5504 uint64_t BeginOffset = PartitionBegin->beginOffset();
5505 uint64_t EndOffset = PartitionBegin->endOffset();
5506 while (PartitionBegin != AS.end()) {
5507 bool AllSameAndValid = true;
 5508    SmallVector<Instruction *, 4> Insts;
 5509    Type *PartitionType = nullptr;
5510 while (PartitionEnd != AS.end() &&
5511 (PartitionEnd->beginOffset() < EndOffset ||
5512 PartitionEnd->endOffset() <= EndOffset)) {
5513 if (AllSameAndValid) {
5514 AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
5515 PartitionEnd->endOffset() == EndOffset;
5516 Instruction *User =
5517 cast<Instruction>(PartitionEnd->getUse()->getUser());
5518 if (auto *LI = dyn_cast<LoadInst>(User)) {
5519 Type *UserTy = LI->getType();
5520 // LoadAndStorePromoter requires all the types to be the same.
5521 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5522 AllSameAndValid = false;
5523 PartitionType = UserTy;
5524 Insts.push_back(User);
5525 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5526 Type *UserTy = SI->getValueOperand()->getType();
5527 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5528 AllSameAndValid = false;
5529 PartitionType = UserTy;
5530 Insts.push_back(User);
5531 } else if (!isAssumeLikeIntrinsic(User)) {
5532 AllSameAndValid = false;
5533 }
5534 }
5535 EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
5536 ++PartitionEnd;
5537 }
5538
5539 // So long as all the slices start and end offsets matched, update loads to
5540 // the values stored in the partition.
5541 if (AllSameAndValid && !Insts.empty()) {
5542 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5543 << EndOffset << ")\n");
 5544      SmallVector<PHINode *, 4> NewPHIs;
 5545      SSAUpdater SSA(&NewPHIs);
5546 Insts.push_back(&AI);
5547 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5548 Promoter.run(Insts);
5549 }
5550
5551 // Step on to the next partition.
5552 PartitionBegin = PartitionEnd;
5553 if (PartitionBegin == AS.end())
5554 break;
5555 BeginOffset = PartitionBegin->beginOffset();
5556 EndOffset = PartitionBegin->endOffset();
5557 }
5558 return true;
5559}
5560
5561/// Analyze an alloca for SROA.
5562///
5563/// This analyzes the alloca to ensure we can reason about it, builds
5564/// the slices of the alloca, and then hands it off to be split and
5565/// rewritten as needed.
5566std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5567SROA::runOnAlloca(AllocaInst &AI) {
5568 bool Changed = false;
5569 bool CFGChanged = false;
5570
5571 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5572 ++NumAllocasAnalyzed;
5573
5574 // Special case dead allocas, as they're trivial.
5575 if (AI.use_empty()) {
5576 AI.eraseFromParent();
5577 Changed = true;
5578 return {Changed, CFGChanged};
5579 }
5580 const DataLayout &DL = AI.getDataLayout();
5581
5582 // Skip alloca forms that this analysis can't handle.
5583 auto *AT = AI.getAllocatedType();
5584 TypeSize Size = DL.getTypeAllocSize(AT);
5585 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5586 Size.getFixedValue() == 0)
5587 return {Changed, CFGChanged};
5588
5589 // First, split any FCA loads and stores touching this alloca to promote
5590 // better splitting and promotion opportunities.
5591 IRBuilderTy IRB(&AI);
5592 AggLoadStoreRewriter AggRewriter(DL, IRB);
5593 Changed |= AggRewriter.rewrite(AI);
5594
5595 // Build the slices using a recursive instruction-visiting builder.
5596 AllocaSlices AS(DL, AI);
5597 LLVM_DEBUG(AS.print(dbgs()));
5598 if (AS.isEscaped())
5599 return {Changed, CFGChanged};
5600
5601 if (AS.isEscapedReadOnly()) {
5602 Changed |= propagateStoredValuesToLoads(AI, AS);
5603 return {Changed, CFGChanged};
5604 }
5605
5606 // Delete all the dead users of this alloca before splitting and rewriting it.
5607 for (Instruction *DeadUser : AS.getDeadUsers()) {
5608 // Free up everything used by this instruction.
5609 for (Use &DeadOp : DeadUser->operands())
5610 clobberUse(DeadOp);
5611
5612 // Now replace the uses of this instruction.
5613 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5614
5615 // And mark it for deletion.
5616 DeadInsts.push_back(DeadUser);
5617 Changed = true;
5618 }
5619 for (Use *DeadOp : AS.getDeadOperands()) {
5620 clobberUse(*DeadOp);
5621 Changed = true;
5622 }
5623
5624 // No slices to split. Leave the dead alloca for a later pass to clean up.
5625 if (AS.begin() == AS.end())
5626 return {Changed, CFGChanged};
5627
5628 Changed |= splitAlloca(AI, AS);
5629
5630 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
5631 while (!SpeculatablePHIs.empty())
5632 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
5633
5634 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
5635 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5636 while (!RemainingSelectsToRewrite.empty()) {
5637 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5638 CFGChanged |=
5639 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
5640 }
5641
5642 return {Changed, CFGChanged};
5643}
5644
5645/// Delete the dead instructions accumulated in this run.
5646///
5647/// Recursively deletes the dead instructions we've accumulated. This is done
5648/// at the very end to maximize locality of the recursive delete and to
5649/// minimize the problems of invalidated instruction pointers as such pointers
5650/// are used heavily in the intermediate stages of the algorithm.
5651///
5652/// We also record the alloca instructions deleted here so that they aren't
5653/// subsequently handed to mem2reg to promote.
5654bool SROA::deleteDeadInstructions(
5655 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5656 bool Changed = false;
5657 while (!DeadInsts.empty()) {
5658 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
5659 if (!I)
5660 continue;
5661 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
5662
5663 // If the instruction is an alloca, find the possible dbg.declare connected
5664 // to it, and remove it too. We must do this before calling RAUW or we will
5665 // not be able to find it.
5666 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5667 DeletedAllocas.insert(AI);
5668 for (DbgDeclareInst *OldDII : findDbgDeclares(AI))
5669 OldDII->eraseFromParent();
5670 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
5671 OldDII->eraseFromParent();
5672 }
5673
5675 I->replaceAllUsesWith(UndefValue::get(I->getType()));
5676
5677 for (Use &Operand : I->operands())
5678 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
5679 // Zero out the operand and see if it becomes trivially dead.
5680 Operand = nullptr;
 5681        if (isInstructionTriviallyDead(U))
 5682          DeadInsts.push_back(U);
5683 }
5684
5685 ++NumDeleted;
5686 I->eraseFromParent();
5687 Changed = true;
5688 }
5689 return Changed;
5690}
5691/// Promote the allocas, using the best available technique.
5692///
5693/// This attempts to promote whatever allocas have been identified as viable in
5694/// the PromotableAllocas list. If that list is empty, there is nothing to do.
5695/// This function returns whether any promotion occurred.
5696bool SROA::promoteAllocas(Function &F) {
5697 if (PromotableAllocas.empty())
5698 return false;
5699
5700 if (SROASkipMem2Reg) {
5701 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
5702 } else {
5703 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
5704 NumPromoted += PromotableAllocas.size();
5705 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
5706 }
5707
5708 PromotableAllocas.clear();
5709 return true;
5710}
5711
5712std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
5713 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
5714
5715 const DataLayout &DL = F.getDataLayout();
5716 BasicBlock &EntryBB = F.getEntryBlock();
5717 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
5718 I != E; ++I) {
5719 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5720 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
 5721          isAllocaPromotable(AI))
 5722        PromotableAllocas.insert(AI);
5723 else
5724 Worklist.insert(AI);
5725 }
5726 }
5727
5728 bool Changed = false;
5729 bool CFGChanged = false;
5730 // A set of deleted alloca instruction pointers which should be removed from
5731 // the list of promotable allocas.
5732 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
5733
5734 do {
5735 while (!Worklist.empty()) {
5736 auto [IterationChanged, IterationCFGChanged] =
5737 runOnAlloca(*Worklist.pop_back_val());
5738 Changed |= IterationChanged;
5739 CFGChanged |= IterationCFGChanged;
5740
5741 Changed |= deleteDeadInstructions(DeletedAllocas);
5742
5743 // Remove the deleted allocas from various lists so that we don't try to
5744 // continue processing them.
5745 if (!DeletedAllocas.empty()) {
5746 Worklist.set_subtract(DeletedAllocas);
5747 PostPromotionWorklist.set_subtract(DeletedAllocas);
5748 PromotableAllocas.set_subtract(DeletedAllocas);
5749 DeletedAllocas.clear();
5750 }
5751 }
5752
5753 Changed |= promoteAllocas(F);
5754
5755 Worklist = PostPromotionWorklist;
5756 PostPromotionWorklist.clear();
5757 } while (!Worklist.empty());
5758
5759 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
5760 assert((!CFGChanged || !PreserveCFG) &&
5761 "Should not have modified the CFG when told to preserve it.");
5762
5763 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
5764 for (auto &BB : F) {
 5765      RemoveRedundantDbgInstrs(&BB);
 5766    }
5767 }
5768
5769 return {Changed, CFGChanged};
5770}
5771
5772PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
 5773  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
 5774  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
 5775  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5776 auto [Changed, CFGChanged] =
5777 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
5778 if (!Changed)
5779 return PreservedAnalyses::all();
 5780  PreservedAnalyses PA;
 5781  if (!CFGChanged)
 5782    PA.preserveSet<CFGAnalyses>();
 5783  PA.preserve<DominatorTreeAnalysis>();
 5784  return PA;
5785}
5786
5787void SROAPass::printPipeline(
 5788    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5789 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
5790 OS, MapClassName2PassName);
5791 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
5792 : "<modify-cfg>");
5793}
5794
5796
5797namespace {
5798
5799/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
5800class SROALegacyPass : public FunctionPass {
 5801  SROAOptions PreserveCFG;
 5802
5803public:
5804 static char ID;
5805
5806 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
 5807      : FunctionPass(ID), PreserveCFG(PreserveCFG) {
 5808    initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
 5809  }
5810
5811 bool runOnFunction(Function &F) override {
5812 if (skipFunction(F))
5813 return false;
5814
5815 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5816 AssumptionCache &AC =
5817 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5818 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5819 auto [Changed, _] =
5820 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
5821 return Changed;
5822 }
5823
5824 void getAnalysisUsage(AnalysisUsage &AU) const override {
 5825    AU.addRequired<AssumptionCacheTracker>();
 5826    AU.addRequired<DominatorTreeWrapperPass>();
 5827    AU.addPreserved<GlobalsAAWrapperPass>();
 5828    AU.setPreservesCFG();
 5829  }
5830
5831 StringRef getPassName() const override { return "SROA"; }
5832};
5833
5834} // end anonymous namespace
5835
5836char SROALegacyPass::ID = 0;
5837
5838FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
 5839  return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
 5840                                        : SROAOptions::ModifyCFG);
5841}
5842
5843INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
 5844                      "Scalar Replacement Of Aggregates", false, false)
5845INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5846INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5847INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
 5848                    false, false)
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint32_t Index
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
static std::optional< uint64_t > getSizeInBytes(std::optional< uint64_t > SizeInBits)
Memory SSA
Definition: MemorySSA.cpp:72
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
Definition: SROA.cpp:353
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
Definition: SROA.cpp:1918
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
DbgVariableRecord * UnwrapDbgInstPtr(DbgInstPtr P, DbgVariableRecord *Unused)
Helpers for handling new and old debug info modes in migrateDebugInfo.
Definition: SROA.cpp:330
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
Definition: SROA.cpp:1485
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
Definition: SROA.cpp:4186
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy)
Test whether we can convert a value from the old to the new type.
Definition: SROA.cpp:1928
static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI)
Definition: SROA.cpp:318
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy)
Definition: SROA.cpp:2233
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
Definition: SROA.cpp:272
const Value * getAddress(const DbgVariableIntrinsic *DVI)
Definition: SROA.cpp:5023
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
Definition: SROA.cpp:5082
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
Definition: SROA.cpp:2517
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
Definition: SROA.cpp:1907
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
Definition: SROA.cpp:2358
Scalar Replacement Of Aggregates
Definition: SROA.cpp:5847
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
Definition: SROA.cpp:2550
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
Definition: SROA.cpp:1006
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
Definition: SROA.cpp:1873
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
Definition: SROA.cpp:1806
sroa
Definition: SROA.cpp:5847
static Value * foldSelectInst(SelectInst &SI)
Definition: SROA.cpp:993
static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, const DataLayout &DL)
Test whether a vector type is viable for promotion.
Definition: SROA.cpp:2118
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
Definition: SROA.cpp:2572
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
Definition: SROA.cpp:2451
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
Definition: SROA.cpp:1625
bool isKillAddress(const DbgVariableIntrinsic *DVI)
Definition: SROA.cpp:5033
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
Definition: SROA.cpp:1551
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
Definition: SROA.cpp:2492
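A hedged sketch of the shift-and-truncate shape extractInteger produces, and the mask-and-or shape insertInteger (above) produces, when an alloca is treated as one wide integer; the names are illustrative and endianness handling is ignored.
// Pull the bits starting at BitOffset out of a wide integer value.
static Value *extractBitsAt(IRBuilder<> &IRB, Value *Wide,
                            IntegerType *NarrowTy, uint64_t BitOffset) {
  Value *V = Wide;
  if (BitOffset)
    V = IRB.CreateLShr(V, BitOffset, "extract.shift");
  return IRB.CreateTrunc(V, NarrowTy, "extract.trunc");
}

// Splice a narrow value back into the wide integer at BitOffset.
static Value *insertBitsAt(IRBuilder<> &IRB, Value *Wide, Value *Narrow,
                           uint64_t BitOffset) {
  IntegerType *WideTy = cast<IntegerType>(Wide->getType());
  Value *V = IRB.CreateZExt(Narrow, WideTy, "insert.ext");
  if (BitOffset)
    V = IRB.CreateShl(V, BitOffset, "insert.shift");
  // Clear the destination bits in the old value, then merge the new bits in.
  APInt Mask = ~(APInt::getLowBitsSet(WideTy->getBitWidth(),
                                      Narrow->getType()->getIntegerBitWidth())
                     .shl(BitOffset));
  Value *Old = IRB.CreateAnd(Wide, Mask, "insert.mask");
  return IRB.CreateOr(Old, V, "insert.insert");
}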
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL)
Test whether the given slice use can be promoted to a vector.
Definition: SROA.cpp:2044
static void insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new dbg.declare.
Definition: SROA.cpp:5146
const DIExpression * getAddressExpression(const DbgVariableIntrinsic *DVI)
Definition: SROA.cpp:5045
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
Definition: SROA.cpp:1768
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
Definition: SROA.cpp:4224
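A hedged sketch of how the pass queries this; AI and P are assumed to be the alloca and the current partition. With a typical data layout, an alloca of type { i32, i64 } queried at offset 8 with size 8 would be expected to yield the i64 element.
// Try to peel off a sub-type of the alloca that exactly covers the partition.
Type *SliceTy = nullptr;
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                             P.beginOffset(), P.size()))
  SliceTy = TypePartitionTy;  // e.g. the i64 field in the example above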
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
Definition: SROA.cpp:1706
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
Definition: SROA.cpp:1990
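These two functions work as a guard/act pair; a hedged sketch with V, NewTy, DL and IRB assumed in scope.
// Only rewrite the value if the conversion is known to be representable
// (same store size, compatible pointer/integer rules per canConvertValue).
if (canConvertValue(DL, V->getType(), NewTy))
  V = convertValue(DL, IRB, V, NewTy);  // emits bitcast/inttoptr/ptrtoint as needed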
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
Definition: SROA.cpp:2277
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy)
Test whether any vector type in CandidateTys is viable for promotion.
Definition: SROA.cpp:2147
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:261
Value * RHS
Value * LHS
Builder for the alloca slices.
Definition: SROA.cpp:1018
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
Definition: SROA.cpp:1034
An iterator over partitions of the alloca's slices.
Definition: SROA.cpp:806
bool operator==(const partition_iterator &RHS) const
Definition: SROA.cpp:953
partition_iterator & operator++()
Definition: SROA.cpp:973
A basic LoadAndStorePromoter that does not remove store nodes.
Definition: SROA.cpp:5480
bool shouldDelete(Instruction *I) const override
Return false if a sub-class wants to keep one of the loads/stores after the SSA construction.
Definition: SROA.cpp:5485
BasicLoadAndStorePromoter(ArrayRef< const Instruction * > Insts, SSAUpdater &S, Type *ZeroType)
Definition: SROA.cpp:5482
Value * getValueToUseForAlloca(Instruction *I) const override
Return the value to use at the point in the code where the alloca is positioned.
Definition: SROA.cpp:5489
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Definition: Instructions.h:63
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:124
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:99
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:117
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:104
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class represents a no-op cast from one type to another.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
bool doesNotCapture(unsigned OpNo) const
Determine whether this data operand is not captured.
Definition: InstrTypes.h:1677
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1724
bool isDataOperand(const Use *U) const
Definition: InstrTypes.h:1246
This class represents a function call, abstracting a target machine's calling convention.
ConstantFolder - Create constants with minimum, target independent, folding.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1421
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
Definition: DIBuilder.cpp:974
DWARF expression.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
bool startsWithDeref() const
Return whether the first element is a DW_OP_deref.
static bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
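A hedged usage sketch; Expr, OffsetInBits and SizeInBits stand in for the variable's current expression and the slice being described.
// Describe only the slice of the variable that the new alloca backs; a
// std::nullopt result means the fragment cannot be expressed.
if (std::optional<DIExpression *> FragExpr =
        DIExpression::createFragmentExpression(Expr, OffsetInBits, SizeInBits))
  Expr = *FragExpr;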
static DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
std::optional< uint64_t > getSizeInBits() const
Determines the size of the variable's type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool typeSizeEqualsStoreSize(Type *Ty) const
Returns true if no extra padding bits are needed when storing the specified type.
Definition: DataLayout.h:445
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:421
This represents the llvm.dbg.assign instruction.
This represents the llvm.dbg.declare instruction.
DebugLoc getDebugLoc() const
Value * getValue(unsigned OpIdx=0) const
This is the common base class for debug info intrinsics for variables.
DILocalVariable * getVariable() const
DIExpression * getExpression() const
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
bool isKillAddress() const
Check whether this kills the address component.
Value * getValue(unsigned OpIdx=0) const
static DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DIExpression * getExpression() const
void setKillAddress()
Kill the address component.
DILocalVariable * getVariable() const
static DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
DIExpression * getAddressExpression() const
This class is used to track local variable information.
Definition: DwarfDebug.h:214
DILocation * getInlinedAt() const
Definition: DebugLoc.cpp:39
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
Represents flags for the getelementptr instruction/expression.
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Definition: Operator.cpp:113
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
Definition: Instructions.h:990
GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
Legacy wrapper pass to provide the GlobalsAAResult object.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
Definition: IRBuilder.h:60
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
Definition: IRBuilder.h:64
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
Base class for instruction visitors.
Definition: InstVisitor.h:78
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:68
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1764
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:368
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:390
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:54
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition: SSAUpdater.h:151
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
void setAlignment(Align Align)
Definition: Instructions.h:215
Value * getPointerOperand()
Definition: Instructions.h:255
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:205
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:241
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:220
Type * getPointerOperandType() const
Definition: Instructions.h:258
static unsigned getPointerOperandIndex()
Definition: Instructions.h:257
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:230
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
LLVMContext & getContext() const
Definition: Metadata.h:1233
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:294
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
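A hedged sketch of the create-then-populate idiom used when, for example, speculating loads over a PHI of pointers; LoadTy, PN and LoadedVals are illustrative names, not SROA's own.
// One incoming value per predecessor, reserved up front.
PHINode *NewPN = PHINode::Create(LoadTy, PN.getNumIncomingValues(),
                                 PN.getName() + ".sroa.speculated");
NewPN->insertBefore(PN.getIterator());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
  NewPN->addIncoming(LoadedVals[i], PN.getIncomingBlock(i));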
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
PointerIntPair - This class implements a pair of a pointer and small integer.
void setPointer(PointerTy PtrVal) &
IntType getInt() const
void setInt(IntType IntVal) &
PointerTy getPointer() const
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
Definition: PointerUnion.h:118
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
A base class for visitors over the uses of a pointer value.
void visitGetElementPtrInst(GetElementPtrInst &GEPI)
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC)
void visitBitCastInst(BitCastInst &BC)
void visitIntrinsicInst(IntrinsicInst &II)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: SROA.cpp:5772
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SROA.cpp:5787
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Definition: SROA.cpp:5795
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getTrueValue() const
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
typename SuperClass::iterator iterator
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
void setAlignment(Align Align)
Definition: Instructions.h:337
Value * getValueOperand()
Definition: Instructions.h:378
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
Value * getPointerOperand()
Definition: Instructions.h:381
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:364
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:571
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition: StringRef.h:347
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:297
static constexpr size_t npos
Definition: StringRef.h:53
size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C, or npos if not found.
Definition: StringRef.cpp:253
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getSizeInBytes() const
Definition: DataLayout.h:574
unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
Definition: DataLayout.cpp:92
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
TypeSize getSizeInBits() const
Definition: DataLayout.h:576
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
element_iterator element_end() const
Definition: DerivedTypes.h:356
element_iterator element_begin() const
Definition: DerivedTypes.h:355
bool isPacked() const
Definition: DerivedTypes.h:284
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:366
Type::subtype_iterator element_iterator
Definition: DerivedTypes.h:353
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:261
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:243
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
Type * getArrayElementType() const
Definition: Type.h:411
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:295
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:267
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1859
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
op_iterator op_begin()
Definition: User.h:280
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
Value * getOperand(unsigned i) const
Definition: User.h:228
op_iterator op_end()
Definition: User.h:282
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition: Value.cpp:786
iterator_range< user_iterator > users()
Definition: Value.h:421
static void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition: Value.cpp:217
void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
Definition: Value.cpp:209
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void setAborted(Instruction *I)
Mark the visit as aborted.
Definition: PtrUseVisitor.h:86
void setEscapedAndAborted(Instruction *I)
Mark the pointer as escaped, and the visit as aborted.
void setEscapedReadOnly(Instruction *I)
Mark the pointer as escaped into a readonly-nocapture call.
Definition: PtrUseVisitor.h:99
APInt Offset
The constant offset of the use if that is known.
void enqueueUsers(Value &I)
Enqueue the users of this instruction in the visit worklist.
bool IsOffsetKnown
True if we have a known constant offset for the use currently being visited.
PtrInfo PI
The info collected about the pointer being visited thus far.
Use * U
The use currently being visited.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1600
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1866
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1880
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
Definition: Dwarf.h:149
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
Definition: Dwarf.h:142
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
Definition: Dwarf.h:148
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from the given basic block.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
TinyPtrVector< DbgDeclareInst * > findDbgDeclares(Value *V)
Finds dbg.declare intrinsics declaring local variables as living in the memory that 'V' points to.
Definition: DebugInfo.cpp:47
void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto successors(const MachineBasicBlock *BB)
void * PointerTy
Definition: GenericValue.h:21
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
Definition: Local.cpp:3464
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1453
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
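A hedged sketch of the usual pairing with in-loop erasure; BB is assumed to be a BasicBlock reference.
// Early-increment iteration lets the body erase the current instruction
// without invalidating the loop.
for (Instruction &I : make_early_inc_range(BB))
  if (isInstructionTriviallyDead(&I))
    I.eraseFromParent();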
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
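Together with PromoteMemToReg above, this is the core of the mem2reg step SROA finishes with; a self-contained, hedged sketch (SROA's own driver adds worklist and debug-info bookkeeping not shown here, and the function name is illustrative).
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

static bool promoteEntryBlockAllocas(Function &F, DominatorTree &DT,
                                     AssumptionCache &AC) {
  SmallVector<AllocaInst *, 8> Promotable;
  // Static allocas live in the entry block; keep only the provably safe ones.
  for (Instruction &I : F.getEntryBlock())
    if (auto *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))
        Promotable.push_back(AI);
  if (Promotable.empty())
    return false;
  // Inserts PHI nodes as needed and erases the promoted allocas.
  PromoteMemToReg(Promotable, DT, &AC);
  return true;
}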
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:406
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
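A hedged sketch of the CFG shape SROA needs when rewriting a memory access of a select; SI is the SelectInst and DTU a DomTreeUpdater, and cloning the access into the two arms is elided.
Instruction *ThenTerm = nullptr, *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(SI.getCondition(), SI.getIterator(), &ThenTerm,
                              &ElseTerm, /*BranchWeights=*/nullptr, &DTU);
// ThenTerm/ElseTerm now terminate two fresh blocks guarded by the select's
// condition; the load or store is cloned into each and fed the matching hand.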
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:385
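A hedged sketch of the guard used before speculating a load; LI is the LoadInst, Ptr the candidate pointer, and the 64-bit size width is an illustrative simplification.
const DataLayout &DL = LI.getDataLayout();
APInt LoadSize(/*numBits=*/64,
               DL.getTypeStoreSize(LI.getType()).getFixedValue());
bool Speculatable =
    isSafeToLoadUnconditionally(Ptr, LI.getAlign(), LoadSize, DL, &LI);
// If Speculatable, the load cannot trap here and may be hoisted.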
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
Definition: DebugInfo.cpp:83
void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
void initializeSROALegacyPassPass(PassRegistry &)
bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
Definition: DebugInfo.cpp:2298
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
As above, for DVRDeclares.
Definition: DebugInfo.cpp:66
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
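For instance, an offset-adjusted alignment for a load or store can be expressed in terms of this and getLoadStoreAlignment above; a hedged one-liner with I and Offset assumed in scope.
// Alignment valid for an access Offset bytes past a pointer that carries the
// original load/store's alignment.
Align Adjusted = commonAlignment(getLoadStoreAlignment(I), Offset);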
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
FunctionPass * createSROAPass(bool PreserveCFG=true)
Definition: SROA.cpp:5838
SROAOptions
Definition: SROA.h:24
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NDEBUG
Definition: regutils.h:48
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
Definition: Metadata.h:814
AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Describes an element of a Bitfield.
Definition: Bitfields.h:223
Holds functions to get, set or test bitfields.
Definition: Bitfields.h:212
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254