1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
24
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
46#include "llvm/Config/llvm-config.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DIBuilder.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DebugInfo.h"
56#include "llvm/IR/Dominators.h"
57#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/Metadata.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
69#include "llvm/IR/PassManager.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
76#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
87#include <algorithm>
88#include <cassert>
89#include <cstddef>
90#include <cstdint>
91#include <cstring>
92#include <iterator>
93#include <string>
94#include <tuple>
95#include <utility>
96#include <variant>
97#include <vector>
98
99using namespace llvm;
100
101#define DEBUG_TYPE "sroa"
102
103STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
104STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
105STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
106STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
107STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
108STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
109STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
110STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
111STATISTIC(NumLoadsPredicated,
112 "Number of loads rewritten into predicated loads to allow promotion");
113STATISTIC(
114 NumStoresPredicated,
115 "Number of stores rewritten into predicated stores to allow promotion");
116STATISTIC(NumDeleted, "Number of instructions deleted");
117STATISTIC(NumVectorized, "Number of vectorized aggregates");
118
119/// Hidden option to experiment with completely strict handling of inbounds
120/// GEPs.
121static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
122 cl::Hidden);
123/// Disable running mem2reg during SROA in order to test or debug SROA.
124static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
125 cl::Hidden);
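// For testing, these hidden flags are normally passed straight to 'opt'
// together with the pass, e.g. (illustrative invocation):
//   opt -passes=sroa -sroa-skip-mem2reg -S input.ll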
126namespace {
127
128class AllocaSliceRewriter;
129class AllocaSlices;
130class Partition;
131
132class SelectHandSpeculativity {
133 unsigned char Storage = 0; // None are speculatable by default.
134 using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
135 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
136public:
137 SelectHandSpeculativity() = default;
138 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
139 bool isSpeculatable(bool isTrueVal) const;
140 bool areAllSpeculatable() const;
141 bool areAnySpeculatable() const;
142 bool areNoneSpeculatable() const;
143 // For interop as int half of PointerIntPair.
144 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
145 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
146};
147static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
148
149using PossiblySpeculatableLoad =
150 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
151using UnspeculatableStore = StoreInst *;
152using RewriteableMemOp =
153 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
154using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
155
156/// An optimization pass providing Scalar Replacement of Aggregates.
157///
158/// This pass takes allocations which can be completely analyzed (that is, they
159/// don't escape) and tries to turn them into scalar SSA values. There are
160/// a few steps to this process.
161///
162/// 1) It takes allocations of aggregates and analyzes the ways in which they
163/// are used to try to split them into smaller allocations, ideally of
164/// a single scalar data type. It will split up memcpy and memset accesses
165/// as necessary and try to isolate individual scalar accesses.
166/// 2) It will transform accesses into forms which are suitable for SSA value
167/// promotion. This can be replacing a memset with a scalar store of an
168/// integer value, or it can involve speculating operations on a PHI or
169/// select to be a PHI or select of the results.
170/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
171/// onto insert and extract operations on a vector value, and convert them to
172/// this form. By doing so, it will enable promotion of vector aggregates to
173/// SSA vector values.
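/// As a rough illustration (the IR and value names here are invented), an
/// aggregate alloca accessed only through scalar loads and stores of one
/// field:
///   %a = alloca { i32, i32 }
///   %f1 = getelementptr inbounds { i32, i32 }, ptr %a, i32 0, i32 1
///   store i32 %x, ptr %f1
///   %v = load i32, ptr %f1
/// is split into per-field allocas which are then promoted, so %v becomes
/// simply %x and the original alloca disappears.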
174class SROA {
175 LLVMContext *const C;
176 DomTreeUpdater *const DTU;
177 AssumptionCache *const AC;
178 const bool PreserveCFG;
179
180 /// Worklist of alloca instructions to simplify.
181 ///
182 /// Each alloca in the function is added to this. Each new alloca formed gets
183 /// added to it as well to recursively simplify unless that alloca can be
184 /// directly promoted. Finally, each time we rewrite a use of an alloca other
186 /// than the one being actively rewritten, we add it back onto the list if not
186 /// already present to ensure it is re-visited.
187 SmallSetVector<AllocaInst *, 16> Worklist;
188
189 /// A collection of instructions to delete.
190 /// We try to batch deletions to simplify code and make things a bit more
191 /// efficient. We also make sure there are no dangling pointers.
192 SmallVector<WeakVH, 8> DeadInsts;
193
194 /// Post-promotion worklist.
195 ///
196 /// Sometimes we discover an alloca which has a high probability of becoming
197 /// viable for SROA after a round of promotion takes place. In those cases,
198 /// the alloca is enqueued here for re-processing.
199 ///
200 /// Note that we have to be very careful to clear allocas out of this list in
201 /// the event they are deleted.
202 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
203
204 /// A collection of alloca instructions we can directly promote.
205 std::vector<AllocaInst *> PromotableAllocas;
206
207 /// A worklist of PHIs to speculate prior to promoting allocas.
208 ///
209 /// All of these PHIs have been checked for the safety of speculation and by
210 /// being speculated will allow promoting allocas currently in the promotable
211 /// queue.
212 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
213
214 /// A worklist of select instructions to rewrite prior to promoting
215 /// allocas.
216 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
217
218 /// Select instructions that use an alloca and are subsequently loaded can be
219 /// rewritten to load both input pointers and then select between the result,
220 /// allowing the load of the alloca to be promoted.
221 /// From this:
222 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
223 /// %V = load <type>, ptr %P2
224 /// to:
225 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
226 /// %V2 = load <type>, ptr %Other
227 /// %V = select i1 %cond, <type> %V1, <type> %V2
228 ///
229 /// We can do this to a select if its only uses are loads
230 /// and if either the operand to the select can be loaded unconditionally,
231 /// or if we are allowed to perform CFG modifications.
232 /// If an intervening bitcast with a single use of the load is found, the
233 /// promotion is still allowed.
234 static std::optional<RewriteableMemOps>
235 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
236
237public:
238 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
239 SROAOptions PreserveCFG_)
240 : C(C), DTU(DTU), AC(AC),
241 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
242
243 /// Main run method used by both the SROAPass and by the legacy pass.
244 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
245
246private:
247 friend class AllocaSliceRewriter;
248
249 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
250 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
251 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
252 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
253 void clobberUse(Use &U);
254 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
255 bool promoteAllocas(Function &F);
256};
257
258} // end anonymous namespace
259
260/// Calculate the fragment of a variable to use when slicing a store
261/// based on the slice dimensions, existing fragment, and base storage
262/// fragment.
263/// Results:
264/// UseFrag - Use Target as the new fragment.
265/// UseNoFrag - The new slice already covers the whole variable.
266/// Skip - The new alloca slice doesn't include this variable.
267/// FIXME: Can we use calculateFragmentIntersect instead?
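/// For example (an illustrative case, not exhaustive): for a 64-bit variable
/// whose storage exactly covers the old alloca (no base storage fragment), a
/// 32-bit slice at bit offset 32 with no pre-existing fragment on the
/// expression yields UseFrag with the fragment (offset 32, size 32); a 64-bit
/// slice at offset 0 yields UseNoFrag; and that same 32-bit slice yields Skip
/// if the expression already carries a fragment covering only bits [0, 32).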
268namespace {
269enum FragCalcResult { UseFrag, UseNoFrag, Skip };
270}
271static FragCalcResult
272calculateFragment(DILocalVariable *Variable,
273 uint64_t NewStorageSliceOffsetInBits,
274 uint64_t NewStorageSliceSizeInBits,
275 std::optional<DIExpression::FragmentInfo> StorageFragment,
276 std::optional<DIExpression::FragmentInfo> CurrentFragment,
277 DIExpression::FragmentInfo &Target) {
278 // If the base storage describes part of the variable apply the offset and
279 // the size constraint.
280 if (StorageFragment) {
281 Target.SizeInBits =
282 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
283 Target.OffsetInBits =
284 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
285 } else {
286 Target.SizeInBits = NewStorageSliceSizeInBits;
287 Target.OffsetInBits = NewStorageSliceOffsetInBits;
288 }
289
290 // If this slice extracts the entirety of an independent variable from a
291 // larger alloca, do not produce a fragment expression, as the variable is
292 // not fragmented.
293 if (!CurrentFragment) {
294 if (auto Size = Variable->getSizeInBits()) {
295 // Treat the current fragment as covering the whole variable.
296 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
297 if (Target == CurrentFragment)
298 return UseNoFrag;
299 }
300 }
301
302 // No additional work to do if there isn't a fragment already, or there is
303 // but it already exactly describes the new assignment.
304 if (!CurrentFragment || *CurrentFragment == Target)
305 return UseFrag;
306
307 // Reject the target fragment if it doesn't fit wholly within the current
308 // fragment. TODO: We could instead chop up the target to fit in the case of
309 // a partial overlap.
310 if (Target.startInBits() < CurrentFragment->startInBits() ||
311 Target.endInBits() > CurrentFragment->endInBits())
312 return Skip;
313
314 // Target fits within the current fragment, return it.
315 return UseFrag;
316}
317
318static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) {
319 return DebugVariable(DVI->getVariable(), std::nullopt,
320 DVI->getDebugLoc().getInlinedAt());
321}
322static DebugVariable getAggregateVariable(DPValue *DPV) {
323 return DebugVariable(DPV->getVariable(), std::nullopt,
324 DPV->getDebugLoc().getInlinedAt());
325}
326
327/// Helpers for handling new and old debug info modes in migrateDebugInfo.
328/// These overloads unwrap a DbgInstPtr {Instruction* | DbgRecord*} union based
329/// on the \p Unused parameter type.
330static DPValue *UnwrapDbgInstPtr(DbgInstPtr P, DPValue *Unused) {
331 (void)Unused;
332 return static_cast<DPValue *>(cast<DbgRecord *>(P));
333}
334static DbgAssignIntrinsic *UnwrapDbgInstPtr(DbgInstPtr P, DbgAssignIntrinsic *Unused) {
335 (void)Unused;
336 return static_cast<DbgAssignIntrinsic *>(cast<Instruction *>(P));
337}
338
339/// Find linked dbg.assign and generate a new one with the correct
340/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
341/// value component is copied from the old dbg.assign to the new.
342/// \param OldAlloca Alloca for the variable before splitting.
343/// \param IsSplit True if the store (not necessarily alloca)
344/// is being split.
345/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
346/// \param SliceSizeInBits New number of bits being written to.
347/// \param OldInst Instruction that is being split.
348/// \param Inst New instruction performing this part of the
349/// split store.
350/// \param Dest Store destination.
351/// \param Value Stored value.
352/// \param DL Datalayout.
353static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
354 uint64_t OldAllocaOffsetInBits,
355 uint64_t SliceSizeInBits, Instruction *OldInst,
356 Instruction *Inst, Value *Dest, Value *Value,
357 const DataLayout &DL) {
358 auto MarkerRange = at::getAssignmentMarkers(OldInst);
359 auto DPVAssignMarkerRange = at::getDPVAssignmentMarkers(OldInst);
360 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
361 if (MarkerRange.empty() && DPVAssignMarkerRange.empty())
362 return;
363
364 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
365 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
366 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
367 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
368 << "\n");
369 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
370 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
371 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
372 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
373 if (Value)
374 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
375
376 /// Map of aggregate variables to their fragment associated with OldAlloca.
377 SmallDenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
378 BaseFragments;
379 for (auto *DAI : at::getAssignmentMarkers(OldAlloca))
380 BaseFragments[getAggregateVariable(DAI)] =
381 DAI->getExpression()->getFragmentInfo();
382 for (auto *DPV : at::getDPVAssignmentMarkers(OldAlloca))
383 BaseFragments[getAggregateVariable(DPV)] =
384 DPV->getExpression()->getFragmentInfo();
385
386 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
387 // one). It shouldn't already have one: assert this assumption.
388 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
389 DIAssignID *NewID = nullptr;
390 auto &Ctx = Inst->getContext();
391 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
392 assert(OldAlloca->isStaticAlloca());
393
394 auto MigrateDbgAssign = [&](auto *DbgAssign) {
395 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
396 << "\n");
397 auto *Expr = DbgAssign->getExpression();
398 bool SetKillLocation = false;
399
400 if (IsSplit) {
401 std::optional<DIExpression::FragmentInfo> BaseFragment;
402 {
403 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
404 if (R == BaseFragments.end())
405 return;
406 BaseFragment = R->second;
407 }
408 std::optional<DIExpression::FragmentInfo> CurrentFragment =
409 Expr->getFragmentInfo();
410 DIExpression::FragmentInfo NewFragment;
411 FragCalcResult Result = calculateFragment(
412 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
413 BaseFragment, CurrentFragment, NewFragment);
414
415 if (Result == Skip)
416 return;
417 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
418 if (CurrentFragment) {
419 // Rewrite NewFragment to be relative to the existing one (this is
420 // what createFragmentExpression wants). CalculateFragment has
421 // already resolved the size for us. FIXME: Should it return the
422 // relative fragment too?
423 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
424 }
425 // Add the new fragment info to the existing expression if possible.
426 if (auto E = DIExpression::createFragmentExpression(
427 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
428 Expr = *E;
429 } else {
430 // Otherwise, add the new fragment info to an empty expression and
431 // discard the value component of this dbg.assign as the value cannot
432 // be computed with the new fragment.
433 Expr = *DIExpression::createFragmentExpression(
434 DIExpression::get(Expr->getContext(), std::nullopt),
435 NewFragment.OffsetInBits, NewFragment.SizeInBits);
436 SetKillLocation = true;
437 }
438 }
439 }
440
441 // If we haven't created a DIAssignID ID do that now and attach it to Inst.
442 if (!NewID) {
443 NewID = DIAssignID::getDistinct(Ctx);
444 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
445 }
446
447 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
448 auto *NewAssign = UnwrapDbgInstPtr(
449 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
450 Dest,
451 DIExpression::get(Expr->getContext(), std::nullopt),
452 DbgAssign->getDebugLoc()),
453 DbgAssign);
454
455 // If we've updated the value but the original dbg.assign has an arglist
456 // then kill it now - we can't use the requested new value.
457 // We can't replace the DIArgList with the new value as it'd leave
458 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
459 // an arglist). And we can't keep the DIArgList in case the linked store
460 // is being split - in which case the DIArgList + expression may no longer
461 // be computing the correct value.
462 // This should be a very rare situation as it requires the value being
463 // stored to differ from the dbg.assign (i.e., the value has been
464 // represented differently in the debug intrinsic for some reason).
465 SetKillLocation |=
466 Value && (DbgAssign->hasArgList() ||
467 !DbgAssign->getExpression()->isSingleLocationExpression());
468 if (SetKillLocation)
469 NewAssign->setKillLocation();
470
471 // We could use more precision here at the cost of some additional (code)
472 // complexity - if the original dbg.assign was adjacent to its store, we
473 // could position this new dbg.assign adjacent to its store rather than the
474 // old dbg.assign. That would result in interleaved dbg.assigns rather than
475 // what we get now:
476 // split store !1
477 // split store !2
478 // dbg.assign !1
479 // dbg.assign !2
480 // This (current behaviour) results in debug assignments being
481 // noted as slightly offset (in code) from the store. In practice this
482 // should have little effect on the debugging experience due to the fact
483 // that all the split stores should get the same line number.
484 NewAssign->moveBefore(DbgAssign);
485
486 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
487 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
488 };
489
490 for_each(MarkerRange, MigrateDbgAssign);
491 for_each(DPVAssignMarkerRange, MigrateDbgAssign);
492}
493
494namespace {
495
496/// A custom IRBuilder inserter which prefixes all names, but only in
497/// Assert builds.
498class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
499 std::string Prefix;
500
501 Twine getNameWithPrefix(const Twine &Name) const {
502 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
503 }
504
505public:
506 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
507
508 void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
509 BasicBlock::iterator InsertPt) const override {
510 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
511 InsertPt);
512 }
513};
514
515/// Provide a type for IRBuilder that drops names in release builds.
516using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
517
518/// A used slice of an alloca.
519///
520/// This structure represents a slice of an alloca used by some instruction. It
521/// stores both the begin and end offsets of this use, a pointer to the use
522/// itself, and a flag indicating whether we can classify the use as splittable
523/// or not when forming partitions of the alloca.
524class Slice {
525 /// The beginning offset of the range.
526 uint64_t BeginOffset = 0;
527
528 /// The ending offset, not included in the range.
529 uint64_t EndOffset = 0;
530
531 /// Storage for both the use of this slice and whether it can be
532 /// split.
533 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
534
535public:
536 Slice() = default;
537
538 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
539 : BeginOffset(BeginOffset), EndOffset(EndOffset),
540 UseAndIsSplittable(U, IsSplittable) {}
541
542 uint64_t beginOffset() const { return BeginOffset; }
543 uint64_t endOffset() const { return EndOffset; }
544
545 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
546 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
547
548 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
549
550 bool isDead() const { return getUse() == nullptr; }
551 void kill() { UseAndIsSplittable.setPointer(nullptr); }
552
553 /// Support for ordering ranges.
554 ///
555 /// This provides an ordering over ranges such that start offsets are
556 /// always increasing, and within equal start offsets, the end offsets are
557 /// decreasing. Thus the spanning range comes first in a cluster with the
558 /// same start position.
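/// For example, among slices that all begin at offset 0, an unsplittable
/// slice orders before a splittable one, and a splittable [0,8) orders
/// before a splittable [0,4).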
559 bool operator<(const Slice &RHS) const {
560 if (beginOffset() < RHS.beginOffset())
561 return true;
562 if (beginOffset() > RHS.beginOffset())
563 return false;
564 if (isSplittable() != RHS.isSplittable())
565 return !isSplittable();
566 if (endOffset() > RHS.endOffset())
567 return true;
568 return false;
569 }
570
571 /// Support comparison with a single offset to allow binary searches.
572 friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
573 uint64_t RHSOffset) {
574 return LHS.beginOffset() < RHSOffset;
575 }
576 friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
577 const Slice &RHS) {
578 return LHSOffset < RHS.beginOffset();
579 }
580
581 bool operator==(const Slice &RHS) const {
582 return isSplittable() == RHS.isSplittable() &&
583 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
584 }
585 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
586};
587
588/// Representation of the alloca slices.
589///
590/// This class represents the slices of an alloca which are formed by its
591/// various uses. If a pointer escapes, we can't fully build a representation
592/// for the slices used and we reflect that in this structure. The uses are
593/// stored, sorted by increasing beginning offset and with unsplittable slices
594/// starting at a particular offset before splittable slices.
595class AllocaSlices {
596public:
597 /// Construct the slices of a particular alloca.
598 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
599
600 /// Test whether a pointer to the allocation escapes our analysis.
601 ///
602 /// If this is true, the slices are never fully built and should be
603 /// ignored.
604 bool isEscaped() const { return PointerEscapingInstr; }
605
606 /// Support for iterating over the slices.
607 /// @{
608 using iterator = SmallVectorImpl<Slice>::iterator;
609 using range = iterator_range<iterator>;
610
611 iterator begin() { return Slices.begin(); }
612 iterator end() { return Slices.end(); }
613
614 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
615 using const_range = iterator_range<const_iterator>;
616
617 const_iterator begin() const { return Slices.begin(); }
618 const_iterator end() const { return Slices.end(); }
619 /// @}
620
621 /// Erase a range of slices.
622 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
623
624 /// Insert new slices for this alloca.
625 ///
626 /// This moves the slices into the alloca's slices collection, and re-sorts
627 /// everything so that the usual ordering properties of the alloca's slices
628 /// hold.
629 void insert(ArrayRef<Slice> NewSlices) {
630 int OldSize = Slices.size();
631 Slices.append(NewSlices.begin(), NewSlices.end());
632 auto SliceI = Slices.begin() + OldSize;
633 llvm::sort(SliceI, Slices.end());
634 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
635 }
636
637 // Forward declare the iterator and range accessor for walking the
638 // partitions.
639 class partition_iterator;
640 iterator_range<partition_iterator> partitions();
641
642 /// Access the dead users for this alloca.
643 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
644
645 /// Access Uses that should be dropped if the alloca is promotable.
646 ArrayRef<Use *> getDeadUsesIfPromotable() const {
647 return DeadUseIfPromotable;
648 }
649
650 /// Access the dead operands referring to this alloca.
651 ///
652 /// These are operands which cannot actually be used to refer to the
653 /// alloca as they are outside its range and the user doesn't correct for
654 /// that. These mostly consist of PHI node inputs and the like which we just
655 /// need to replace with undef.
656 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
657
658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
659 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
660 void printSlice(raw_ostream &OS, const_iterator I,
661 StringRef Indent = " ") const;
662 void printUse(raw_ostream &OS, const_iterator I,
663 StringRef Indent = " ") const;
664 void print(raw_ostream &OS) const;
665 void dump(const_iterator I) const;
666 void dump() const;
667#endif
668
669private:
670 template <typename DerivedT, typename RetT = void> class BuilderBase;
671 class SliceBuilder;
672
673 friend class AllocaSlices::SliceBuilder;
674
675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
676 /// Handle to alloca instruction to simplify method interfaces.
677 AllocaInst &AI;
678#endif
679
680 /// The instruction responsible for this alloca not having a known set
681 /// of slices.
682 ///
683 /// When an instruction (potentially) escapes the pointer to the alloca, we
684 /// store a pointer to that here and abort trying to form slices of the
685 /// alloca. This will be null if the alloca slices are analyzed successfully.
686 Instruction *PointerEscapingInstr;
687
688 /// The slices of the alloca.
689 ///
690 /// We store a vector of the slices formed by uses of the alloca here. This
691 /// vector is sorted by increasing begin offset, and then the unsplittable
692 /// slices before the splittable ones. See the Slice inner class for more
693 /// details.
694 SmallVector<Slice, 8> Slices;
695
696 /// Instructions which will become dead if we rewrite the alloca.
697 ///
698 /// Note that these are not separated by slice. This is because we expect an
699 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
700 /// all these instructions can simply be removed and replaced with poison as
701 /// they come from outside of the allocated space.
702 SmallVector<Instruction *, 8> DeadUsers;
703
704 /// Uses which will become dead if we can promote the alloca.
705 SmallVector<Use *, 8> DeadUseIfPromotable;
706
707 /// Operands which will become dead if we rewrite the alloca.
708 ///
709 /// These are operands that in their particular use can be replaced with
710 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
711 /// to PHI nodes and the like. They aren't entirely dead (there might be
712 /// a GEP back into the bounds using it elsewhere), nor is the PHI, but we
713 /// want to swap this particular input for poison to simplify the use lists of
714 /// the alloca.
715 SmallVector<Use *, 8> DeadOperands;
716};
717
718/// A partition of the slices.
719///
720/// An ephemeral representation for a range of slices which can be viewed as
721/// a partition of the alloca. This range represents a span of the alloca's
722/// memory which cannot be split, and provides access to all of the slices
723/// overlapping some part of the partition.
724///
725/// Objects of this type are produced by traversing the alloca's slices, but
726/// are only ephemeral and not persistent.
727class Partition {
728private:
729 friend class AllocaSlices;
730 friend class AllocaSlices::partition_iterator;
731
732 using iterator = AllocaSlices::iterator;
733
734 /// The beginning and ending offsets of the alloca for this
735 /// partition.
736 uint64_t BeginOffset = 0, EndOffset = 0;
737
738 /// The start and end iterators of this partition.
739 iterator SI, SJ;
740
741 /// A collection of split slice tails overlapping the partition.
742 SmallVector<Slice *, 4> SplitTails;
743
744 /// Raw constructor builds an empty partition starting and ending at
745 /// the given iterator.
746 Partition(iterator SI) : SI(SI), SJ(SI) {}
747
748public:
749 /// The start offset of this partition.
750 ///
751 /// All of the contained slices start at or after this offset.
752 uint64_t beginOffset() const { return BeginOffset; }
753
754 /// The end offset of this partition.
755 ///
756 /// All of the contained slices end at or before this offset.
757 uint64_t endOffset() const { return EndOffset; }
758
759 /// The size of the partition.
760 ///
761 /// Note that this can never be zero.
762 uint64_t size() const {
763 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
764 return EndOffset - BeginOffset;
765 }
766
767 /// Test whether this partition contains no slices, and merely spans
768 /// a region occupied by split slices.
769 bool empty() const { return SI == SJ; }
770
771 /// \name Iterate slices that start within the partition.
772 /// These may be splittable or unsplittable. They have a begin offset >= the
773 /// partition begin offset.
774 /// @{
775 // FIXME: We should probably define a "concat_iterator" helper and use that
776 // to stitch together pointee_iterators over the split tails and the
777 // contiguous iterators of the partition. That would give a much nicer
778 // interface here. We could then additionally expose filtered iterators for
779 // split, unsplit, and unsplittable slices based on the usage patterns.
780 iterator begin() const { return SI; }
781 iterator end() const { return SJ; }
782 /// @}
783
784 /// Get the sequence of split slice tails.
785 ///
786 /// These tails are of slices which start before this partition but are
787 /// split and overlap into the partition. We accumulate these while forming
788 /// partitions.
789 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
790};
791
792} // end anonymous namespace
793
794/// An iterator over partitions of the alloca's slices.
795///
796/// This iterator implements the core algorithm for partitioning the alloca's
797/// slices. It is a forward iterator as we don't support backtracking for
798/// efficiency reasons, and re-use a single storage area to maintain the
799/// current set of split slices.
800///
801/// It is templated on the slice iterator type to use so that it can operate
802/// with either const or non-const slice iterators.
803class AllocaSlices::partition_iterator
804 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
805 Partition> {
806 friend class AllocaSlices;
807
808 /// Most of the state for walking the partitions is held in a class
809 /// with a nice interface for examining them.
810 Partition P;
811
812 /// We need to keep the end of the slices to know when to stop.
813 AllocaSlices::iterator SE;
814
815 /// We also need to keep track of the maximum split end offset seen.
816 /// FIXME: Do we really?
817 uint64_t MaxSplitSliceEndOffset = 0;
818
819 /// Sets the partition to be empty at given iterator, and sets the
820 /// end iterator.
821 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
822 : P(SI), SE(SE) {
823 // If not already at the end, advance our state to form the initial
824 // partition.
825 if (SI != SE)
826 advance();
827 }
828
829 /// Advance the iterator to the next partition.
830 ///
831 /// Requires that the iterator not be at the end of the slices.
832 void advance() {
833 assert((P.SI != SE || !P.SplitTails.empty()) &&
834 "Cannot advance past the end of the slices!");
835
836 // Clear out any split uses which have ended.
837 if (!P.SplitTails.empty()) {
838 if (P.EndOffset >= MaxSplitSliceEndOffset) {
839 // If we've finished all splits, this is easy.
840 P.SplitTails.clear();
841 MaxSplitSliceEndOffset = 0;
842 } else {
843 // Remove the uses which have ended in the prior partition. This
844 // cannot change the max split slice end because we just checked that
845 // the prior partition ended prior to that max.
846 llvm::erase_if(P.SplitTails,
847 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
848 assert(llvm::any_of(P.SplitTails,
849 [&](Slice *S) {
850 return S->endOffset() == MaxSplitSliceEndOffset;
851 }) &&
852 "Could not find the current max split slice offset!");
853 assert(llvm::all_of(P.SplitTails,
854 [&](Slice *S) {
855 return S->endOffset() <= MaxSplitSliceEndOffset;
856 }) &&
857 "Max split slice end offset is not actually the max!");
858 }
859 }
860
861 // If P.SI is already at the end, then we've cleared the split tail and
862 // now have an end iterator.
863 if (P.SI == SE) {
864 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
865 return;
866 }
867
868 // If we had a non-empty partition previously, set up the state for
869 // subsequent partitions.
870 if (P.SI != P.SJ) {
871 // Accumulate all the splittable slices which started in the old
872 // partition into the split list.
873 for (Slice &S : P)
874 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
875 P.SplitTails.push_back(&S);
876 MaxSplitSliceEndOffset =
877 std::max(S.endOffset(), MaxSplitSliceEndOffset);
878 }
879
880 // Start from the end of the previous partition.
881 P.SI = P.SJ;
882
883 // If P.SI is now at the end, we at most have a tail of split slices.
884 if (P.SI == SE) {
885 P.BeginOffset = P.EndOffset;
886 P.EndOffset = MaxSplitSliceEndOffset;
887 return;
888 }
889
890 // If we have split slices and the next slice is after a gap and is
891 // not splittable, immediately form an empty partition for the split
892 // slices up until the next slice begins.
893 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
894 !P.SI->isSplittable()) {
895 P.BeginOffset = P.EndOffset;
896 P.EndOffset = P.SI->beginOffset();
897 return;
898 }
899 }
900
901 // OK, we need to consume new slices. Set the end offset based on the
902 // current slice, and step SJ past it. The beginning offset of the
903 // partition is the beginning offset of the next slice unless we have
904 // pre-existing split slices that are continuing, in which case we begin
905 // at the prior end offset.
906 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
907 P.EndOffset = P.SI->endOffset();
908 ++P.SJ;
909
910 // There are two strategies to form a partition based on whether the
911 // partition starts with an unsplittable slice or a splittable slice.
912 if (!P.SI->isSplittable()) {
913 // When we're forming an unsplittable region, it must always start at
914 // the first slice and will extend through its end.
915 assert(P.BeginOffset == P.SI->beginOffset());
916
917 // Form a partition including all of the overlapping slices with this
918 // unsplittable slice.
919 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
920 if (!P.SJ->isSplittable())
921 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
922 ++P.SJ;
923 }
924
925 // We have a partition across a set of overlapping unsplittable
926 // slices.
927 return;
928 }
929
930 // If we're starting with a splittable slice, then we need to form
931 // a synthetic partition spanning it and any other overlapping splittable
932 // slices.
933 assert(P.SI->isSplittable() && "Forming a splittable partition!");
934
935 // Collect all of the overlapping splittable slices.
936 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
937 P.SJ->isSplittable()) {
938 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
939 ++P.SJ;
940 }
941
942 // Back up P.EndOffset if we ended the span early when encountering an
943 // unsplittable slice. This synthesizes the early end offset of
944 // a partition spanning only splittable slices.
945 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
946 assert(!P.SJ->isSplittable());
947 P.EndOffset = P.SJ->beginOffset();
948 }
949 }
950
951public:
952 bool operator==(const partition_iterator &RHS) const {
953 assert(SE == RHS.SE &&
954 "End iterators don't match between compared partition iterators!");
955
956 // The observed positions of partitions are marked by the P.SI iterator and
957 // the emptiness of the split slices. The latter is only relevant when
958 // P.SI == SE, as the end iterator will additionally have an empty split
959 // slices list, but the prior may have the same P.SI and a tail of split
960 // slices.
961 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
962 assert(P.SJ == RHS.P.SJ &&
963 "Same set of slices formed two different sized partitions!");
964 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
965 "Same slice position with differently sized non-empty split "
966 "slice tails!");
967 return true;
968 }
969 return false;
970 }
971
972 partition_iterator &operator++() {
973 advance();
974 return *this;
975 }
976
977 Partition &operator*() { return P; }
978};
979
980/// A forward range over the partitions of the alloca's slices.
981///
982/// This accesses an iterator range over the partitions of the alloca's
983/// slices. It computes these partitions on the fly based on the overlapping
984/// offsets of the slices and the ability to split them. It will visit "empty"
985/// partitions to cover regions of the alloca only accessed via split
986/// slices.
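/// For example (an illustrative trace), given the sorted slices [0,4)
/// (unsplittable), [0,8) (splittable) and [6,10) (unsplittable), the visited
/// partitions are [0,4), the "empty" partition [4,6) covered only by the
/// split tail of [0,8), and [6,10).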
987iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
988 return make_range(partition_iterator(begin(), end()),
989 partition_iterator(end(), end()));
990}
991
992static Value *foldSelectInst(SelectInst &SI) {
993 // If the condition being selected on is a constant or the same value is
994 // being selected between, fold the select. Yes this does (rarely) happen
995 // early on.
996 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
997 return SI.getOperand(1 + CI->isZero());
998 if (SI.getOperand(1) == SI.getOperand(2))
999 return SI.getOperand(1);
1000
1001 return nullptr;
1002}
1003
1004/// A helper that folds a PHI node or a select.
1005static Value *foldPHINodeOrSelectInst(Instruction &I) {
1006 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1007 // If PN merges together the same value, return that value.
1008 return PN->hasConstantValue();
1009 }
1010 return foldSelectInst(cast<SelectInst>(I));
1011}
1012
1013/// Builder for the alloca slices.
1014///
1015/// This class builds a set of alloca slices by recursively visiting the uses
1016/// of an alloca and making a slice for each load and store at each offset.
1017class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1018 friend class PtrUseVisitor<SliceBuilder>;
1019 friend class InstVisitor<SliceBuilder>;
1020
1021 using Base = PtrUseVisitor<SliceBuilder>;
1022
1023 const uint64_t AllocSize;
1024 AllocaSlices &AS;
1025
1026 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1027 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1028
1029 /// Set to de-duplicate dead instructions found in the use walk.
1030 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1031
1032public:
1033 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1034 : PtrUseVisitor<SliceBuilder>(DL),
1035 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1036 AS(AS) {}
1037
1038private:
1039 void markAsDead(Instruction &I) {
1040 if (VisitedDeadInsts.insert(&I).second)
1041 AS.DeadUsers.push_back(&I);
1042 }
1043
1044 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1045 bool IsSplittable = false) {
1046 // Completely skip uses which have a zero size or start either before or
1047 // past the end of the allocation.
1048 if (Size == 0 || Offset.uge(AllocSize)) {
1049 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1050 << Offset
1051 << " which has zero size or starts outside of the "
1052 << AllocSize << " byte alloca:\n"
1053 << " alloca: " << AS.AI << "\n"
1054 << " use: " << I << "\n");
1055 return markAsDead(I);
1056 }
1057
1058 uint64_t BeginOffset = Offset.getZExtValue();
1059 uint64_t EndOffset = BeginOffset + Size;
1060
1061 // Clamp the end offset to the end of the allocation. Note that this is
1062 // formulated to handle even the case where "BeginOffset + Size" overflows.
1063 // This may appear superficially to be something we could ignore entirely,
1064 // but that is not so! There may be widened loads or PHI-node uses where
1065 // some instructions are dead but not others. We can't completely ignore
1066 // them, and so have to record at least the information here.
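// For example (illustrative numbers), a 16 byte use at offset 56 of a
// 64 byte alloca is recorded as the clamped slice [56, 64).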
1067 assert(AllocSize >= BeginOffset); // Established above.
1068 if (Size > AllocSize - BeginOffset) {
1069 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1070 << Offset << " to remain within the " << AllocSize
1071 << " byte alloca:\n"
1072 << " alloca: " << AS.AI << "\n"
1073 << " use: " << I << "\n");
1074 EndOffset = AllocSize;
1075 }
1076
1077 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1078 }
1079
1080 void visitBitCastInst(BitCastInst &BC) {
1081 if (BC.use_empty())
1082 return markAsDead(BC);
1083
1084 return Base::visitBitCastInst(BC);
1085 }
1086
1087 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1088 if (ASC.use_empty())
1089 return markAsDead(ASC);
1090
1091 return Base::visitAddrSpaceCastInst(ASC);
1092 }
1093
1094 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1095 if (GEPI.use_empty())
1096 return markAsDead(GEPI);
1097
1098 if (SROAStrictInbounds && GEPI.isInBounds()) {
1099 // FIXME: This is a manually un-factored variant of the basic code inside
1100 // of GEPs with checking of the inbounds invariant specified in the
1101 // langref in a very strict sense. If we ever want to enable
1102 // SROAStrictInbounds, this code should be factored cleanly into
1103 // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
1104 // by writing out the code here where we have the underlying allocation
1105 // size readily available.
1106 APInt GEPOffset = Offset;
1107 const DataLayout &DL = GEPI.getModule()->getDataLayout();
1108 for (gep_type_iterator GTI = gep_type_begin(GEPI),
1109 GTE = gep_type_end(GEPI);
1110 GTI != GTE; ++GTI) {
1111 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1112 if (!OpC)
1113 break;
1114
1115 // Handle a struct index, which adds its field offset to the pointer.
1116 if (StructType *STy = GTI.getStructTypeOrNull()) {
1117 unsigned ElementIdx = OpC->getZExtValue();
1118 const StructLayout *SL = DL.getStructLayout(STy);
1119 GEPOffset +=
1120 APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
1121 } else {
1122 // For array or vector indices, scale the index by the size of the
1123 // type.
1124 APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
1125 GEPOffset += Index * APInt(Offset.getBitWidth(),
1126 GTI.getSequentialElementStride(DL));
1127 }
1128
1129 // If this index has computed an intermediate pointer which is not
1130 // inbounds, then the result of the GEP is a poison value and we can
1131 // delete it and all uses.
1132 if (GEPOffset.ugt(AllocSize))
1133 return markAsDead(GEPI);
1134 }
1135 }
1136
1137 return Base::visitGetElementPtrInst(GEPI);
1138 }
1139
1140 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1141 uint64_t Size, bool IsVolatile) {
1142 // We allow splitting of non-volatile loads and stores where the type is an
1143 // integer type. These may be used to implement 'memcpy' or other "transfer
1144 // of bits" patterns.
1145 bool IsSplittable =
1146 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1147
1148 insertUse(I, Offset, Size, IsSplittable);
1149 }
1150
1151 void visitLoadInst(LoadInst &LI) {
1152 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1153 "All simple FCA loads should have been pre-split");
1154
1155 if (!IsOffsetKnown)
1156 return PI.setAborted(&LI);
1157
1158 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1159 if (Size.isScalable())
1160 return PI.setAborted(&LI);
1161
1162 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1163 LI.isVolatile());
1164 }
1165
1166 void visitStoreInst(StoreInst &SI) {
1167 Value *ValOp = SI.getValueOperand();
1168 if (ValOp == *U)
1169 return PI.setEscapedAndAborted(&SI);
1170 if (!IsOffsetKnown)
1171 return PI.setAborted(&SI);
1172
1173 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1174 if (StoreSize.isScalable())
1175 return PI.setAborted(&SI);
1176
1177 uint64_t Size = StoreSize.getFixedValue();
1178
1179 // If this memory access can be shown to *statically* extend outside the
1180 // bounds of the allocation, its behavior is undefined, so simply
1181 // ignore it. Note that this is more strict than the generic clamping
1182 // behavior of insertUse. We also try to handle cases which might run the
1183 // risk of overflow.
1184 // FIXME: We should instead consider the pointer to have escaped if this
1185 // function is being instrumented for addressing bugs or race conditions.
1186 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1187 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1188 << Offset << " which extends past the end of the "
1189 << AllocSize << " byte alloca:\n"
1190 << " alloca: " << AS.AI << "\n"
1191 << " use: " << SI << "\n");
1192 return markAsDead(SI);
1193 }
1194
1195 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1196 "All simple FCA stores should have been pre-split");
1197 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1198 }
1199
1200 void visitMemSetInst(MemSetInst &II) {
1201 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1202 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1203 if ((Length && Length->getValue() == 0) ||
1204 (IsOffsetKnown && Offset.uge(AllocSize)))
1205 // Zero-length mem transfer intrinsics can be ignored entirely.
1206 return markAsDead(II);
1207
1208 if (!IsOffsetKnown)
1209 return PI.setAborted(&II);
1210
1211 insertUse(II, Offset,
1212 Length ? Length->getLimitedValue()
1213 : AllocSize - Offset.getLimitedValue(),
1214 (bool)Length);
1215 }
1216
1217 void visitMemTransferInst(MemTransferInst &II) {
1218 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1219 if (Length && Length->getValue() == 0)
1220 // Zero-length mem transfer intrinsics can be ignored entirely.
1221 return markAsDead(II);
1222
1223 // Because we can visit these intrinsics twice, also check to see if the
1224 // first time marked this instruction as dead. If so, skip it.
1225 if (VisitedDeadInsts.count(&II))
1226 return;
1227
1228 if (!IsOffsetKnown)
1229 return PI.setAborted(&II);
1230
1231 // This side of the transfer is completely out-of-bounds, and so we can
1232 // nuke the entire transfer. However, we also need to nuke the other side
1233 // if already added to our partitions.
1234 // FIXME: Yet another place we really should bypass this when
1235 // instrumenting for ASan.
1236 if (Offset.uge(AllocSize)) {
1237 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1238 MemTransferSliceMap.find(&II);
1239 if (MTPI != MemTransferSliceMap.end())
1240 AS.Slices[MTPI->second].kill();
1241 return markAsDead(II);
1242 }
1243
1244 uint64_t RawOffset = Offset.getLimitedValue();
1245 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1246
1247 // Check for the special case where the same exact value is used for both
1248 // source and dest.
1249 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1250 // For non-volatile transfers this is a no-op.
1251 if (!II.isVolatile())
1252 return markAsDead(II);
1253
1254 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1255 }
1256
1257 // If we have seen both source and destination for a mem transfer, then
1258 // they both point to the same alloca.
1259 bool Inserted;
1260 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1261 std::tie(MTPI, Inserted) =
1262 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1263 unsigned PrevIdx = MTPI->second;
1264 if (!Inserted) {
1265 Slice &PrevP = AS.Slices[PrevIdx];
1266
1267 // Check if the begin offsets match and this is a non-volatile transfer.
1268 // In that case, we can completely elide the transfer.
1269 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1270 PrevP.kill();
1271 return markAsDead(II);
1272 }
1273
1274 // Otherwise we have an offset transfer within the same alloca. We can't
1275 // split those.
1276 PrevP.makeUnsplittable();
1277 }
1278
1279 // Insert the use now that we've fixed up the splittable nature.
1280 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1281
1282 // Check that we ended up with a valid index in the map.
1283 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1284 "Map index doesn't point back to a slice with this user.");
1285 }
1286
1287 // Disable SROA for any intrinsics except for lifetime invariants and
1288 // invariant group.
1289 // FIXME: What about debug intrinsics? This matches old behavior, but
1290 // doesn't make sense.
1291 void visitIntrinsicInst(IntrinsicInst &II) {
1292 if (II.isDroppable()) {
1293 AS.DeadUseIfPromotable.push_back(U);
1294 return;
1295 }
1296
1297 if (!IsOffsetKnown)
1298 return PI.setAborted(&II);
1299
1300 if (II.isLifetimeStartOrEnd()) {
1301 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
1302 uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
1303 Length->getLimitedValue());
1304 insertUse(II, Offset, Size, true);
1305 return;
1306 }
1307
1308 if (II.isLaunderOrStripInvariantGroup()) {
1309 insertUse(II, Offset, AllocSize, true);
1310 enqueueUsers(II);
1311 return;
1312 }
1313
1314 Base::visitIntrinsicInst(II);
1315 }
1316
1317 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1318 // We consider any PHI or select that results in a direct load or store of
1319 // the same offset to be a viable use for slicing purposes. These uses
1320 // are considered unsplittable and the size is the maximum loaded or stored
1321 // size.
1322 SmallPtrSet<Instruction *, 4> Visited;
1323 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1324 Visited.insert(Root);
1325 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1326 const DataLayout &DL = Root->getModule()->getDataLayout();
1327 // If there are no loads or stores, the access is dead. We mark that as
1328 // a size zero access.
1329 Size = 0;
1330 do {
1331 Instruction *I, *UsedI;
1332 std::tie(UsedI, I) = Uses.pop_back_val();
1333
1334 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1335 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1336 if (LoadSize.isScalable()) {
1337 PI.setAborted(LI);
1338 return nullptr;
1339 }
1340 Size = std::max(Size, LoadSize.getFixedValue());
1341 continue;
1342 }
1343 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1344 Value *Op = SI->getOperand(0);
1345 if (Op == UsedI)
1346 return SI;
1347 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1348 if (StoreSize.isScalable()) {
1349 PI.setAborted(SI);
1350 return nullptr;
1351 }
1352 Size = std::max(Size, StoreSize.getFixedValue());
1353 continue;
1354 }
1355
1356 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1357 if (!GEP->hasAllZeroIndices())
1358 return GEP;
1359 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1360 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1361 return I;
1362 }
1363
1364 for (User *U : I->users())
1365 if (Visited.insert(cast<Instruction>(U)).second)
1366 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1367 } while (!Uses.empty());
1368
1369 return nullptr;
1370 }
1371
1372 void visitPHINodeOrSelectInst(Instruction &I) {
1373 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1374 if (I.use_empty())
1375 return markAsDead(I);
1376
1377 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1378 // instructions in this BB, which may be required during rewriting. Bail out
1379 // on these cases.
1380 if (isa<PHINode>(I) &&
1381 I.getParent()->getFirstInsertionPt() == I.getParent()->end())
1382 return PI.setAborted(&I);
1383
1384 // TODO: We could use simplifyInstruction here to fold PHINodes and
1385 // SelectInsts. However, doing so requires to change the current
1386 // dead-operand-tracking mechanism. For instance, suppose neither loading
1387 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1388 // trap either. However, if we simply replace %U with undef using the
1389 // current dead-operand-tracking mechanism, "load (select undef, undef,
1390 // %other)" may trap because the select may return the first operand
1391 // "undef".
1392 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1393 if (Result == *U)
1394 // If the result of the constant fold will be the pointer, recurse
1395 // through the PHI/select as if we had RAUW'ed it.
1396 enqueueUsers(I);
1397 else
1398 // Otherwise the operand to the PHI/select is dead, and we can replace
1399 // it with poison.
1400 AS.DeadOperands.push_back(U);
1401
1402 return;
1403 }
1404
1405 if (!IsOffsetKnown)
1406 return PI.setAborted(&I);
1407
1408 // See if we already have computed info on this node.
1409 uint64_t &Size = PHIOrSelectSizes[&I];
1410 if (!Size) {
1411 // This is a new PHI/Select, check for an unsafe use of it.
1412 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1413 return PI.setAborted(UnsafeI);
1414 }
1415
1416 // For PHI and select operands outside the alloca, we can't nuke the entire
1417 // phi or select -- the other side might still be relevant, so we special
1418 // case them here and use a separate structure to track the operands
1419 // themselves which should be replaced with poison.
1420 // FIXME: This should instead be escaped in the event we're instrumenting
1421 // for address sanitization.
1422 if (Offset.uge(AllocSize)) {
1423 AS.DeadOperands.push_back(U);
1424 return;
1425 }
1426
1427 insertUse(I, Offset, Size);
1428 }
1429
1430 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1431
1432 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1433
1434 /// Disable SROA entirely if there are unhandled users of the alloca.
1435 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1436};
1437
1438AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1439 :
1440#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1441 AI(AI),
1442#endif
1443 PointerEscapingInstr(nullptr) {
1444 SliceBuilder PB(DL, AI, *this);
1445 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1446 if (PtrI.isEscaped() || PtrI.isAborted()) {
1447 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1448 // possibly by just storing the PtrInfo in the AllocaSlices.
1449 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1450 : PtrI.getAbortingInst();
1451 assert(PointerEscapingInstr && "Did not track a bad instruction");
1452 return;
1453 }
1454
1455 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1456
1457 // Sort the uses. This arranges for the offsets to be in ascending order,
1458 // and the sizes to be in descending order.
1459 llvm::stable_sort(Slices);
1460}
1461
1462#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1463
1464void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1465 StringRef Indent) const {
1466 printSlice(OS, I, Indent);
1467 OS << "\n";
1468 printUse(OS, I, Indent);
1469}
1470
1471void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1472 StringRef Indent) const {
1473 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1474 << " slice #" << (I - begin())
1475 << (I->isSplittable() ? " (splittable)" : "");
1476}
1477
1478void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1479 StringRef Indent) const {
1480 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1481}
1482
1483void AllocaSlices::print(raw_ostream &OS) const {
1484 if (PointerEscapingInstr) {
1485 OS << "Can't analyze slices for alloca: " << AI << "\n"
1486 << " A pointer to this alloca escaped by:\n"
1487 << " " << *PointerEscapingInstr << "\n";
1488 return;
1489 }
1490
1491 OS << "Slices of alloca: " << AI << "\n";
1492 for (const_iterator I = begin(), E = end(); I != E; ++I)
1493 print(OS, I);
1494}
1495
1496LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1497 print(dbgs(), I);
1498}
1499LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1500
1501#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1502
1503/// Walk the range of a partitioning looking for a common type to cover this
1504/// sequence of slices.
1505static std::pair<Type *, IntegerType *>
1506findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1507 uint64_t EndOffset) {
1508 Type *Ty = nullptr;
1509 bool TyIsCommon = true;
1510 IntegerType *ITy = nullptr;
1511
1512 // Note that we need to look at *every* alloca slice's Use to ensure we
1513 // always get consistent results regardless of the order of slices.
1514 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1515 Use *U = I->getUse();
1516 if (isa<IntrinsicInst>(*U->getUser()))
1517 continue;
1518 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1519 continue;
1520
1521 Type *UserTy = nullptr;
1522 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1523 UserTy = LI->getType();
1524 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1525 UserTy = SI->getValueOperand()->getType();
1526 }
1527
1528 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1529 // If the type is larger than the partition, skip it. We only encounter
1530 // this for split integer operations where we want to use the type of the
1531 // entity causing the split. Also skip if the type is not a byte width
1532 // multiple.
1533 if (UserITy->getBitWidth() % 8 != 0 ||
1534 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1535 continue;
1536
1537 // Track the largest bitwidth integer type used in this way in case there
1538 // is no common type.
1539 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1540 ITy = UserITy;
1541 }
1542
1543 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1544 // depend on types skipped above.
1545 if (!UserTy || (Ty && Ty != UserTy))
1546 TyIsCommon = false; // Give up on anything but an iN type.
1547 else
1548 Ty = UserTy;
1549 }
1550
1551 return {TyIsCommon ? Ty : nullptr, ITy};
1552}
1553
1554/// PHI instructions that use an alloca and are subsequently loaded can be
1555/// rewritten to load both input pointers in the pred blocks and then PHI the
1556/// results, allowing the load of the alloca to be promoted.
1557/// From this:
1558/// %P2 = phi [i32* %Alloca, i32* %Other]
1559/// %V = load i32* %P2
1560/// to:
1561/// %V1 = load i32* %Alloca -> will be mem2reg'd
1562/// ...
1563/// %V2 = load i32* %Other
1564/// ...
1565/// %V = phi [i32 %V1, i32 %V2]
1566///
1567/// We can do this to a select if its only uses are loads and if the operands
1568/// to the select can be loaded unconditionally.
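///
/// A sketch of the analogous select case (values here are illustrative):
/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
/// %V = load i32* %P2
/// to:
/// %V1 = load i32* %Alloca -> will be mem2reg'd
/// %V2 = load i32* %Other
/// %V = select i1 %cond, i32 %V1, i32 %V2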
1569///
1570/// FIXME: This should be hoisted into a generic utility, likely in
1571/// Transforms/Util/Local.h
1572static bool isSafePHIToSpeculate(PHINode &PN) {
1573 const DataLayout &DL = PN.getModule()->getDataLayout();
1574
1575 // For now, we can only do this promotion if the load is in the same block
1576 // as the PHI, and if there are no stores between the phi and load.
1577 // TODO: Allow recursive phi users.
1578 // TODO: Allow stores.
1579 BasicBlock *BB = PN.getParent();
1580 Align MaxAlign;
1581 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1582 Type *LoadType = nullptr;
1583 for (User *U : PN.users()) {
1584 LoadInst *LI = dyn_cast<LoadInst>(U);
1585 if (!LI || !LI->isSimple())
1586 return false;
1587
1588 // For now we only allow loads in the same block as the PHI. This is
1589 // a common case that happens when instcombine merges two loads through
1590 // a PHI.
1591 if (LI->getParent() != BB)
1592 return false;
1593
1594 if (LoadType) {
1595 if (LoadType != LI->getType())
1596 return false;
1597 } else {
1598 LoadType = LI->getType();
1599 }
1600
1601 // Ensure that there are no instructions between the PHI and the load that
1602 // could store.
1603 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1604 if (BBI->mayWriteToMemory())
1605 return false;
1606
1607 MaxAlign = std::max(MaxAlign, LI->getAlign());
1608 }
1609
1610 if (!LoadType)
1611 return false;
1612
1613 APInt LoadSize =
1614 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1615
1616 // We can only transform this if it is safe to push the loads into the
1617 // predecessor blocks. The only thing to watch out for is that we can't put
1618 // a possibly trapping load in the predecessor if it is a critical edge.
1619 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1620 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1621 Value *InVal = PN.getIncomingValue(Idx);
1622
1623 // If the value is produced by the terminator of the predecessor (an
1624 // invoke) or it has side-effects, there is no valid place to put a load
1625 // in the predecessor.
1626 if (TI == InVal || TI->mayHaveSideEffects())
1627 return false;
1628
1629 // If the predecessor has a single successor, then the edge isn't
1630 // critical.
1631 if (TI->getNumSuccessors() == 1)
1632 continue;
1633
1634 // If this pointer is always safe to load, or if we can prove that there
1635 // is already a load in the block, then we can move the load to the pred
1636 // block.
1637 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1638 continue;
1639
1640 return false;
1641 }
1642
1643 return true;
1644}
1645
1646static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1647 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1648
1649 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1650 Type *LoadTy = SomeLoad->getType();
1651 IRB.SetInsertPoint(&PN);
1652 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1653 PN.getName() + ".sroa.speculated");
1654
1655 // Get the AA tags and alignment to use from one of the loads. It does not
1656 // matter which one we use or whether any of them differ.
1657 AAMDNodes AATags = SomeLoad->getAAMetadata();
1658 Align Alignment = SomeLoad->getAlign();
1659
1660 // Rewrite all loads of the PN to use the new PHI.
1661 while (!PN.use_empty()) {
1662 LoadInst *LI = cast<LoadInst>(PN.user_back());
1663 LI->replaceAllUsesWith(NewPN);
1664 LI->eraseFromParent();
1665 }
1666
1667 // Inject loads into all of the pred blocks.
1668 DenseMap<BasicBlock *, Value *> InjectedLoads;
1669 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1670 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1671 Value *InVal = PN.getIncomingValue(Idx);
1672
1673 // A PHI node is allowed to have multiple (duplicated) entries for the same
1674 // basic block, as long as the value is the same. So if we already injected
1675 // a load in the predecessor, then we should reuse the same load for all
1676 // duplicated entries.
1677 if (Value *V = InjectedLoads.lookup(Pred)) {
1678 NewPN->addIncoming(V, Pred);
1679 continue;
1680 }
1681
1682 Instruction *TI = Pred->getTerminator();
1683 IRB.SetInsertPoint(TI);
1684
1685 LoadInst *Load = IRB.CreateAlignedLoad(
1686 LoadTy, InVal, Alignment,
1687 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1688 ++NumLoadsSpeculated;
1689 if (AATags)
1690 Load->setAAMetadata(AATags);
1691 NewPN->addIncoming(Load, Pred);
1692 InjectedLoads[Pred] = Load;
1693 }
1694
1695 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1696 PN.eraseFromParent();
1697}
1698
1699SelectHandSpeculativity &
1700SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1701 if (isTrueVal)
1702 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1703 else
1704 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1705 return *this;
1706}
1707
1708bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1709 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1710 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1711}
1712
1713bool SelectHandSpeculativity::areAllSpeculatable() const {
1714 return isSpeculatable(/*isTrueVal=*/true) &&
1715 isSpeculatable(/*isTrueVal=*/false);
1716}
1717
1718bool SelectHandSpeculativity::areAnySpeculatable() const {
1719 return isSpeculatable(/*isTrueVal=*/true) ||
1720 isSpeculatable(/*isTrueVal=*/false);
1721}
1722bool SelectHandSpeculativity::areNoneSpeculatable() const {
1723 return !areAnySpeculatable();
1724}
1725
1726static SelectHandSpeculativity
1727isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1728 assert(LI.isSimple() && "Only for simple loads");
1729 SelectHandSpeculativity Spec;
1730
1731 const DataLayout &DL = SI.getModule()->getDataLayout();
1732 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1733 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1734 &LI))
1735 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1736 else if (PreserveCFG)
1737 return Spec;
1738
1739 return Spec;
1740}
1741
1742std::optional<RewriteableMemOps>
1743SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1744 RewriteableMemOps Ops;
1745
1746 for (User *U : SI.users()) {
1747 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1748 U = *BC->user_begin();
1749
1750 if (auto *Store = dyn_cast<StoreInst>(U)) {
1751 // Note that atomic stores can be transformed; atomic semantics do not
1752 // have any meaning for a local alloca. Stores are not speculatable,
1753 // however, so if we can't turn it into a predicated store, we are done.
1754 if (Store->isVolatile() || PreserveCFG)
1755 return {}; // Give up on this `select`.
1756 Ops.emplace_back(Store);
1757 continue;
1758 }
1759
1760 auto *LI = dyn_cast<LoadInst>(U);
1761
1762 // Note that atomic loads can be transformed;
1763 // atomic semantics do not have any meaning for a local alloca.
1764 if (!LI || LI->isVolatile())
1765 return {}; // Give up on this `select`.
1766
1767 PossiblySpeculatableLoad Load(LI);
1768 if (!LI->isSimple()) {
1769 // If the `load` is not simple, we can't speculatively execute it,
1770 // but we could handle this via a CFG modification. But can we?
1771 if (PreserveCFG)
1772 return {}; // Give up on this `select`.
1773 Ops.emplace_back(Load);
1774 continue;
1775 }
1776
1777 SelectHandSpeculativity Spec =
1778 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1779 if (PreserveCFG && !Spec.areAllSpeculatable())
1780 return {}; // Give up on this `select`.
1781
1782 Load.setInt(Spec);
1783 Ops.emplace_back(Load);
1784 }
1785
1786 return Ops;
1787}
1788
1789static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1790 IRBuilderTy &IRB) {
1791 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1792
1793 Value *TV = SI.getTrueValue();
1794 Value *FV = SI.getFalseValue();
1795 // Replace the given load of the select with a select of two loads.
1796
1797 assert(LI.isSimple() && "We only speculate simple loads");
1798
1799 IRB.SetInsertPoint(&LI);
1800
1801 LoadInst *TL =
1802 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1803 LI.getName() + ".sroa.speculate.load.true");
1804 LoadInst *FL =
1805 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1806 LI.getName() + ".sroa.speculate.load.false");
1807 NumLoadsSpeculated += 2;
1808
1809 // Transfer alignment and AA info if present.
1810 TL->setAlignment(LI.getAlign());
1811 FL->setAlignment(LI.getAlign());
1812
1813 AAMDNodes Tags = LI.getAAMetadata();
1814 if (Tags) {
1815 TL->setAAMetadata(Tags);
1816 FL->setAAMetadata(Tags);
1817 }
1818
1819 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1820 LI.getName() + ".sroa.speculated");
1821
1822 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1823 LI.replaceAllUsesWith(V);
1824}
1825
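// A sketch of the CFG rewrite performed below for a load of a select when
// neither hand can be speculated (block and value names are illustrative,
// and mirror the ".then"/".else"/".cont" suffixes used in the code):
//   head:
//     %v = load i32, ptr %sel        ; %sel = select i1 %c, ptr %a, ptr %b
// becomes roughly:
//   head:
//     br i1 %c, label %head.then, label %head.else
//   head.then:
//     %v.then.val = load i32, ptr %a
//     br label %head.cont
//   head.else:
//     %v.else.val = load i32, ptr %b
//     br label %head.cont
//   head.cont:
//     %v = phi i32 [ %v.then.val, %head.then ], [ %v.else.val, %head.else ]
// Stores are handled the same way, except no PHI is needed in the tail.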
1826template <typename T>
1827static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1828 SelectHandSpeculativity Spec,
1829 DomTreeUpdater &DTU) {
1830 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1831 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1832 BasicBlock *Head = I.getParent();
1833 Instruction *ThenTerm = nullptr;
1834 Instruction *ElseTerm = nullptr;
1835 if (Spec.areNoneSpeculatable())
1836 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1837 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1838 else {
1839 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1840 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1841 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1842 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1843 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1844 }
1845 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1846 Spec = {}; // Do not use `Spec` beyond this point.
1847 BasicBlock *Tail = I.getParent();
1848 Tail->setName(Head->getName() + ".cont");
1849 PHINode *PN;
1850 if (isa<LoadInst>(I))
1851 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1852 for (BasicBlock *SuccBB : successors(Head)) {
1853 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1854 int SuccIdx = IsThen ? 0 : 1;
1855 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1856 auto &CondMemOp = cast<T>(*I.clone());
1857 if (NewMemOpBB != Head) {
1858 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1859 if (isa<LoadInst>(I))
1860 ++NumLoadsPredicated;
1861 else
1862 ++NumStoresPredicated;
1863 } else {
1864 CondMemOp.dropUBImplyingAttrsAndMetadata();
1865 ++NumLoadsSpeculated;
1866 }
1867 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1868 Value *Ptr = SI.getOperand(1 + SuccIdx);
1869 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1870 if (isa<LoadInst>(I)) {
1871 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1872 PN->addIncoming(&CondMemOp, NewMemOpBB);
1873 } else
1874 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1875 }
1876 if (isa<LoadInst>(I)) {
1877 PN->takeName(&I);
1878 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1879 I.replaceAllUsesWith(PN);
1880 }
1881}
1882
1883static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1884 SelectHandSpeculativity Spec,
1885 DomTreeUpdater &DTU) {
1886 if (auto *LI = dyn_cast<LoadInst>(&I))
1887 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1888 else if (auto *SI = dyn_cast<StoreInst>(&I))
1889 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1890 else
1891 llvm_unreachable_internal("Only for load and store.");
1892}
1893
1894static bool rewriteSelectInstMemOps(SelectInst &SI,
1895 const RewriteableMemOps &Ops,
1896 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1897 bool CFGChanged = false;
1898 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1899
1900 for (const RewriteableMemOp &Op : Ops) {
1901 SelectHandSpeculativity Spec;
1902 Instruction *I;
1903 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1904 I = *US;
1905 } else {
1906 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1907 I = PSL.getPointer();
1908 Spec = PSL.getInt();
1909 }
1910 if (Spec.areAllSpeculatable()) {
1911 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1912 } else {
1913 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1914 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1915 CFGChanged = true;
1916 }
1917 I->eraseFromParent();
1918 }
1919
1920 for (User *U : make_early_inc_range(SI.users()))
1921 cast<BitCastInst>(U)->eraseFromParent();
1922 SI.eraseFromParent();
1923 return CFGChanged;
1924}
1925
1926/// Compute an adjusted pointer from Ptr by Offset bytes where the
1927/// resulting pointer has PointerTy.
1928static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1929 APInt Offset, Type *PointerTy,
1930 const Twine &NamePrefix) {
1931 if (Offset != 0)
1932 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1933 NamePrefix + "sroa_idx");
1934 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1935 NamePrefix + "sroa_cast");
1936}
1937
1938/// Compute the adjusted alignment for a load or store from an offset.
1939static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1940 return commonAlignment(getLoadStoreAlignment(I), Offset);
1941}
1942
1943/// Test whether we can convert a value from the old to the new type.
1944///
1945/// This predicate should be used to guard calls to convertValue in order to
1946/// ensure that we only try to convert viable values. The strategy is that we
1947/// will peel off single element struct and array wrappings to get to an
1948/// underlying value, and convert that value.
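///
/// A few illustrative outcomes under a typical 64-bit data layout: i64 and
/// ptr (in an integral address space) are mutually convertible, as are
/// <2 x i32> and i64 (same size, both single-value types); i32 and i64 are
/// not (different integer widths), and nothing converts to or from a
/// non-integral pointer except an identical type.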
1949static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
1950 if (OldTy == NewTy)
1951 return true;
1952
1953 // For integer types, we can't handle any bit-width differences. This would
1954 // break vector conversions that require extension and would introduce
1955 // endianness issues when used in conjunction with loads and stores.
1956 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1957 assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1958 cast<IntegerType>(NewTy)->getBitWidth() &&
1959 "We can't have the same bitwidth for different int types");
1960 return false;
1961 }
1962
1963 if (DL.getTypeSizeInBits(NewTy).getFixedValue() !=
1964 DL.getTypeSizeInBits(OldTy).getFixedValue())
1965 return false;
1966 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1967 return false;
1968
1969 // We can convert pointers to integers and vice-versa. Same for vectors
1970 // of pointers and integers.
1971 OldTy = OldTy->getScalarType();
1972 NewTy = NewTy->getScalarType();
1973 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1974 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1975 unsigned OldAS = OldTy->getPointerAddressSpace();
1976 unsigned NewAS = NewTy->getPointerAddressSpace();
1977 // Convert pointers if they are pointers from the same address space or
1978 // different integral (not non-integral) address spaces with the same
1979 // pointer size.
1980 return OldAS == NewAS ||
1981 (!DL.isNonIntegralAddressSpace(OldAS) &&
1982 !DL.isNonIntegralAddressSpace(NewAS) &&
1983 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1984 }
1985
1986 // We can convert integers to integral pointers, but not to non-integral
1987 // pointers.
1988 if (OldTy->isIntegerTy())
1989 return !DL.isNonIntegralPointerType(NewTy);
1990
1991 // We can convert integral pointers to integers, but non-integral pointers
1992 // need to remain pointers.
1993 if (!DL.isNonIntegralPointerType(OldTy))
1994 return NewTy->isIntegerTy();
1995
1996 return false;
1997 }
1998
1999 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
2000 return false;
2001
2002 return true;
2003}
2004
2005/// Generic routine to convert an SSA value to a value of a different
2006/// type.
2007///
2008/// This will try various different casting techniques, such as bitcasts,
2009/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
2010/// two types for viability with this routine.
2011static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2012 Type *NewTy) {
2013 Type *OldTy = V->getType();
2014 assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
2015
2016 if (OldTy == NewTy)
2017 return V;
2018
2019 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
2020 "Integer types must be the exact same to convert.");
2021
2022 // See if we need inttoptr for this type pair. May require additional bitcast.
2023 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2024 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
2025 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
2026 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
2027 // Directly handle i64 to i8*
2028 return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
2029 NewTy);
2030 }
2031
2032 // See if we need ptrtoint for this type pair. May require additional bitcast.
2033 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2034 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
2035 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
2036 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
2037 // Expand i8* to i64 --> i8* to i64 to i64
2038 return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2039 NewTy);
2040 }
2041
2042 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2043 unsigned OldAS = OldTy->getPointerAddressSpace();
2044 unsigned NewAS = NewTy->getPointerAddressSpace();
2045 // To convert pointers with different address spaces (already checked to be
2046 // convertible, i.e. they have the same pointer size), we cannot use
2047 // `bitcast` (which requires the source and destination address spaces to
2048 // match) or `addrspacecast` (which is not always a no-op cast). Instead,
2049 // use a pair of no-op `ptrtoint`/`inttoptr` casts through an integer with
2050 // the same bit size.
2051 if (OldAS != NewAS) {
2052 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2053 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2054 NewTy);
2055 }
2056 }
2057
2058 return IRB.CreateBitCast(V, NewTy);
2059}
2060
2061/// Test whether the given slice use can be promoted to a vector.
2062///
2063/// This function is called to test each entry in a partition which is slated
2064/// for a single slice.
2065static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2066 VectorType *Ty,
2067 uint64_t ElementSize,
2068 const DataLayout &DL) {
2069 // First validate the slice offsets.
2070 uint64_t BeginOffset =
2071 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2072 uint64_t BeginIndex = BeginOffset / ElementSize;
2073 if (BeginIndex * ElementSize != BeginOffset ||
2074 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2075 return false;
2076 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2077 uint64_t EndIndex = EndOffset / ElementSize;
2078 if (EndIndex * ElementSize != EndOffset ||
2079 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2080 return false;
2081
2082 assert(EndIndex > BeginIndex && "Empty vector!");
2083 uint64_t NumElements = EndIndex - BeginIndex;
2084 Type *SliceTy = (NumElements == 1)
2085 ? Ty->getElementType()
2086 : FixedVectorType::get(Ty->getElementType(), NumElements);
2087
2088 Type *SplitIntTy =
2089 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2090
2091 Use *U = S.getUse();
2092
2093 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2094 if (MI->isVolatile())
2095 return false;
2096 if (!S.isSplittable())
2097 return false; // Skip any unsplittable intrinsics.
2098 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2099 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2100 return false;
2101 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2102 if (LI->isVolatile())
2103 return false;
2104 Type *LTy = LI->getType();
2105 // Disable vector promotion when there are loads or stores of an FCA.
2106 if (LTy->isStructTy())
2107 return false;
2108 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2109 assert(LTy->isIntegerTy());
2110 LTy = SplitIntTy;
2111 }
2112 if (!canConvertValue(DL, SliceTy, LTy))
2113 return false;
2114 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2115 if (SI->isVolatile())
2116 return false;
2117 Type *STy = SI->getValueOperand()->getType();
2118 // Disable vector promotion when there are loads or stores of an FCA.
2119 if (STy->isStructTy())
2120 return false;
2121 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2122 assert(STy->isIntegerTy());
2123 STy = SplitIntTy;
2124 }
2125 if (!canConvertValue(DL, STy, SliceTy))
2126 return false;
2127 } else {
2128 return false;
2129 }
2130
2131 return true;
2132}
2133
2134/// Test whether a vector type is viable for promotion.
2135///
2136/// This implements the necessary checking for \c checkVectorTypesForPromotion
2137/// (and thus isVectorPromotionViable) over all slices of the alloca for the
2138/// given VectorType.
2139static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
2140 const DataLayout &DL) {
2141 uint64_t ElementSize =
2142 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2143
2144 // While LLVM vectors are defined to be bit-packed, we don't support element
2145 // sizes that aren't byte sized.
2146 if (ElementSize % 8)
2147 return false;
2148 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2149 "vector size not a multiple of element size?");
2150 ElementSize /= 8;
2151
2152 for (const Slice &S : P)
2153 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
2154 return false;
2155
2156 for (const Slice *S : P.splitSliceTails())
2157 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
2158 return false;
2159
2160 return true;
2161}
2162
2163/// Test whether any vector type in \p CandidateTys is viable for promotion.
2164///
2165/// This implements the necessary checking for \c isVectorPromotionViable over
2166/// all slices of the alloca for the given VectorType.
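///
/// For instance (purely illustrative), candidates such as <4 x i32>,
/// <2 x i64>, and <8 x i16> all describe the same 128 bits; with no common
/// element type they are integer-ified, de-duplicated, ordered by ascending
/// element count, and the first one for which every slice is viable is
/// returned.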
2167static VectorType *
2168checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
2169 SmallVectorImpl<VectorType *> &CandidateTys,
2170 bool HaveCommonEltTy, Type *CommonEltTy,
2171 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2172 VectorType *CommonVecPtrTy) {
2173 // If we didn't find a vector type, nothing to do here.
2174 if (CandidateTys.empty())
2175 return nullptr;
2176
2177 // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
2178 // then we should choose it, not some other alternative.
2179 // But we can't perform a no-op pointer address space change via bitcast,
2180 // so if we didn't have a common pointer element type, bail.
2181 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2182 return nullptr;
2183
2184 // Try to pick the "best" element type out of the choices.
2185 if (!HaveCommonEltTy && HaveVecPtrTy) {
2186 // If there was a pointer element type, there's really only one choice.
2187 CandidateTys.clear();
2188 CandidateTys.push_back(CommonVecPtrTy);
2189 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2190 // Integer-ify vector types.
2191 for (VectorType *&VTy : CandidateTys) {
2192 if (!VTy->getElementType()->isIntegerTy())
2193 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2194 VTy->getContext(), VTy->getScalarSizeInBits())));
2195 }
2196
2197 // Rank the remaining candidate vector types. This is easy because we know
2198 // they're all integer vectors. We sort by ascending number of elements.
2199 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2200 (void)DL;
2201 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2202 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2203 "Cannot have vector types of different sizes!");
2204 assert(RHSTy->getElementType()->isIntegerTy() &&
2205 "All non-integer types eliminated!");
2206 assert(LHSTy->getElementType()->isIntegerTy() &&
2207 "All non-integer types eliminated!");
2208 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2209 cast<FixedVectorType>(LHSTy)->getNumElements();
2210 };
2211 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2212 (void)DL;
2213 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2214 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2215 "Cannot have vector types of different sizes!");
2216 assert(RHSTy->getElementType()->isIntegerTy() &&
2217 "All non-integer types eliminated!");
2218 assert(LHSTy->getElementType()->isIntegerTy() &&
2219 "All non-integer types eliminated!");
2220 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2221 cast<FixedVectorType>(LHSTy)->getNumElements();
2222 };
2223 llvm::sort(CandidateTys, RankVectorTypesComp);
2224 CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
2225 RankVectorTypesEq),
2226 CandidateTys.end());
2227 } else {
2228// The only way to have the same element type in every vector type is to
2229// have the same vector type. Check that and remove all but one.
2230#ifndef NDEBUG
2231 for (VectorType *VTy : CandidateTys) {
2232 assert(VTy->getElementType() == CommonEltTy &&
2233 "Unaccounted for element type!");
2234 assert(VTy == CandidateTys[0] &&
2235 "Different vector types with the same element type!");
2236 }
2237#endif
2238 CandidateTys.resize(1);
2239 }
2240
2241 // FIXME: hack. Do we have a named constant for this?
2242 // SDAG SDNode can't have more than 65535 operands.
2243 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2244 return cast<FixedVectorType>(VTy)->getNumElements() >
2245 std::numeric_limits<unsigned short>::max();
2246 });
2247
2248 for (VectorType *VTy : CandidateTys)
2249 if (checkVectorTypeForPromotion(P, VTy, DL))
2250 return VTy;
2251
2252 return nullptr;
2253}
2254
2255static VectorType *createAndCheckVectorTypesForPromotion(
2256 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2257 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2258 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2259 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2260 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
2261 [[maybe_unused]] VectorType *OriginalElt =
2262 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2263 // Consider additional vector types where the element type size is a
2264 // multiple of load/store element size.
2265 for (Type *Ty : OtherTys) {
2266 if (!VectorType::isValidElementType(Ty))
2267 continue;
2268 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2269 // Make a copy of CandidateTys and iterate through it, because we
2270 // might append to CandidateTys in the loop.
2271 for (VectorType *const VTy : CandidateTysCopy) {
2272 // The elements in the copy should remain invariant throughout the loop
2273 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2274 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2275 unsigned ElementSize =
2276 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2277 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2278 VectorSize % TypeSize == 0) {
2279 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2280 CheckCandidateType(NewVTy);
2281 }
2282 }
2283 }
2284
2285 return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2286 CommonEltTy, HaveVecPtrTy,
2287 HaveCommonVecPtrTy, CommonVecPtrTy);
2288}
2289
2290/// Test whether the given alloca partitioning and range of slices can be
2291/// promoted to a vector.
2292///
2293/// This is a quick test to check whether we can rewrite a particular alloca
2294/// partition (and its newly formed alloca) into a vector alloca with only
2295/// whole-vector loads and stores such that it could be promoted to a vector
2296/// SSA value. We only can ensure this for a limited set of operations, and we
2297/// don't want to do the rewrites unless we are confident that the result will
2298/// be promotable, so we have an early test here.
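///
/// For example (illustrative): an alloca covered by a <4 x float> store and
/// several float loads at element offsets can be rebuilt as a <4 x float>
/// alloca whose element accesses become extractelement/insertelement
/// operations, which mem2reg can then promote to a vector SSA value.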
2299static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
2300 // Collect the candidate types for vector-based promotion. Also track whether
2301 // we have different element types.
2302 SmallVector<VectorType *, 4> CandidateTys;
2303 SetVector<Type *> LoadStoreTys;
2304 SetVector<Type *> DeferredTys;
2305 Type *CommonEltTy = nullptr;
2306 VectorType *CommonVecPtrTy = nullptr;
2307 bool HaveVecPtrTy = false;
2308 bool HaveCommonEltTy = true;
2309 bool HaveCommonVecPtrTy = true;
2310 auto CheckCandidateType = [&](Type *Ty) {
2311 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2312 // If this vector type's total size in bits differs from the earlier candidates', clear them and bail out.
2313 if (!CandidateTys.empty()) {
2314 VectorType *V = CandidateTys[0];
2315 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2316 DL.getTypeSizeInBits(V).getFixedValue()) {
2317 CandidateTys.clear();
2318 return;
2319 }
2320 }
2321 CandidateTys.push_back(VTy);
2322 Type *EltTy = VTy->getElementType();
2323
2324 if (!CommonEltTy)
2325 CommonEltTy = EltTy;
2326 else if (CommonEltTy != EltTy)
2327 HaveCommonEltTy = false;
2328
2329 if (EltTy->isPointerTy()) {
2330 HaveVecPtrTy = true;
2331 if (!CommonVecPtrTy)
2332 CommonVecPtrTy = VTy;
2333 else if (CommonVecPtrTy != VTy)
2334 HaveCommonVecPtrTy = false;
2335 }
2336 }
2337 };
2338
2339 // Put load and store types into a set for de-duplication.
2340 for (const Slice &S : P) {
2341 Type *Ty;
2342 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2343 Ty = LI->getType();
2344 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2345 Ty = SI->getValueOperand()->getType();
2346 else
2347 continue;
2348
2349 auto CandTy = Ty->getScalarType();
2350 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2351 S.endOffset() != P.endOffset())) {
2352 DeferredTys.insert(Ty);
2353 continue;
2354 }
2355
2356 LoadStoreTys.insert(Ty);
2357 // Consider any loads or stores whose slice covers the entire partition.
2358 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2359 CheckCandidateType(Ty);
2360 }
2361
2362 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2363 if (auto *VTy = createAndCheckVectorTypesForPromotion(
2364 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2365 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2366 HaveCommonVecPtrTy, CommonVecPtrTy))
2367 return VTy;
2368
2369 CandidateTys.clear();
2370 return createAndCheckVectorTypesForPromotion(
2371 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2372 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2373 CommonVecPtrTy);
2374}
2375
2376/// Test whether a slice of an alloca is valid for integer widening.
2377///
2378/// This implements the necessary checking for the \c isIntegerWideningViable
2379/// test below on a single slice of the alloca.
2380static bool isIntegerWideningViableForSlice(const Slice &S,
2381 uint64_t AllocBeginOffset,
2382 Type *AllocaTy,
2383 const DataLayout &DL,
2384 bool &WholeAllocaOp) {
2385 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2386
2387 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2388 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2389
2390 Use *U = S.getUse();
2391
2392 // Lifetime intrinsics operate over the whole alloca, whose size is usually
2393 // larger than other load/store slices (RelEnd > Size). But lifetime
2394 // intrinsics are always promotable and should not impact the promotability
2395 // of the partition's other slices.
2396 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2397 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2398 return true;
2399 }
2400
2401 // We can't reasonably handle cases where the load or store extends past
2402 // the end of the alloca's type and into its padding.
2403 if (RelEnd > Size)
2404 return false;
2405
2406 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2407 if (LI->isVolatile())
2408 return false;
2409 // We can't handle loads that extend past the allocated memory.
2410 if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)
2411 return false;
2412 // So far, AllocaSliceRewriter does not support widening split slice tails
2413 // in rewriteIntegerLoad.
2414 if (S.beginOffset() < AllocBeginOffset)
2415 return false;
2416 // Note that we don't count vector loads or stores as whole-alloca
2417 // operations which enable integer widening because we would prefer to use
2418 // vector widening instead.
2419 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2420 WholeAllocaOp = true;
2421 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2422 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2423 return false;
2424 } else if (RelBegin != 0 || RelEnd != Size ||
2425 !canConvertValue(DL, AllocaTy, LI->getType())) {
2426 // Non-integer loads need to be convertible from the alloca type so that
2427 // they are promotable.
2428 return false;
2429 }
2430 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2431 Type *ValueTy = SI->getValueOperand()->getType();
2432 if (SI->isVolatile())
2433 return false;
2434 // We can't handle stores that extend past the allocated memory.
2435 if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size)
2436 return false;
2437 // So far, AllocaSliceRewriter does not support widening split slice tails
2438 // in rewriteIntegerStore.
2439 if (S.beginOffset() < AllocBeginOffset)
2440 return false;
2441 // Note that we don't count vector loads or stores as whole-alloca
2442 // operations which enable integer widening because we would prefer to use
2443 // vector widening instead.
2444 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2445 WholeAllocaOp = true;
2446 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2447 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2448 return false;
2449 } else if (RelBegin != 0 || RelEnd != Size ||
2450 !canConvertValue(DL, ValueTy, AllocaTy)) {
2451 // Non-integer stores need to be convertible to the alloca type so that
2452 // they are promotable.
2453 return false;
2454 }
2455 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2456 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2457 return false;
2458 if (!S.isSplittable())
2459 return false; // Skip any unsplittable intrinsics.
2460 } else {
2461 return false;
2462 }
2463
2464 return true;
2465}
2466
2467/// Test whether the given alloca partition's integer operations can be
2468/// widened to promotable ones.
2469///
2470/// This is a quick test to check whether we can rewrite the integer loads and
2471/// stores to a particular alloca into wider loads and stores and be able to
2472/// promote the resulting alloca.
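///
/// A sketch of the intended effect: an alloca of type i64 that is written by
/// two adjacent i32 stores and read by one whole i64 load can have the i32
/// stores widened into load/insert/store sequences over the full i64, after
/// which the alloca is promotable. The covering i64 load is what makes
/// WholeAllocaOp true below.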
2473static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2474 const DataLayout &DL) {
2475 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2476 // Don't create integer types larger than the maximum bitwidth.
2477 if (SizeInBits > IntegerType::MAX_INT_BITS)
2478 return false;
2479
2480 // Don't try to handle allocas with bit-padding.
2481 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2482 return false;
2483
2484 // We need to ensure that an integer type with the appropriate bitwidth can
2485 // be converted to the alloca type, whatever that is. We don't want to force
2486 // the alloca itself to have an integer type if there is a more suitable one.
2487 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2488 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2489 !canConvertValue(DL, IntTy, AllocaTy))
2490 return false;
2491
2492 // While examining uses, we ensure that the alloca has a covering load or
2493 // store. We don't want to widen the integer operations only to fail to
2494 // promote due to some other unsplittable entry (which we may make splittable
2495 // later). However, if there are only splittable uses, go ahead and assume
2496 // that we cover the alloca.
2497 // FIXME: We shouldn't consider split slices that happen to start in the
2498 // partition here...
2499 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2500
2501 for (const Slice &S : P)
2502 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2503 WholeAllocaOp))
2504 return false;
2505
2506 for (const Slice *S : P.splitSliceTails())
2507 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2508 WholeAllocaOp))
2509 return false;
2510
2511 return WholeAllocaOp;
2512}
2513
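// The two helpers below extract and re-insert a narrow integer from/into a
// wider one held in the new alloca. As a worked example (little-endian,
// purely illustrative): extracting an i16 at byte offset 2 from an i64
// becomes a lshr by 16 followed by a trunc to i16; inserting it back becomes
// a zext to i64, a shl by 16, a mask of the old value with ~(0xffff << 16),
// and an or of the two. On big-endian targets the shift amount is computed
// from the other end of the value.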
2514static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2515 IntegerType *Ty, uint64_t Offset,
2516 const Twine &Name) {
2517 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2518 IntegerType *IntTy = cast<IntegerType>(V->getType());
2519 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2520 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2521 "Element extends past full value");
2522 uint64_t ShAmt = 8 * Offset;
2523 if (DL.isBigEndian())
2524 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2525 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2526 if (ShAmt) {
2527 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2528 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2529 }
2530 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2531 "Cannot extract to a larger integer!");
2532 if (Ty != IntTy) {
2533 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2534 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2535 }
2536 return V;
2537}
2538
2539static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2540 Value *V, uint64_t Offset, const Twine &Name) {
2541 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2542 IntegerType *Ty = cast<IntegerType>(V->getType());
2543 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2544 "Cannot insert a larger integer!");
2545 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2546 if (Ty != IntTy) {
2547 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2548 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2549 }
2550 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2551 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2552 "Element store outside of alloca store");
2553 uint64_t ShAmt = 8 * Offset;
2554 if (DL.isBigEndian())
2555 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2556 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2557 if (ShAmt) {
2558 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2559 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2560 }
2561
2562 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2563 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2564 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2565 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2566 V = IRB.CreateOr(Old, V, Name + ".insert");
2567 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2568 }
2569 return V;
2570}
2571
2572static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2573 unsigned EndIndex, const Twine &Name) {
2574 auto *VecTy = cast<FixedVectorType>(V->getType());
2575 unsigned NumElements = EndIndex - BeginIndex;
2576 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2577
2578 if (NumElements == VecTy->getNumElements())
2579 return V;
2580
2581 if (NumElements == 1) {
2582 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2583 Name + ".extract");
2584 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2585 return V;
2586 }
2587
2588 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2589 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2590 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2591 return V;
2592}
2593
2594static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2595 unsigned BeginIndex, const Twine &Name) {
2596 VectorType *VecTy = cast<VectorType>(Old->getType());
2597 assert(VecTy && "Can only insert a vector into a vector");
2598
2599 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2600 if (!Ty) {
2601 // Single element to insert.
2602 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2603 Name + ".insert");
2604 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2605 return V;
2606 }
2607
2608 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2609 cast<FixedVectorType>(VecTy)->getNumElements() &&
2610 "Too many elements!");
2611 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2612 cast<FixedVectorType>(VecTy)->getNumElements()) {
2613 assert(V->getType() == VecTy && "Vector type mismatch");
2614 return V;
2615 }
2616 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2617
2618 // When inserting a smaller vector into the larger one before storing, we
2619 // first use a shuffle vector to widen it with undef elements, and then a
2620 // select to choose between the loaded vector and the widened incoming
2621 // vector.
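// For instance (illustrative), inserting a <2 x i32> at BeginIndex 1 into a
// <4 x i32> uses a widening shuffle mask of <-1, 0, 1, -1> followed by a
// select mask of <false, true, true, false>.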
2622 SmallVector<int, 8> Mask;
2623 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2624 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2625 if (i >= BeginIndex && i < EndIndex)
2626 Mask.push_back(i - BeginIndex);
2627 else
2628 Mask.push_back(-1);
2629 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2630 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2631
2632 SmallVector<Constant *, 8> Mask2;
2633 Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2634 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2635 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2636
2637 V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
2638
2639 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2640 return V;
2641}
2642
2643namespace {
2644
2645/// Visitor to rewrite instructions using a particular slice of an alloca
2646/// to use a new alloca.
2647///
2648/// Also implements the rewriting to vector-based accesses when the partition
2649/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2650/// lives here.
2651class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2652 // Befriend the base class so it can delegate to private visit methods.
2653 friend class InstVisitor<AllocaSliceRewriter, bool>;
2654
2655 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2656
2657 const DataLayout &DL;
2658 AllocaSlices &AS;
2659 SROA &Pass;
2660 AllocaInst &OldAI, &NewAI;
2661 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2662 Type *NewAllocaTy;
2663
2664 // This is a convenience and flag variable that will be null unless the new
2665 // alloca's integer operations should be widened to this integer type due to
2666 // passing isIntegerWideningViable above. If it is non-null, the desired
2667 // integer type will be stored here for easy access during rewriting.
2668 IntegerType *IntTy;
2669
2670 // If we are rewriting an alloca partition which can be written as pure
2671 // vector operations, we stash extra information here. When VecTy is
2672 // non-null, we have some strict guarantees about the rewritten alloca:
2673 // - The new alloca is exactly the size of the vector type here.
2674 // - The accesses all either map to the entire vector or to a single
2675 // element.
2676 // - The set of accessing instructions is only one of those handled above
2677 // in isVectorPromotionViable. Generally these are the same access kinds
2678 // which are promotable via mem2reg.
2679 VectorType *VecTy;
2680 Type *ElementTy;
2681 uint64_t ElementSize;
2682
2683 // The original offset of the slice currently being rewritten relative to
2684 // the original alloca.
2685 uint64_t BeginOffset = 0;
2686 uint64_t EndOffset = 0;
2687
2688 // The new offsets of the slice currently being rewritten relative to the
2689 // original alloca.
2690 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2691
2692 uint64_t SliceSize = 0;
2693 bool IsSplittable = false;
2694 bool IsSplit = false;
2695 Use *OldUse = nullptr;
2696 Instruction *OldPtr = nullptr;
2697
2698 // Track post-rewrite users which are PHI nodes and Selects.
2699 SmallSetVector<PHINode *, 8> &PHIUsers;
2700 SmallSetVector<SelectInst *, 8> &SelectUsers;
2701
2702 // Utility IR builder, whose name prefix is set up for each visited use, and
2703 // whose insertion point is set to point to the user.
2704 IRBuilderTy IRB;
2705
2706 // Return the new alloca, addrspacecasted if required to avoid changing the
2707 // addrspace of a volatile access.
2708 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2709 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2710 return &NewAI;
2711
2712 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2713 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2714 }
2715
2716public:
2717 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2718 AllocaInst &OldAI, AllocaInst &NewAI,
2719 uint64_t NewAllocaBeginOffset,
2720 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2721 VectorType *PromotableVecTy,
2722 SmallSetVector<PHINode *, 8> &PHIUsers,
2723 SmallSetVector<SelectInst *, 8> &SelectUsers)
2724 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2725 NewAllocaBeginOffset(NewAllocaBeginOffset),
2726 NewAllocaEndOffset(NewAllocaEndOffset),
2727 NewAllocaTy(NewAI.getAllocatedType()),
2728 IntTy(
2729 IsIntegerPromotable
2730 ? Type::getIntNTy(NewAI.getContext(),
2731 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2732 .getFixedValue())
2733 : nullptr),
2734 VecTy(PromotableVecTy),
2735 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2736 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2737 : 0),
2738 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2739 IRB(NewAI.getContext(), ConstantFolder()) {
2740 if (VecTy) {
2741 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2742 "Only multiple-of-8 sized vector elements are viable");
2743 ++NumVectorized;
2744 }
2745 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2746 }
2747
2748 bool visit(AllocaSlices::const_iterator I) {
2749 bool CanSROA = true;
2750 BeginOffset = I->beginOffset();
2751 EndOffset = I->endOffset();
2752 IsSplittable = I->isSplittable();
2753 IsSplit =
2754 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2755 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2756 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2757 LLVM_DEBUG(dbgs() << "\n");
2758
2759 // Compute the intersecting offset range.
2760 assert(BeginOffset < NewAllocaEndOffset);
2761 assert(EndOffset > NewAllocaBeginOffset);
2762 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2763 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2764
2765 SliceSize = NewEndOffset - NewBeginOffset;
2766 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2767 << ") NewBegin:(" << NewBeginOffset << ", "
2768 << NewEndOffset << ") NewAllocaBegin:("
2769 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2770 << ")\n");
2771 assert(IsSplit || NewBeginOffset == BeginOffset);
2772 OldUse = I->getUse();
2773 OldPtr = cast<Instruction>(OldUse->get());
2774
2775 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2776 IRB.SetInsertPoint(OldUserI);
2777 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2778 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2779 Twine(BeginOffset) + ".");
2780
2781 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2782 if (VecTy || IntTy)
2783 assert(CanSROA);
2784 return CanSROA;
2785 }
2786
2787private:
2788 // Make sure the other visit overloads are visible.
2789 using Base::visit;
2790
2791 // Every instruction which can end up as a user must have a rewrite rule.
2792 bool visitInstruction(Instruction &I) {
2793 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2794 llvm_unreachable("No rewrite rule for this instruction!");
2795 }
2796
2797 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2798 // Note that the offset computation can use BeginOffset or NewBeginOffset
2799 // interchangeably for unsplit slices.
2800 assert(IsSplit || BeginOffset == NewBeginOffset);
2801 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2802
2803#ifndef NDEBUG
2804 StringRef OldName = OldPtr->getName();
2805 // Skip through the last '.sroa.' component of the name.
2806 size_t LastSROAPrefix = OldName.rfind(".sroa.");
2807 if (LastSROAPrefix != StringRef::npos) {
2808 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2809 // Look for an SROA slice index.
2810 size_t IndexEnd = OldName.find_first_not_of("0123456789");
2811 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2812 // Strip the index and look for the offset.
2813 OldName = OldName.substr(IndexEnd + 1);
2814 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2815 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2816 // Strip the offset.
2817 OldName = OldName.substr(OffsetEnd + 1);
2818 }
2819 }
2820 // Strip any SROA suffixes as well.
2821 OldName = OldName.substr(0, OldName.find(".sroa_"));
2822#endif
2823
2824 return getAdjustedPtr(IRB, DL, &NewAI,
2825 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
2826 PointerTy,
2827#ifndef NDEBUG
2828 Twine(OldName) + "."
2829#else
2830 Twine()
2831#endif
2832 );
2833 }
2834
2835 /// Compute suitable alignment to access this slice of the *new*
2836 /// alloca.
2837 ///
2838 /// The result is the common alignment of the new alloca's alignment and
2839 /// the slice's offset within the new alloca.
2840 Align getSliceAlign() {
2841 return commonAlignment(NewAI.getAlign(),
2842 NewBeginOffset - NewAllocaBeginOffset);
2843 }
2844
2845 unsigned getIndex(uint64_t Offset) {
2846 assert(VecTy && "Can only call getIndex when rewriting a vector");
2847 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2848 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2849 uint32_t Index = RelOffset / ElementSize;
2850 assert(Index * ElementSize == RelOffset);
2851 return Index;
2852 }
2853
2854 void deleteIfTriviallyDead(Value *V) {
2855 Instruction *I = cast<Instruction>(V);
2856 if (isInstructionTriviallyDead(I))
2857 Pass.DeadInsts.push_back(I);
2858 }
2859
2860 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
2861 unsigned BeginIndex = getIndex(NewBeginOffset);
2862 unsigned EndIndex = getIndex(NewEndOffset);
2863 assert(EndIndex > BeginIndex && "Empty vector!");
2864
2865 LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2866 NewAI.getAlign(), "load");
2867
2868 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2869 LLVMContext::MD_access_group});
2870 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
2871 }
2872
2873 Value *rewriteIntegerLoad(LoadInst &LI) {
2874 assert(IntTy && "We cannot insert an integer to the alloca");
2875 assert(!LI.isVolatile());
2876 Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2877 NewAI.getAlign(), "load");
2878 V = convertValue(DL, IRB, V, IntTy);
2879 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2880 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2881 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2882 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
2883 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
2884 }
2885 // It is possible that the extracted type is not the load type. This
2886 // happens if there is a load past the end of the alloca, and as
2887 // a consequence the slice is narrower but still a candidate for integer
2888 // lowering. To handle this case, we just zero extend the extracted
2889 // integer.
2890 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
2891 "Can only handle an extract for an overly wide load");
2892 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
2893 V = IRB.CreateZExt(V, LI.getType());
2894 return V;
2895 }
2896
2897 bool visitLoadInst(LoadInst &LI) {
2898 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
2899 Value *OldOp = LI.getOperand(0);
2900 assert(OldOp == OldPtr);
2901
2902 AAMDNodes AATags = LI.getAAMetadata();
2903
2904 unsigned AS = LI.getPointerAddressSpace();
2905
2906 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2907 : LI.getType();
2908 const bool IsLoadPastEnd =
2909 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
2910 bool IsPtrAdjusted = false;
2911 Value *V;
2912 if (VecTy) {
2913 V = rewriteVectorizedLoadInst(LI);
2914 } else if (IntTy && LI.getType()->isIntegerTy()) {
2915 V = rewriteIntegerLoad(LI);
2916 } else if (NewBeginOffset == NewAllocaBeginOffset &&
2917 NewEndOffset == NewAllocaEndOffset &&
2918 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2919 (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2920 TargetTy->isIntegerTy() && !LI.isVolatile()))) {
2921 Value *NewPtr =
2922 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
2923 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
2924 NewAI.getAlign(), LI.isVolatile(),
2925 LI.getName());
2926 if (LI.isVolatile())
2927 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2928 if (NewLI->isAtomic())
2929 NewLI->setAlignment(LI.getAlign());
2930
2931 // Copy any metadata that is valid for the new load. This may require
2932 // conversion to a different kind of metadata, e.g. !nonnull might change
2933 // to !range or vice versa.
2934 copyMetadataForLoad(*NewLI, LI);
2935
2936 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
2937 if (AATags)
2938 NewLI->setAAMetadata(AATags.adjustForAccess(
2939 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2940
2941 // Try to preserve nonnull metadata
2942 V = NewLI;
2943
2944 // If this is an integer load past the end of the slice (which means the
2945 // bytes outside the slice are undef or this load is dead) just forcibly
2946 // fix the integer size with correct handling of endianness.
2947 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2948 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2949 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2950 V = IRB.CreateZExt(V, TITy, "load.ext");
2951 if (DL.isBigEndian())
2952 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2953 "endian_shift");
2954 }
2955 } else {
2956 Type *LTy = IRB.getPtrTy(AS);
2957 LoadInst *NewLI =
2958 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2959 getSliceAlign(), LI.isVolatile(), LI.getName());
2960
2961 if (AATags)
2962 NewLI->setAAMetadata(AATags.adjustForAccess(
2963 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2964
2965 if (LI.isVolatile())
2966 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2967 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2968 LLVMContext::MD_access_group});
2969
2970 V = NewLI;
2971 IsPtrAdjusted = true;
2972 }
2973 V = convertValue(DL, IRB, V, TargetTy);
2974
2975 if (IsSplit) {
2976 assert(!LI.isVolatile());
2977 assert(LI.getType()->isIntegerTy() &&
2978 "Only integer type loads and stores are split");
2979 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
2980 "Split load isn't smaller than original load");
2981 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
2982 "Non-byte-multiple bit width");
2983 // Move the insertion point just past the load so that we can refer to it.
2984 BasicBlock::iterator LIIt = std::next(LI.getIterator());
2985 // Ensure the insertion point comes before any debug-info immediately
2986 // after the load, so that variable values referring to the load are
2987 // dominated by it.
2988 LIIt.setHeadBit(true);
2989 IRB.SetInsertPoint(LI.getParent(), LIIt);
2990 // Create a placeholder value with the same type as LI to use as the
2991 // basis for the new value. This allows us to replace the uses of LI with
2992 // the computed value, and then replace the placeholder with LI, leaving
2993 // LI only used for this computation.
2994 Value *Placeholder =
2995 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
2996 false, Align(1));
2997 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2998 "insert");
2999 LI.replaceAllUsesWith(V);
3000 Placeholder->replaceAllUsesWith(&LI);
3001 Placeholder->deleteValue();
3002 } else {
3003 LI.replaceAllUsesWith(V);
3004 }
3005
3006 Pass.DeadInsts.push_back(&LI);
3007 deleteIfTriviallyDead(OldOp);
3008 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
3009 return !LI.isVolatile() && !IsPtrAdjusted;
3010 }
3011
3012 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3013 AAMDNodes AATags) {
3014 // Capture V for the purpose of debug-info accounting once it's converted
3015 // to a vector store.
3016 Value *OrigV = V;
3017 if (V->getType() != VecTy) {
3018 unsigned BeginIndex = getIndex(NewBeginOffset);
3019 unsigned EndIndex = getIndex(NewEndOffset);
3020 assert(EndIndex > BeginIndex && "Empty vector!");
3021 unsigned NumElements = EndIndex - BeginIndex;
3022 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3023 "Too many elements!");
3024 Type *SliceTy = (NumElements == 1)
3025 ? ElementTy
3026 : FixedVectorType::get(ElementTy, NumElements);
3027 if (V->getType() != SliceTy)
3028 V = convertValue(DL, IRB, V, SliceTy);
3029
3030 // Mix in the existing elements.
3031 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3032 NewAI.getAlign(), "load");
3033 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3034 }
3035 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3036 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3037 LLVMContext::MD_access_group});
3038 if (AATags)
3039 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3040 V->getType(), DL));
3041 Pass.DeadInsts.push_back(&SI);
3042
3043 // NOTE: Careful to use OrigV rather than V.
3044 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3045 Store, Store->getPointerOperand(), OrigV, DL);
3046 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3047 return true;
3048 }
3049
3050 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3051 assert(IntTy && "We cannot extract an integer from the alloca");
3052 assert(!SI.isVolatile());
3053 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3054 IntTy->getBitWidth()) {
3055 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3056 NewAI.getAlign(), "oldload");
3057 Old = convertValue(DL, IRB, Old, IntTy);
3058 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3059 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3060 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3061 }
3062 V = convertValue(DL, IRB, V, NewAllocaTy);
3063 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3064 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3065 LLVMContext::MD_access_group});
3066 if (AATags)
3067 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3068 V->getType(), DL));
3069
3070 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3071 Store, Store->getPointerOperand(),
3072 Store->getValueOperand(), DL);
3073
3074 Pass.DeadInsts.push_back(&SI);
3075 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3076 return true;
3077 }
3078
3079 bool visitStoreInst(StoreInst &SI) {
3080 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3081 Value *OldOp = SI.getOperand(1);
3082 assert(OldOp == OldPtr);
3083
3084 AAMDNodes AATags = SI.getAAMetadata();
3085 Value *V = SI.getValueOperand();
3086
3087 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3088 // alloca that should be re-examined after promoting this alloca.
3089 if (V->getType()->isPointerTy())
3090 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3091 Pass.PostPromotionWorklist.insert(AI);
3092
3093 if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) {
3094 assert(!SI.isVolatile());
3095 assert(V->getType()->isIntegerTy() &&
3096 "Only integer type loads and stores are split");
3097 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3098 "Non-byte-multiple bit width");
3099 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3100 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3101 "extract");
3102 }
3103
3104 if (VecTy)
3105 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3106 if (IntTy && V->getType()->isIntegerTy())
3107 return rewriteIntegerStore(V, SI, AATags);
3108
3109 StoreInst *NewSI;
3110 if (NewBeginOffset == NewAllocaBeginOffset &&
3111 NewEndOffset == NewAllocaEndOffset &&
3112 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3113 V = convertValue(DL, IRB, V, NewAllocaTy);
3114 Value *NewPtr =
3115 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3116
3117 NewSI =
3118 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3119 } else {
3120 unsigned AS = SI.getPointerAddressSpace();
3121 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3122 NewSI =
3123 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3124 }
3125 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3126 LLVMContext::MD_access_group});
3127 if (AATags)
3128 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3129 V->getType(), DL));
3130 if (SI.isVolatile())
3131 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3132 if (NewSI->isAtomic())
3133 NewSI->setAlignment(SI.getAlign());
3134
3135 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3136 NewSI, NewSI->getPointerOperand(),
3137 NewSI->getValueOperand(), DL);
3138
3139 Pass.DeadInsts.push_back(&SI);
3140 deleteIfTriviallyDead(OldOp);
3141
3142 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3143 return NewSI->getPointerOperand() == &NewAI &&
3144 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3145 !SI.isVolatile();
3146 }
3147
3148 /// Compute an integer value from splatting an i8 across the given
3149 /// number of bytes.
3150 ///
3151 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3152 /// call this routine.
3153 /// FIXME: Heed the advice above.
3154 ///
3155 /// \param V The i8 value to splat.
3156 /// \param Size The number of bytes in the output (assuming i8 is one byte)
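 /// For illustration, with V = 0xAB and Size = 4 the code below computes
 /// zext(V) * (0xFFFFFFFF udiv 0xFF) = 0xAB * 0x01010101 = 0xABABABAB,
 /// i.e. the byte replicated into every byte of the wider integer.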
3157 Value *getIntegerSplat(Value *V, unsigned Size) {
3158 assert(Size > 0 && "Expected a positive number of bytes.");
3159 IntegerType *VTy = cast<IntegerType>(V->getType());
3160 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3161 if (Size == 1)
3162 return V;
3163
3164 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3165 V = IRB.CreateMul(
3166 IRB.CreateZExt(V, SplatIntTy, "zext"),
3167 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3168 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3169 SplatIntTy)),
3170 "isplat");
3171 return V;
3172 }
3173
3174 /// Compute a vector splat for a given element value.
3175 Value *getVectorSplat(Value *V, unsigned NumElements) {
3176 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3177 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3178 return V;
3179 }
3180
3181 bool visitMemSetInst(MemSetInst &II) {
3182 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3183 assert(II.getRawDest() == OldPtr);
3184
3185 AAMDNodes AATags = II.getAAMetadata();
3186
3187 // If the memset has a variable size, it cannot be split, just adjust the
3188 // pointer to the new alloca.
3189 if (!isa<ConstantInt>(II.getLength())) {
3190 assert(!IsSplit);
3191 assert(NewBeginOffset == BeginOffset);
3192 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3193 II.setDestAlignment(getSliceAlign());
3194 // In theory we should call migrateDebugInfo here. However, we do not
3195 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3196 // constant geps, or storing a variable number of bytes.
3197 assert(at::getAssignmentMarkers(&II).empty() &&
3198 at::getDPVAssignmentMarkers(&II).empty() &&
3199 "AT: Unexpected link to non-const GEP");
3200 deleteIfTriviallyDead(OldPtr);
3201 return false;
3202 }
3203
3204 // Record this instruction for deletion.
3205 Pass.DeadInsts.push_back(&II);
3206
3207 Type *AllocaTy = NewAI.getAllocatedType();
3208 Type *ScalarTy = AllocaTy->getScalarType();
3209
3210 const bool CanContinue = [&]() {
3211 if (VecTy || IntTy)
3212 return true;
3213 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3214 return false;
3215 // Length must be in range for FixedVectorType.
3216 auto *C = cast<ConstantInt>(II.getLength());
3217 const uint64_t Len = C->getLimitedValue();
3218 if (Len > std::numeric_limits<unsigned>::max())
3219 return false;
3220 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3221 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3222 return canConvertValue(DL, SrcTy, AllocaTy) &&
3223 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3224 }();
3225
3226 // If this doesn't map cleanly onto the alloca type, and that type isn't
3227 // a single value type, just emit a memset.
3228 if (!CanContinue) {
3229 Type *SizeTy = II.getLength()->getType();
3230 unsigned Sz = NewEndOffset - NewBeginOffset;
3231 Constant *Size = ConstantInt::get(SizeTy, Sz);
3232 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3233 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3234 MaybeAlign(getSliceAlign()), II.isVolatile()));
3235 if (AATags)
3236 New->setAAMetadata(
3237 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3238
3239 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3240 New, New->getRawDest(), nullptr, DL);
3241
3242 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3243 return false;
3244 }
3245
3246 // If we can represent this as a simple value, we have to build the actual
3247 // value to store, which requires expanding the byte present in memset to
3248 // a sensible representation for the alloca type. This is essentially
3249 // splatting the byte to a sufficiently wide integer, splatting it across
3250 // any desired vector width, and bitcasting to the final type.
3251 Value *V;
3252
3253 if (VecTy) {
3254 // If this is a memset of a vectorized alloca, insert it.
3255 assert(ElementTy == ScalarTy);
3256
3257 unsigned BeginIndex = getIndex(NewBeginOffset);
3258 unsigned EndIndex = getIndex(NewEndOffset);
3259 assert(EndIndex > BeginIndex && "Empty vector!");
3260 unsigned NumElements = EndIndex - BeginIndex;
3261 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3262 "Too many elements!");
3263
3264 Value *Splat = getIntegerSplat(
3265 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3266 Splat = convertValue(DL, IRB, Splat, ElementTy);
3267 if (NumElements > 1)
3268 Splat = getVectorSplat(Splat, NumElements);
3269
3270 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3271 NewAI.getAlign(), "oldload");
3272 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3273 } else if (IntTy) {
3274 // If this is a memset on an alloca where we can widen stores, insert the
3275 // set integer.
3276 assert(!II.isVolatile());
3277
3278 uint64_t Size = NewEndOffset - NewBeginOffset;
3279 V = getIntegerSplat(II.getValue(), Size);
3280
3281 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3282 EndOffset != NewAllocaEndOffset)) {
3283 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3284 NewAI.getAlign(), "oldload");
3285 Old = convertValue(DL, IRB, Old, IntTy);
3286 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3287 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3288 } else {
3289 assert(V->getType() == IntTy &&
3290 "Wrong type for an alloca wide integer!");
3291 }
3292 V = convertValue(DL, IRB, V, AllocaTy);
3293 } else {
3294 // Established these invariants above.
3295 assert(NewBeginOffset == NewAllocaBeginOffset);
3296 assert(NewEndOffset == NewAllocaEndOffset);
3297
3298 V = getIntegerSplat(II.getValue(),
3299 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3300 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3301 V = getVectorSplat(
3302 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3303
3304 V = convertValue(DL, IRB, V, AllocaTy);
3305 }
3306
3307 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3308 StoreInst *New =
3309 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3310 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3311 LLVMContext::MD_access_group});
3312 if (AATags)
3313 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3314 V->getType(), DL));
3315
3316 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3317 New, New->getPointerOperand(), V, DL);
3318
3319 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3320 return !II.isVolatile();
3321 }
3322
3323 bool visitMemTransferInst(MemTransferInst &II) {
3324 // Rewriting of memory transfer instructions can be a bit tricky. We break
3325 // them into two categories: split intrinsics and unsplit intrinsics.
3326
3327 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3328
3329 AAMDNodes AATags = II.getAAMetadata();
3330
3331 bool IsDest = &II.getRawDestUse() == OldUse;
3332 assert((IsDest && II.getRawDest() == OldPtr) ||
3333 (!IsDest && II.getRawSource() == OldPtr));
3334
3335 Align SliceAlign = getSliceAlign();
3336 // For unsplit intrinsics, we simply modify the source and destination
3337 // pointers in place. This isn't just an optimization, it is a matter of
3338 // correctness. With unsplit intrinsics we may be dealing with transfers
3339 // within a single alloca before SROA ran, or with transfers that have
3340 // a variable length. We may also be dealing with memmove instead of
3341 // memcpy, and so simply updating the pointers is necessary for us to
3342 // update both source and dest of a single call.
3343 if (!IsSplittable) {
3344 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3345 if (IsDest) {
3346 // Update the address component of linked dbg.assigns.
3347 auto UpdateAssignAddress = [&](auto *DbgAssign) {
3348 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3349 DbgAssign->getAddress() == II.getDest())
3350 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3351 };
3352 for_each(at::getAssignmentMarkers(&II), UpdateAssignAddress);
3353 for_each(at::getDPVAssignmentMarkers(&II), UpdateAssignAddress);
3354 II.setDest(AdjustedPtr);
3355 II.setDestAlignment(SliceAlign);
3356 } else {
3357 II.setSource(AdjustedPtr);
3358 II.setSourceAlignment(SliceAlign);
3359 }
3360
3361 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3362 deleteIfTriviallyDead(OldPtr);
3363 return false;
3364 }
3365 // For split transfer intrinsics we have an incredibly useful assurance:
3366 // the source and destination do not reside within the same alloca, and at
3367 // least one of them does not escape. This means that we can replace
3368 // memmove with memcpy, and we don't need to worry about all manner of
3369 // downsides to splitting and transforming the operations.
3370
3371 // If this doesn't map cleanly onto the alloca type, and that type isn't
3372 // a single value type, just emit a memcpy.
3373 bool EmitMemCpy =
3374 !VecTy && !IntTy &&
3375 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3376 SliceSize !=
3377 DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
3378 !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
3379 !NewAI.getAllocatedType()->isSingleValueType());
3380
3381 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3382 // size hasn't been shrunk based on analysis of the viable range, this is
3383 // a no-op.
3384 if (EmitMemCpy && &OldAI == &NewAI) {
3385 // Ensure the start lines up.
3386 assert(NewBeginOffset == BeginOffset);
3387
3388 // Rewrite the size as needed.
3389 if (NewEndOffset != EndOffset)
3390 II.setLength(ConstantInt::get(II.getLength()->getType(),
3391 NewEndOffset - NewBeginOffset));
3392 return false;
3393 }
3394 // Record this instruction for deletion.
3395 Pass.DeadInsts.push_back(&II);
3396
3397 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3398 // alloca that should be re-examined after rewriting this instruction.
3399 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3400 if (AllocaInst *AI =
3401 dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3402 assert(AI != &OldAI && AI != &NewAI &&
3403 "Splittable transfers cannot reach the same alloca on both ends.");
3404 Pass.Worklist.insert(AI);
3405 }
3406
3407 Type *OtherPtrTy = OtherPtr->getType();
3408 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3409
3410 // Compute the relative offset for the other pointer within the transfer.
3411 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3412 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3413 Align OtherAlign =
3414 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3415 OtherAlign =
3416 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3417
3418 if (EmitMemCpy) {
3419 // Compute the other pointer, folding as much as possible to produce
3420 // a single, simple GEP in most cases.
3421 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3422 OtherPtr->getName() + ".");
3423
3424 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3425 Type *SizeTy = II.getLength()->getType();
3426 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3427
3428 Value *DestPtr, *SrcPtr;
3429 MaybeAlign DestAlign, SrcAlign;
3430 // Note: IsDest is true iff we're copying into the new alloca slice
3431 if (IsDest) {
3432 DestPtr = OurPtr;
3433 DestAlign = SliceAlign;
3434 SrcPtr = OtherPtr;
3435 SrcAlign = OtherAlign;
3436 } else {
3437 DestPtr = OtherPtr;
3438 DestAlign = OtherAlign;
3439 SrcPtr = OurPtr;
3440 SrcAlign = SliceAlign;
3441 }
3442 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3443 Size, II.isVolatile());
3444 if (AATags)
3445 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3446
3447 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3448 if (IsDest) {
3449 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3450 &II, New, DestPtr, nullptr, DL);
3451 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3452 DestPtr->stripAndAccumulateConstantOffsets(
3453 DL, Offset, /*AllowNonInbounds*/ true))) {
3454 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3455 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3456 }
3457 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3458 return false;
3459 }
3460
3461 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3462 NewEndOffset == NewAllocaEndOffset;
3463 uint64_t Size = NewEndOffset - NewBeginOffset;
3464 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3465 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3466 unsigned NumElements = EndIndex - BeginIndex;
3467 IntegerType *SubIntTy =
3468 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3469
3470 // Reset the other pointer type to match the register type we're going to
3471 // use, but using the address space of the original other pointer.
3472 Type *OtherTy;
3473 if (VecTy && !IsWholeAlloca) {
3474 if (NumElements == 1)
3475 OtherTy = VecTy->getElementType();
3476 else
3477 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3478 } else if (IntTy && !IsWholeAlloca) {
3479 OtherTy = SubIntTy;
3480 } else {
3481 OtherTy = NewAllocaTy;
3482 }
3483
3484 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3485 OtherPtr->getName() + ".");
3486 MaybeAlign SrcAlign = OtherAlign;
3487 MaybeAlign DstAlign = SliceAlign;
3488 if (!IsDest)
3489 std::swap(SrcAlign, DstAlign);
3490
3491 Value *SrcPtr;
3492 Value *DstPtr;
3493
3494 if (IsDest) {
3495 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3496 SrcPtr = AdjPtr;
3497 } else {
3498 DstPtr = AdjPtr;
3499 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3500 }
3501
3502 Value *Src;
3503 if (VecTy && !IsWholeAlloca && !IsDest) {
3504 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3505 NewAI.getAlign(), "load");
3506 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3507 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3508 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3509 NewAI.getAlign(), "load");
3510 Src = convertValue(DL, IRB, Src, IntTy);
3511 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3512 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3513 } else {
3514 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3515 II.isVolatile(), "copyload");
3516 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3517 LLVMContext::MD_access_group});
3518 if (AATags)
3519 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3520 Load->getType(), DL));
3521 Src = Load;
3522 }
3523
3524 if (VecTy && !IsWholeAlloca && IsDest) {
3525 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3526 NewAI.getAlign(), "oldload");
3527 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3528 } else if (IntTy && !IsWholeAlloca && IsDest) {
3529 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3530 NewAI.getAlign(), "oldload");
3531 Old = convertValue(DL, IRB, Old, IntTy);
3532 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3533 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3534 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3535 }
3536
3537 StoreInst *Store = cast<StoreInst>(
3538 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3539 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3540 LLVMContext::MD_access_group});
3541 if (AATags)
3542 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3543 Src->getType(), DL));
3544
3545 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3546 if (IsDest) {
3547
3548 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3549 Store, DstPtr, Src, DL);
3550 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3551 DstPtr->stripAndAccumulateConstantOffsets(
3552 DL, Offset, /*AllowNonInbounds*/ true))) {
3553 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3554 &II, Store, DstPtr, Src, DL);
3555 }
3556
3557 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3558 return !II.isVolatile();
3559 }
3560
3561 bool visitIntrinsicInst(IntrinsicInst &II) {
3562 assert((II.isLifetimeStartOrEnd() || II.isLaunderOrStripInvariantGroup() ||
3563 II.isDroppable()) &&
3564 "Unexpected intrinsic!");
3565 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3566
3567 // Record this instruction for deletion.
3568 Pass.DeadInsts.push_back(&II);
3569
3570 if (II.isDroppable()) {
3571 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3572 // TODO For now we forget assumed information, this can be improved.
3573 OldPtr->dropDroppableUsesIn(II);
3574 return true;
3575 }
3576
3577 if (II.isLaunderOrStripInvariantGroup())
3578 return true;
3579
3580 assert(II.getArgOperand(1) == OldPtr);
3581 // Lifetime intrinsics are only promotable if they cover the whole alloca.
3582 // Therefore, we drop lifetime intrinsics which don't cover the whole
3583 // alloca.
3584 // (In theory, intrinsics which partially cover an alloca could be
3585 // promoted, but PromoteMemToReg doesn't handle that case.)
3586 // FIXME: Check whether the alloca is promotable before dropping the
3587 // lifetime intrinsics?
3588 if (NewBeginOffset != NewAllocaBeginOffset ||
3589 NewEndOffset != NewAllocaEndOffset)
3590 return true;
3591
3592 ConstantInt *Size =
3593 ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
3594 NewEndOffset - NewBeginOffset);
3595 // Lifetime intrinsics always expect an i8* so directly get such a pointer
3596 // for the new alloca slice.
3597 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3598 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3599 Value *New;
3600 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3601 New = IRB.CreateLifetimeStart(Ptr, Size);
3602 else
3603 New = IRB.CreateLifetimeEnd(Ptr, Size);
3604
3605 (void)New;
3606 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3607
3608 return true;
3609 }
3610
3611 void fixLoadStoreAlign(Instruction &Root) {
3612 // This algorithm implements the same visitor loop as
3613 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3614 // or store found.
3615 SmallPtrSet<Instruction *, 4> Visited;
3616 SmallVector<Instruction *, 4> Uses;
3617 Visited.insert(&Root);
3618 Uses.push_back(&Root);
3619 do {
3620 Instruction *I = Uses.pop_back_val();
3621
3622 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3623 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3624 continue;
3625 }
3626 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3627 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3628 continue;
3629 }
3630
3631 assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3632 isa<PHINode>(I) || isa<SelectInst>(I) ||
3633 isa<GetElementPtrInst>(I));
3634 for (User *U : I->users())
3635 if (Visited.insert(cast<Instruction>(U)).second)
3636 Uses.push_back(cast<Instruction>(U));
3637 } while (!Uses.empty());
3638 }
3639
3640 bool visitPHINode(PHINode &PN) {
3641 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3642 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3643 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3644
3645 // We would like to compute a new pointer in only one place, but have it be
3646 // as local as possible to the PHI. To do that, we re-use the location of
3647 // the old pointer, which necessarily must be in the right position to
3648 // dominate the PHI.
3649 IRBuilderBase::InsertPointGuard Guard(IRB);
3650 if (isa<PHINode>(OldPtr))
3651 IRB.SetInsertPoint(OldPtr->getParent(),
3652 OldPtr->getParent()->getFirstInsertionPt());
3653 else
3654 IRB.SetInsertPoint(OldPtr);
3655 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3656
3657 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3658 // Replace the operands which were using the old pointer.
3659 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3660
3661 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
3662 deleteIfTriviallyDead(OldPtr);
3663
3664 // Fix the alignment of any loads or stores using this PHI node.
3665 fixLoadStoreAlign(PN);
3666
3667 // PHIs can't be promoted on their own, but often can be speculated. We
3668 // check the speculation outside of the rewriter so that we see the
3669 // fully-rewritten alloca.
3670 PHIUsers.insert(&PN);
3671 return true;
3672 }
3673
3674 bool visitSelectInst(SelectInst &SI) {
3675 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3676 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3677 "Pointer isn't an operand!");
3678 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3679 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3680
3681 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3682 // Replace the operands which were using the old pointer.
3683 if (SI.getOperand(1) == OldPtr)
3684 SI.setOperand(1, NewPtr);
3685 if (SI.getOperand(2) == OldPtr)
3686 SI.setOperand(2, NewPtr);
3687
3688 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
3689 deleteIfTriviallyDead(OldPtr);
3690
3691 // Fix the alignment of any loads or stores using this select.
3692 fixLoadStoreAlign(SI);
3693
3694 // Selects can't be promoted on their own, but often can be speculated. We
3695 // check the speculation outside of the rewriter so that we see the
3696 // fully-rewritten alloca.
3697 SelectUsers.insert(&SI);
3698 return true;
3699 }
3700};
3701
3702/// Visitor to rewrite aggregate loads and stores as scalar.
3703///
3704/// This pass aggressively rewrites all aggregate loads and stores on
3705/// a particular pointer (or any pointer derived from it which we can identify)
3706/// with scalar loads and stores.
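/// For illustration, an aggregate load such as
///   %x = load { i32, float }, ptr %p
/// is rewritten (roughly; the exact names are approximate) as
///   %x.fca.0.gep = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
///   %x.fca.0.load = load i32, ptr %x.fca.0.gep
///   %x.fca.0.insert = insertvalue { i32, float } poison, i32 %x.fca.0.load, 0
///   %x.fca.1.gep = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
///   %x.fca.1.load = load float, ptr %x.fca.1.gep
///   %x.fca.1.insert = insertvalue { i32, float } %x.fca.0.insert, float %x.fca.1.load, 1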
3707class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3708 // Befriend the base class so it can delegate to private visit methods.
3709 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3710
3711 /// Queue of pointer uses to analyze and potentially rewrite.
3712 SmallVector<Use *, 8> Queue;
3713
3714 /// Set to prevent us from cycling with phi nodes and loops.
3715 SmallPtrSet<User *, 8> Visited;
3716
3717 /// The current pointer use being rewritten. This is used to dig up the used
3718 /// value (as opposed to the user).
3719 Use *U = nullptr;
3720
3721 /// Used to calculate offsets, and hence alignment, of subobjects.
3722 const DataLayout &DL;
3723
3724 IRBuilderTy &IRB;
3725
3726public:
3727 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
3728 : DL(DL), IRB(IRB) {}
3729
3730 /// Rewrite loads and stores through a pointer and all pointers derived from
3731 /// it.
3732 bool rewrite(Instruction &I) {
3733 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3734 enqueueUsers(I);
3735 bool Changed = false;
3736 while (!Queue.empty()) {
3737 U = Queue.pop_back_val();
3738 Changed |= visit(cast<Instruction>(U->getUser()));
3739 }
3740 return Changed;
3741 }
3742
3743private:
3744 /// Enqueue all the users of the given instruction for further processing.
3745 /// This uses a set to de-duplicate users.
3746 void enqueueUsers(Instruction &I) {
3747 for (Use &U : I.uses())
3748 if (Visited.insert(U.getUser()).second)
3749 Queue.push_back(&U);
3750 }
3751
3752 // Conservative default is to not rewrite anything.
3753 bool visitInstruction(Instruction &I) { return false; }
3754
3755 /// Generic recursive split emission class.
3756 template <typename Derived> class OpSplitter {
3757 protected:
3758 /// The builder used to form new instructions.
3759 IRBuilderTy &IRB;
3760
3761 /// The indices which to be used with insert- or extractvalue to select the
3762 /// appropriate value within the aggregate.
3763 SmallVector<unsigned, 4> Indices;
3764
3765 /// The indices to a GEP instruction which will move Ptr to the correct slot
3766 /// within the aggregate.
3767 SmallVector<Value *, 4> GEPIndices;
3768
3769 /// The base pointer of the original op, used as a base for GEPing the
3770 /// split operations.
3771 Value *Ptr;
3772
3773 /// The base pointee type being GEPed into.
3774 Type *BaseTy;
3775
3776 /// Known alignment of the base pointer.
3777 Align BaseAlign;
3778
3779 /// To calculate offset of each component so we can correctly deduce
3780 /// alignments.
3781 const DataLayout &DL;
3782
3783 /// Initialize the splitter with an insertion point, Ptr and start with a
3784 /// single zero GEP index.
3785 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3786 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
3787 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
3788 BaseAlign(BaseAlign), DL(DL) {
3789 IRB.SetInsertPoint(InsertionPoint);
3790 }
3791
3792 public:
3793 /// Generic recursive split emission routine.
3794 ///
3795 /// This method recursively splits an aggregate op (load or store) into
3796 /// scalar or vector ops. It splits recursively until it hits a single value
3797 /// and emits that single value operation via the template argument.
3798 ///
3799 /// The logic of this routine relies on GEPs and insertvalue and
3800 /// extractvalue all operating with the same fundamental index list, merely
3801 /// formatted differently (GEPs need actual values).
3802 ///
3803 /// \param Ty The type being split recursively into smaller ops.
3804 /// \param Agg The aggregate value being built up or stored, depending on
3805 /// whether this is splitting a load or a store respectively.
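 /// For illustration, splitting over BaseTy = { i32, [2 x float] } reaches
 /// leaves with index lists {0}, {1,0} and {1,1}; the same lists feed
 /// insertvalue/extractvalue, while the GEP additionally carries the leading
 /// zero index, e.g. (0, 1, 0) for the element at {1, 0}.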
3806 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3807 if (Ty->isSingleValueType()) {
3808 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
3809 return static_cast<Derived *>(this)->emitFunc(
3810 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
3811 }
3812
3813 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3814 unsigned OldSize = Indices.size();
3815 (void)OldSize;
3816 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3817 ++Idx) {
3818 assert(Indices.size() == OldSize && "Did not return to the old size");
3819 Indices.push_back(Idx);
3820 GEPIndices.push_back(IRB.getInt32(Idx));
3821 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3822 GEPIndices.pop_back();
3823 Indices.pop_back();
3824 }
3825 return;
3826 }
3827
3828 if (StructType *STy = dyn_cast<StructType>(Ty)) {
3829 unsigned OldSize = Indices.size();
3830 (void)OldSize;
3831 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3832 ++Idx) {
3833 assert(Indices.size() == OldSize && "Did not return to the old size");
3834 Indices.push_back(Idx);
3835 GEPIndices.push_back(IRB.getInt32(Idx));
3836 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3837 GEPIndices.pop_back();
3838 Indices.pop_back();
3839 }
3840 return;
3841 }
3842
3843 llvm_unreachable("Only arrays and structs are aggregate loadable types");
3844 }
3845 };
3846
3847 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3848 AAMDNodes AATags;
3849
3850 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3851 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
3852 IRBuilderTy &IRB)
3853 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
3854 IRB),
3855 AATags(AATags) {}
3856
3857 /// Emit a leaf load of a single value. This is called at the leaves of the
3858 /// recursive emission to actually load values.
3859 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3860 assert(Ty->isSingleValueType());
3861 // Load the single value and insert it using the indices.
3862 Value *GEP =
3863 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3864 LoadInst *Load =
3865 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
3866
3867 APInt Offset(
3868 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3869 if (AATags &&
3870 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3871 Load->setAAMetadata(
3872 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
3873
3874 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3875 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
3876 }
3877 };
3878
3879 bool visitLoadInst(LoadInst &LI) {
3880 assert(LI.getPointerOperand() == *U);
3881 if (!LI.isSimple() || LI.getType()->isSingleValueType())
3882 return false;
3883
3884 // We have an aggregate being loaded, split it apart.
3885 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3886 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
3887 getAdjustedAlignment(&LI, 0), DL, IRB);
3888 Value *V = PoisonValue::get(LI.getType());
3889 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3890 Visited.erase(&LI);
3891 LI.replaceAllUsesWith(V);
3892 LI.eraseFromParent();
3893 return true;
3894 }
3895
3896 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3897 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3898 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
3899 const DataLayout &DL, IRBuilderTy &IRB)
3900 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
3901 DL, IRB),
3902 AATags(AATags), AggStore(AggStore) {}
3903 AAMDNodes AATags;
3904 StoreInst *AggStore;
3905 /// Emit a leaf store of a single value. This is called at the leaves of the
3906 /// recursive emission to actually produce stores.
3907 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3908 assert(Ty->isSingleValueType());
3909 // Extract the single value and store it using the indices.
3910 //
3911 // The gep and extractvalue values are factored out of the CreateStore
3912 // call to make the output independent of the argument evaluation order.
3913 Value *ExtractValue =
3914 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
3915 Value *InBoundsGEP =
3916 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3917 StoreInst *Store =
3918 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3919
3920 APInt Offset(
3921 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3922 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
3923 if (AATags) {
3924 Store->setAAMetadata(AATags.adjustForAccess(
3925 Offset.getZExtValue(), ExtractValue->getType(), DL));
3926 }
3927
3928 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
3929 // If we cannot (because there's an intervening non-const or unbounded
3930 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
3931 // this instruction.
3932 Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
3933 if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
3934 uint64_t SizeInBits =
3935 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
3936 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
3937 SizeInBits, AggStore, Store,
3938 Store->getPointerOperand(), Store->getValueOperand(),
3939 DL);
3940 } else {
3941 assert(at::getAssignmentMarkers(Store).empty() &&
3942 at::getDPVAssignmentMarkers(Store).empty() &&
3943 "AT: unexpected debug.assign linked to store through "
3944 "unbounded GEP");
3945 }
3946 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3947 }
3948 };
3949
3950 bool visitStoreInst(StoreInst &SI) {
3951 if (!SI.isSimple() || SI.getPointerOperand() != *U)
3952 return false;
3953 Value *V = SI.getValueOperand();
3954 if (V->getType()->isSingleValueType())
3955 return false;
3956
3957 // We have an aggregate being stored, split it apart.
3958 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3959 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
3960 getAdjustedAlignment(&SI, 0), DL, IRB);
3961 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3962 Visited.erase(&SI);
3963 // The stores replacing SI each have markers describing fragments of the
3964 // assignment so delete the assignment markers linked to SI.
3965 at::deleteAssignmentMarkers(&SI);
3966 SI.eraseFromParent();
3967 return true;
3968 }
3969
3970 bool visitBitCastInst(BitCastInst &BC) {
3971 enqueueUsers(BC);
3972 return false;
3973 }
3974
3975 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
3976 enqueueUsers(ASC);
3977 return false;
3978 }
3979
3980 // Unfold gep (select cond, ptr1, ptr2), idx
3981 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
3982 // and gep ptr, (select cond, idx1, idx2)
3983 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
3984 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
3985 // Check whether the GEP has exactly one select operand and all indices
3986 // will become constant after the transform.
3987 SelectInst *Sel = dyn_cast<SelectInst>(GEPI.getPointerOperand());
3988 for (Value *Op : GEPI.indices()) {
3989 if (auto *SI = dyn_cast<SelectInst>(Op)) {
3990 if (Sel)
3991 return false;
3992
3993 Sel = SI;
3994 if (!isa<ConstantInt>(Sel->getTrueValue()) ||
3995 !isa<ConstantInt>(Sel->getFalseValue()))
3996 return false;
3997 continue;
3998 }
3999
4000 if (!isa<ConstantInt>(Op))
4001 return false;
4002 }
4003
4004 if (!Sel)
4005 return false;
4006
4007 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4008 dbgs() << " original: " << *Sel << "\n";
4009 dbgs() << " " << GEPI << "\n";);
4010
4011 auto GetNewOps = [&](Value *SelOp) {
4012 SmallVector<Value *> NewOps;
4013 for (Value *Op : GEPI.operands())
4014 if (Op == Sel)
4015 NewOps.push_back(SelOp);
4016 else
4017 NewOps.push_back(Op);
4018 return NewOps;
4019 };
4020
4021 Value *True = Sel->getTrueValue();
4022 Value *False = Sel->getFalseValue();
4023 SmallVector<Value *> TrueOps = GetNewOps(True);
4024 SmallVector<Value *> FalseOps = GetNewOps(False);
4025
4026 IRB.SetInsertPoint(&GEPI);
4027 bool IsInBounds = GEPI.isInBounds();
4028
4029 Type *Ty = GEPI.getSourceElementType();
4030 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4031 True->getName() + ".sroa.gep", IsInBounds);
4032
4033 Value *NFalse =
4034 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4035 False->getName() + ".sroa.gep", IsInBounds);
4036
4037 Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
4038 Sel->getName() + ".sroa.sel");
4039 Visited.erase(&GEPI);
4040 GEPI.replaceAllUsesWith(NSel);
4041 GEPI.eraseFromParent();
4042 Instruction *NSelI = cast<Instruction>(NSel);
4043 Visited.insert(NSelI);
4044 enqueueUsers(*NSelI);
4045
4046 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4047 dbgs() << " " << *NFalse << "\n";
4048 dbgs() << " " << *NSel << "\n";);
4049
4050 return true;
4051 }
4052
4053 // Unfold gep (phi ptr1, ptr2), idx
4054 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4055 // and gep ptr, (phi idx1, idx2)
4056 // => phi ((gep ptr, idx1), (gep ptr, idx2))
4057 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4058 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4059 // operand (looking through the phi if it is the phi we want to unfold) is
4060 // an instruction besides a static alloca.
4061 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4062 auto IsInvalidPointerOperand = [](Value *V) {
4063 if (!isa<Instruction>(V))
4064 return false;
4065 if (auto *AI = dyn_cast<AllocaInst>(V))
4066 return !AI->isStaticAlloca();
4067 return true;
4068 };
4069 if (Phi) {
4070 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4071 return false;
4072 } else {
4073 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4074 return false;
4075 }
4076 // Check whether the GEP has exactly one phi operand (including the pointer
4077 // operand) and all indices will become constant after the transform.
4078 for (Value *Op : GEPI.indices()) {
4079 if (auto *SI = dyn_cast<PHINode>(Op)) {
4080 if (Phi)
4081 return false;
4082
4083 Phi = SI;
4084 if (!all_of(Phi->incoming_values(),
4085 [](Value *V) { return isa<ConstantInt>(V); }))
4086 return false;
4087 continue;
4088 }
4089
4090 if (!isa<ConstantInt>(Op))
4091 return false;
4092 }
4093
4094 if (!Phi)
4095 return false;
4096
4097 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4098 dbgs() << " original: " << *Phi << "\n";
4099 dbgs() << " " << GEPI << "\n";);
4100
4101 auto GetNewOps = [&](Value *PhiOp) {
4102 SmallVector<Value *> NewOps;
4103 for (Value *Op : GEPI.operands())
4104 if (Op == Phi)
4105 NewOps.push_back(PhiOp);
4106 else
4107 NewOps.push_back(Op);
4108 return NewOps;
4109 };
4110
4111 IRB.SetInsertPoint(Phi);
4112 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4113 Phi->getName() + ".sroa.phi");
4114
4115 bool IsInBounds = GEPI.isInBounds();
4116 Type *SourceTy = GEPI.getSourceElementType();
4117 // We only handle arguments, constants, and static allocas here, so we can
4118 // insert GEPs at the end of the entry block.
4119 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4120 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4121 Value *Op = Phi->getIncomingValue(I);
4122 BasicBlock *BB = Phi->getIncomingBlock(I);
4123 Value *NewGEP;
4124 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4125 NewGEP = NewPhi->getIncomingValue(NI);
4126 } else {
4127 SmallVector<Value *> NewOps = GetNewOps(Op);
4128 NewGEP =
4129 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4130 Phi->getName() + ".sroa.gep", IsInBounds);
4131 }
4132 NewPhi->addIncoming(NewGEP, BB);
4133 }
4134
4135 Visited.erase(&GEPI);
4136 GEPI.replaceAllUsesWith(NewPhi);
4137 GEPI.eraseFromParent();
4138 Visited.insert(NewPhi);
4139 enqueueUsers(*NewPhi);
4140
4141 LLVM_DEBUG(dbgs() << " to: ";
4142 for (Value *In
4143 : NewPhi->incoming_values()) dbgs()
4144 << "\n " << *In;
4145 dbgs() << "\n " << *NewPhi << '\n');
4146
4147 return true;
4148 }
4149
4150 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4151 if (unfoldGEPSelect(GEPI))
4152 return true;
4153
4154 if (unfoldGEPPhi(GEPI))
4155 return true;
4156
4157 enqueueUsers(GEPI);
4158 return false;
4159 }
4160
4161 bool visitPHINode(PHINode &PN) {
4162 enqueueUsers(PN);
4163 return false;
4164 }
4165
4166 bool visitSelectInst(SelectInst &SI) {
4167 enqueueUsers(SI);
4168 return false;
4169 }
4170};
4171
4172} // end anonymous namespace
4173
4174/// Strip aggregate type wrapping.
4175///
4176/// This removes no-op aggregate types wrapping an underlying type. It will
4177/// strip as many layers of types as it can without changing either the type
4178/// size or the allocated size.
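/// For illustration, { { float } } and [1 x i32] are stripped down to float
/// and i32 respectively, whereas { i32, i32 } is left untouched because its
/// first element covers only half of the struct's allocated size.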
4179static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
4180 if (Ty->isSingleValueType())
4181 return Ty;
4182
4183 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4184 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4185
4186 Type *InnerTy;
4187 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4188 InnerTy = ArrTy->getElementType();
4189 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4190 const StructLayout *SL = DL.getStructLayout(STy);
4191 unsigned Index = SL->getElementContainingOffset(0);
4192 InnerTy = STy->getElementType(Index);
4193 } else {
4194 return Ty;
4195 }
4196
4197 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4198 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4199 return Ty;
4200
4201 return stripAggregateTypeWrapping(DL, InnerTy);
4202}
4203
4204/// Try to find a partition of the aggregate type passed in for a given
4205/// offset and size.
4206///
4207/// This recurses through the aggregate type and tries to compute a subtype
4208/// based on the offset and size. When the offset and size span a sub-section
4209/// of an array, it will even compute a new array type for that sub-section,
4210/// and the same for structs.
4211///
4212/// Note that this routine is very strict and tries to find a partition of the
4213/// type which produces the *exact* right offset and size. It is not forgiving
4214/// when the size or offset causes either end of the type-based partition to be off.
4215/// Also, this is a best-effort routine. It is reasonable to give up and not
4216/// return a type if necessary.
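/// For illustration, assuming a typical data layout, partitioning
/// { i32, i32, i64 } with (Offset, Size) = (4, 4) yields i32, (0, 8) yields
/// the sub-struct { i32, i32 }, and (2, 4) yields nullptr because the range
/// straddles an element boundary.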
4217static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4218 uint64_t Size) {
4219 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4220 return stripAggregateTypeWrapping(DL, Ty);
4221 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4222 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4223 return nullptr;
4224
4225 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4226 Type *ElementTy;
4227 uint64_t TyNumElements;
4228 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4229 ElementTy = AT->getElementType();
4230 TyNumElements = AT->getNumElements();
4231 } else {
4232 // FIXME: This isn't right for vectors with non-byte-sized or
4233 // non-power-of-two sized elements.
4234 auto *VT = cast<FixedVectorType>(Ty);
4235 ElementTy = VT->getElementType();
4236 TyNumElements = VT->getNumElements();
4237 }
4238 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4239 uint64_t NumSkippedElements = Offset / ElementSize;
4240 if (NumSkippedElements >= TyNumElements)
4241 return nullptr;
4242 Offset -= NumSkippedElements * ElementSize;
4243
4244 // First check if we need to recurse.
4245 if (Offset > 0 || Size < ElementSize) {
4246 // Bail if the partition ends in a different array element.
4247 if ((Offset + Size) > ElementSize)
4248 return nullptr;
4249 // Recurse through the element type trying to peel off offset bytes.
4250 return getTypePartition(DL, ElementTy, Offset, Size);
4251 }
4252 assert(Offset == 0);
4253
4254 if (Size == ElementSize)
4255 return stripAggregateTypeWrapping(DL, ElementTy);
4256 assert(Size > ElementSize);
4257 uint64_t NumElements = Size / ElementSize;
4258 if (NumElements * ElementSize != Size)
4259 return nullptr;
4260 return ArrayType::get(ElementTy, NumElements);
4261 }
4262
4263 StructType *STy = dyn_cast<StructType>(Ty);
4264 if (!STy)
4265 return nullptr;
4266
4267 const StructLayout *SL = DL.getStructLayout(STy);
4268
4269 if (SL->getSizeInBits().isScalable())
4270 return nullptr;
4271
4272 if (Offset >= SL->getSizeInBytes())
4273 return nullptr;
4274 uint64_t EndOffset = Offset + Size;
4275 if (EndOffset > SL->getSizeInBytes())
4276 return nullptr;
4277
4278 unsigned Index = SL->getElementContainingOffset(Offset);
4279 Offset -= SL->getElementOffset(Index);
4280
4281 Type *ElementTy = STy->getElementType(Index);
4282 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4283 if (Offset >= ElementSize)
4284 return nullptr; // The offset points into alignment padding.
4285
4286 // See if any partition must be contained by the element.
4287 if (Offset > 0 || Size < ElementSize) {
4288 if ((Offset + Size) > ElementSize)
4289 return nullptr;
4290 return getTypePartition(DL, ElementTy, Offset, Size);
4291 }
4292 assert(Offset == 0);
4293
4294 if (Size == ElementSize)
4295 return stripAggregateTypeWrapping(DL, ElementTy);
4296
4297 StructType::element_iterator EI = STy->element_begin() + Index,
4298 EE = STy->element_end();
4299 if (EndOffset < SL->getSizeInBytes()) {
4300 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4301 if (Index == EndIndex)
4302 return nullptr; // Within a single element and its padding.
4303
4304 // Don't try to form "natural" types if the elements don't line up with the
4305 // expected size.
4306 // FIXME: We could potentially recurse down through the last element in the
4307 // sub-struct to find a natural end point.
4308 if (SL->getElementOffset(EndIndex) != EndOffset)
4309 return nullptr;
4310
4311 assert(Index < EndIndex);
4312 EE = STy->element_begin() + EndIndex;
4313 }
4314
4315 // Try to build up a sub-structure.
4316 StructType *SubTy =
4317 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4318 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4319 if (Size != SubSL->getSizeInBytes())
4320 return nullptr; // The sub-struct doesn't have quite the size needed.
4321
4322 return SubTy;
4323}
4324
4325/// Pre-split loads and stores to simplify rewriting.
4326///
4327/// We want to break up the splittable load+store pairs as much as
4328/// possible. This is important to do as a preprocessing step, as once we
4329/// start rewriting the accesses to partitions of the alloca we lose the
4330/// necessary information to correctly split apart paired loads and stores
4331/// which both point into this alloca. The case to consider is something like
4332/// the following:
4333///
4334/// %a = alloca [12 x i8]
4335/// %gep1 = getelementptr i8, ptr %a, i32 0
4336/// %gep2 = getelementptr i8, ptr %a, i32 4
4337/// %gep3 = getelementptr i8, ptr %a, i32 8
4338/// store float 0.0, ptr %gep1
4339/// store float 1.0, ptr %gep2
4340/// %v = load i64, ptr %gep1
4341/// store i64 %v, ptr %gep2
4342/// %f1 = load float, ptr %gep2
4343/// %f2 = load float, ptr %gep3
4344///
4345/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4346/// promote everything so we recover the 2 SSA values that should have been
4347/// there all along.
4348///
4349/// \returns true if any changes are made.
4350bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4351 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4352
4353 // Track the loads and stores which are candidates for pre-splitting here, in
4354 // the order they first appear during the partition scan. These give stable
4355 // iteration order and a basis for tracking which loads and stores we
4356 // actually split.
4357 SmallVector<LoadInst *, 4> Loads;
4358 SmallVector<StoreInst *, 4> Stores;
4359
4360 // We need to accumulate the splits required of each load or store where we
4361 // can find them via a direct lookup. This is important to cross-check loads
4362 // and stores against each other. We also track the slice so that we can kill
4363 // all the slices that end up split.
4364 struct SplitOffsets {
4365 Slice *S;
4366 std::vector<uint64_t> Splits;
4367 };
4368 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4369
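 // For illustration, in the IR example from the function comment above the
 // i64 load at offset 0 crosses the partition boundary at byte 4, so its
 // entry here ends up with Splits == {4} and it is later rewritten as two
 // i32 loads; the i64 store records the same relative split, keeping the
 // pair splittable.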
4370 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4371 // This is important as we also cannot pre-split stores of those loads!
4372 // FIXME: This is all pretty gross. It means that we can be more aggressive
4373 // in pre-splitting when the load feeding the store happens to come from
4374 // a separate alloca. Put another way, the effectiveness of SROA would be
4375 // decreased by a frontend which just concatenated all of its local allocas
4376 // into one big flat alloca. But defeating such patterns is exactly the job
4377 // SROA is tasked with! Sadly, to not have this discrepancy we would have to
4378 // change store pre-splitting to actually force pre-splitting of the load
4379 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4380 // maybe it would make it more principled?
4381 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4382
4383 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4384 for (auto &P : AS.partitions()) {
4385 for (Slice &S : P) {
4386 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4387 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4388 // If this is a load we have to track that it can't participate in any
4389 // pre-splitting. If this is a store of a load we have to track that
4390 // that load also can't participate in any pre-splitting.
4391 if (auto *LI = dyn_cast<LoadInst>(I))
4392 UnsplittableLoads.insert(LI);
4393 else if (auto *SI = dyn_cast<StoreInst>(I))
4394 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4395 UnsplittableLoads.insert(LI);
4396 continue;
4397 }
4398 assert(P.endOffset() > S.beginOffset() &&
4399 "Empty or backwards partition!");
4400
4401 // Determine if this is a pre-splittable slice.
4402 if (auto *LI = dyn_cast<LoadInst>(I)) {
4403 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4404
4405 // The load must be used exclusively to store into other pointers for
4406 // us to be able to arbitrarily pre-split it. The stores must also be
4407 // simple to avoid changing semantics.
4408 auto IsLoadSimplyStored = [](LoadInst *LI) {
4409 for (User *LU : LI->users()) {
4410 auto *SI = dyn_cast<StoreInst>(LU);
4411 if (!SI || !SI->isSimple())
4412 return false;
4413 }
4414 return true;
4415 };
4416 if (!IsLoadSimplyStored(LI)) {
4417 UnsplittableLoads.insert(LI);
4418 continue;
4419 }
4420
4421 Loads.push_back(LI);
4422 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4423 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4424 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4425 continue;
4426 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4427 if (!StoredLoad || !StoredLoad->isSimple())
4428 continue;
4429 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4430
4431 Stores.push_back(SI);
4432 } else {
4433 // Other uses cannot be pre-split.
4434 continue;
4435 }
4436
4437 // Record the initial split.
4438 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4439 auto &Offsets = SplitOffsetsMap[I];
4440 assert(Offsets.Splits.empty() &&
4441 "Should not have splits the first time we see an instruction!");
4442 Offsets.S = &S;
4443 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4444 }
4445
4446 // Now scan the already split slices, and add a split for any of them which
4447 // we're going to pre-split.
4448 for (Slice *S : P.splitSliceTails()) {
4449 auto SplitOffsetsMapI =
4450 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4451 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4452 continue;
4453 auto &Offsets = SplitOffsetsMapI->second;
4454
4455 assert(Offsets.S == S && "Found a mismatched slice!");
4456 assert(!Offsets.Splits.empty() &&
4457 "Cannot have an empty set of splits on the second partition!");
4458 assert(Offsets.Splits.back() ==
4459 P.beginOffset() - Offsets.S->beginOffset() &&
4460 "Previous split does not end where this one begins!");
4461
4462 // Record each split. The last partition's end isn't needed as the size
4463 // of the slice dictates that.
4464 if (S->endOffset() > P.endOffset())
4465 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4466 }
4467 }
4468
4469 // We may have split loads where some of their stores are split stores. For
4470 // such loads and stores, we can only pre-split them if their splits exactly
4471 // match relative to their starting offset. We have to verify this prior to
4472 // any rewriting.
4473 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4474 // Lookup the load we are storing in our map of split
4475 // offsets.
4476 auto *LI = cast<LoadInst>(SI->getValueOperand());
4477 // If it was completely unsplittable, then we're done,
4478 // and this store can't be pre-split.
4479 if (UnsplittableLoads.count(LI))
4480 return true;
4481
4482 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4483 if (LoadOffsetsI == SplitOffsetsMap.end())
4484 return false; // Unrelated loads are definitely safe.
4485 auto &LoadOffsets = LoadOffsetsI->second;
4486
4487 // Now lookup the store's offsets.
4488 auto &StoreOffsets = SplitOffsetsMap[SI];
4489
4490 // If the relative offsets of each split in the load and
4491 // store match exactly, then we can split them and we
4492 // don't need to remove them here.
4493 if (LoadOffsets.Splits == StoreOffsets.Splits)
4494 return false;
4495
4496 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4497 << " " << *LI << "\n"
4498 << " " << *SI << "\n");
4499
4500 // We've found a store and load that we need to split
4501 // with mismatched relative splits. Just give up on them
4502 // and remove both instructions from our list of
4503 // candidates.
4504 UnsplittableLoads.insert(LI);
4505 return true;
4506 });
4507 // Now we have to go *back* through all the stores, because a later store may
4508 // have caused an earlier store's load to become unsplittable, and if it is
4509 // unsplittable for the later store, then we can't rely on it being split in
4510 // the earlier store either.
4511 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4512 auto *LI = cast<LoadInst>(SI->getValueOperand());
4513 return UnsplittableLoads.count(LI);
4514 });
4515 // Once we've established all the loads that can't be split for some reason,
4516 // filter out any that made it into our list.
4517 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4518 return UnsplittableLoads.count(LI);
4519 });
4520
4521 // If no loads or stores are left, there is no pre-splitting to be done for
4522 // this alloca.
4523 if (Loads.empty() && Stores.empty())
4524 return false;
4525
4526 // From here on, we can't fail and will be building new accesses, so rig up
4527 // an IR builder.
4528 IRBuilderTy IRB(&AI);
4529
4530 // Collect the new slices which we will merge into the alloca slices.
4531 SmallVector<Slice, 4> NewSlices;
4532
4533 // Track any allocas we end up splitting loads and stores for, so that we
4534 // iterate on them.
4535 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4536
4537 // At this point, we have collected all of the loads and stores we can
4538 // pre-split, and the specific splits needed for them. We do the splitting
4539 // in a specific order so that we correctly handle the case where one of the
4540 // loads is the value operand of one of the stores.
4541 //
4542 // First, we rewrite all of the split loads, and just accumulate each split
4543 // load in a parallel structure. We also build the slices for them and append
4544 // them to the alloca slices.
4545 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4546 std::vector<LoadInst *> SplitLoads;
4547 const DataLayout &DL = AI.getModule()->getDataLayout();
4548 for (LoadInst *LI : Loads) {
4549 SplitLoads.clear();
4550
4551 auto &Offsets = SplitOffsetsMap[LI];
4552 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4553 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4554 "Load must have type size equal to store size");
4555 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4556 "Load must be >= slice size");
4557
4558 uint64_t BaseOffset = Offsets.S->beginOffset();
4559 assert(BaseOffset + SliceSize > BaseOffset &&
4560 "Cannot represent alloca access size using 64-bit integers!");
4561
4562 Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
4563 IRB.SetInsertPoint(LI);
4564
4565 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4566
4567 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4568 int Idx = 0, Size = Offsets.Splits.size();
4569 for (;;) {
4570 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4571 auto AS = LI->getPointerAddressSpace();
4572 auto *PartPtrTy = LI->getPointerOperandType();
4573 LoadInst *PLoad = IRB.CreateAlignedLoad(
4574 PartTy,
4575 getAdjustedPtr(IRB, DL, BasePtr,
4576 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4577 PartPtrTy, BasePtr->getName() + "."),
4578 getAdjustedAlignment(LI, PartOffset),
4579 /*IsVolatile*/ false, LI->getName());
4580 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4581 LLVMContext::MD_access_group});
4582
4583 // Append this load onto the list of split loads so we can find it later
4584 // to rewrite the stores.
4585 SplitLoads.push_back(PLoad);
4586
4587 // Now build a new slice for the alloca.
4588 NewSlices.push_back(
4589 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4590 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4591 /*IsSplittable*/ false));
4592 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4593 << ", " << NewSlices.back().endOffset()
4594 << "): " << *PLoad << "\n");
4595
4596 // See if we've handled all the splits.
4597 if (Idx >= Size)
4598 break;
4599
4600 // Setup the next partition.
4601 PartOffset = Offsets.Splits[Idx];
4602 ++Idx;
4603 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4604 }
4605
4606 // Now that we have the split loads, do the slow walk over all uses of the
4607 // load and rewrite them as split stores, or save the split loads to use
4608 // below if the store is going to be split there anyway.
4609 bool DeferredStores = false;
4610 for (User *LU : LI->users()) {
4611 StoreInst *SI = cast<StoreInst>(LU);
4612 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4613 DeferredStores = true;
4614 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
4615 << "\n");
4616 continue;
4617 }
4618
4619 Value *StoreBasePtr = SI->getPointerOperand();
4620 IRB.SetInsertPoint(SI);
4621 AAMDNodes AATags = SI->getAAMetadata();
4622
4623 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4624
4625 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4626 LoadInst *PLoad = SplitLoads[Idx];
4627 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4628 auto *PartPtrTy = SI->getPointerOperandType();
4629
4630 auto AS = SI->getPointerAddressSpace();
4631 StoreInst *PStore = IRB.CreateAlignedStore(
4632 PLoad,
4633 getAdjustedPtr(IRB, DL, StoreBasePtr,
4634 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4635 PartPtrTy, StoreBasePtr->getName() + "."),
4636 getAdjustedAlignment(SI, PartOffset),
4637 /*IsVolatile*/ false);
4638 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4639 LLVMContext::MD_access_group,
4640 LLVMContext::MD_DIAssignID});
4641
4642 if (AATags)
4643 PStore->setAAMetadata(
4644 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
4645 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
4646 }
4647
4648 // We want to immediately iterate on any allocas impacted by splitting
4649 // this store, and we have to track any promotable alloca (indicated by
4650 // a direct store) as needing to be resplit because it is no longer
4651 // promotable.
4652 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4653 ResplitPromotableAllocas.insert(OtherAI);
4654 Worklist.insert(OtherAI);
4655 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4656 StoreBasePtr->stripInBoundsOffsets())) {
4657 Worklist.insert(OtherAI);
4658 }
4659
4660 // Mark the original store as dead.
4661 DeadInsts.push_back(SI);
4662 }
4663
4664 // Save the split loads if there are deferred stores among the users.
4665 if (DeferredStores)
4666 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
4667
4668 // Mark the original load as dead and kill the original slice.
4669 DeadInsts.push_back(LI);
4670 Offsets.S->kill();
4671 }
4672
4673 // Second, we rewrite all of the split stores. At this point, we know that
4674 // all loads from this alloca have been split already. For stores of such
4675 // loads, we can simply look up the pre-existing split loads. For stores of
4676 // other loads, we split those loads first and then write split stores of
4677 // them.
4678 for (StoreInst *SI : Stores) {
4679 auto *LI = cast<LoadInst>(SI->getValueOperand());
4680 IntegerType *Ty = cast<IntegerType>(LI->getType());
4681 assert(Ty->getBitWidth() % 8 == 0);
4682 uint64_t StoreSize = Ty->getBitWidth() / 8;
4683 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
4684
4685 auto &Offsets = SplitOffsetsMap[SI];
4686 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
4687 "Slice size should always match load size exactly!");
4688 uint64_t BaseOffset = Offsets.S->beginOffset();
4689 assert(BaseOffset + StoreSize > BaseOffset &&
4690 "Cannot represent alloca access size using 64-bit integers!");
4691
4692 Value *LoadBasePtr = LI->getPointerOperand();
4693 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
4694
4695 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
4696
4697 // Check whether we have an already split load.
4698 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
4699 std::vector<LoadInst *> *SplitLoads = nullptr;
4700 if (SplitLoadsMapI != SplitLoadsMap.end()) {
4701 SplitLoads = &SplitLoadsMapI->second;
4702 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
4703 "Too few split loads for the number of splits in the store!");
4704 } else {
4705 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
4706 }
4707
4708 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4709 int Idx = 0, Size = Offsets.Splits.size();
4710 for (;;) {
4711 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
4712 auto *LoadPartPtrTy = LI->getPointerOperandType();
4713 auto *StorePartPtrTy = SI->getPointerOperandType();
4714
4715 // Either lookup a split load or create one.
4716 LoadInst *PLoad;
4717 if (SplitLoads) {
4718 PLoad = (*SplitLoads)[Idx];
4719 } else {
4720 IRB.SetInsertPoint(LI);
4721 auto AS = LI->getPointerAddressSpace();
4722 PLoad = IRB.CreateAlignedLoad(
4723 PartTy,
4724 getAdjustedPtr(IRB, DL, LoadBasePtr,
4725 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4726 LoadPartPtrTy, LoadBasePtr->getName() + "."),
4727 getAdjustedAlignment(LI, PartOffset),
4728 /*IsVolatile*/ false, LI->getName());
4729 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4730 LLVMContext::MD_access_group});
4731 }
4732
4733 // And store this partition.
4734 IRB.SetInsertPoint(SI);
4735 auto AS = SI->getPointerAddressSpace();
4736 StoreInst *PStore = IRB.CreateAlignedStore(
4737 PLoad,
4738 getAdjustedPtr(IRB, DL, StoreBasePtr,
4739 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4740 StorePartPtrTy, StoreBasePtr->getName() + "."),
4741 getAdjustedAlignment(SI, PartOffset),
4742 /*IsVolatile*/ false);
4743 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4744 LLVMContext::MD_access_group});
4745
4746 // Now build a new slice for the alloca.
4747 NewSlices.push_back(
4748 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4749 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
4750 /*IsSplittable*/ false));
4751 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4752 << ", " << NewSlices.back().endOffset()
4753 << "): " << *PStore << "\n");
4754 if (!SplitLoads) {
4755 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
4756 }
4757
4758 // See if we've finished all the splits.
4759 if (Idx >= Size)
4760 break;
4761
4762 // Setup the next partition.
4763 PartOffset = Offsets.Splits[Idx];
4764 ++Idx;
4765 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
4766 }
4767
4768 // We want to immediately iterate on any allocas impacted by splitting
4769 // this load, which is only relevant if it isn't a load of this alloca and
4770 // thus we didn't already split the loads above. We also have to keep track
4771 // of any promotable allocas we split loads on as they can no longer be
4772 // promoted.
4773 if (!SplitLoads) {
4774 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4775 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4776 ResplitPromotableAllocas.insert(OtherAI);
4777 Worklist.insert(OtherAI);
4778 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4779 LoadBasePtr->stripInBoundsOffsets())) {
4780 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4781 Worklist.insert(OtherAI);
4782 }
4783 }
4784
4785 // Mark the original store as dead now that we've split it up and kill its
4786 // slice. Note that we leave the original load in place unless this store
4787 // was its only use. It may in turn be split up if it is an alloca load
4788 // for some other alloca, but it may be a normal load. This may introduce
4789 // redundant loads, but where those can be merged the rest of the optimizer
4790 // should handle the merging, and this uncovers SSA splits which is more
4791 // important. In practice, the original loads will almost always be fully
4792 // split and removed eventually, and the splits will be merged by any
4793 // trivial CSE, including instcombine.
4794 if (LI->hasOneUse()) {
4795 assert(*LI->user_begin() == SI && "Single use isn't this store!");
4796 DeadInsts.push_back(LI);
4797 }
4798 DeadInsts.push_back(SI);
4799 Offsets.S->kill();
4800 }
4801
4802 // Remove the killed slices that have been pre-split.
4803 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
4804
4805 // Insert our new slices. This will sort and merge them into the sorted
4806 // sequence.
4807 AS.insert(NewSlices);
4808
4809 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
4810#ifndef NDEBUG
4811 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
4812 LLVM_DEBUG(AS.print(dbgs(), I, " "));
4813#endif
4814
4815 // Finally, don't try to promote any allocas that now require re-splitting.
4816 // They have already been added to the worklist above.
4817 llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
4818 return ResplitPromotableAllocas.count(AI);
4819 });
4820
4821 return true;
4822}
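
// The (PartOffset, PartSize) bookkeeping used by both splitting loops above is
// easy to get wrong: Offsets.Splits holds split points relative to the slice's
// begin offset, and the final part is terminated implicitly by the slice size
// rather than by an explicit entry. The helper below is a minimal standalone
// sketch of that iteration (illustration only; the name and the free function
// are ours and are not used by the pass), relying only on standard headers.
#include <cstdint>
#include <utility>
#include <vector>

inline std::vector<std::pair<uint64_t, uint64_t>>
enumeratePresplitParts(const std::vector<uint64_t> &Splits,
                       uint64_t SliceSize) {
  std::vector<std::pair<uint64_t, uint64_t>> Parts; // (offset, size) pairs
  uint64_t PartOffset = 0;
  for (size_t Idx = 0; Idx <= Splits.size(); ++Idx) {
    // Each split point ends the current part; the slice size ends the last.
    uint64_t PartEnd = Idx < Splits.size() ? Splits[Idx] : SliceSize;
    Parts.emplace_back(PartOffset, PartEnd - PartOffset);
    PartOffset = PartEnd;
  }
  return Parts;
}
// For example, Splits = {4, 8} with SliceSize = 16 yields (0, 4), (4, 4) and
// (8, 8), matching the partial loads and stores created above.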
4823
4824/// Rewrite an alloca partition's users.
4825///
4826/// This routine drives both of the rewriting goals of the SROA pass. It tries
4827/// to rewrite uses of an alloca partition to be conducive for SSA value
4828/// promotion. If the partition needs a new, more refined alloca, this will
4829/// build that new alloca, preserving as much type information as possible, and
4830/// rewrite the uses of the old alloca to point at the new one and have the
4831/// appropriate new offsets. It also evaluates how successful the rewrite was
4832/// at enabling promotion and if it was successful queues the alloca to be
4833/// promoted.
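///
/// The new alloca's type is chosen by falling back, in order, through: a type
/// used in common by all slices of the partition, a matching subtype of the
/// original allocated type at this offset, the widest common integer use type,
/// a target-legal integer of the partition's width, and finally an i8 array of
/// the partition's size.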
4834AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4835 Partition &P) {
4836 // Try to compute a friendly type for this partition of the alloca. This
4837 // won't always succeed, in which case we fall back to a legal integer type
4838 // or an i8 array of an appropriate size.
4839 Type *SliceTy = nullptr;
4840 VectorType *SliceVecTy = nullptr;
4841 const DataLayout &DL = AI.getModule()->getDataLayout();
4842 std::pair<Type *, IntegerType *> CommonUseTy =
4843 findCommonType(P.begin(), P.end(), P.endOffset());
4844 // Do all uses operate on the same type?
4845 if (CommonUseTy.first)
4846 if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) {
4847 SliceTy = CommonUseTy.first;
4848 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4849 }
4850 // If not, can we find an appropriate subtype in the original allocated type?
4851 if (!SliceTy)
4852 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4853 P.beginOffset(), P.size()))
4854 SliceTy = TypePartitionTy;
4855
4856 // If still not, can we use the largest bitwidth integer type used?
4857 if (!SliceTy && CommonUseTy.second)
4858 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
4859 SliceTy = CommonUseTy.second;
4860 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4861 }
4862 if ((!SliceTy || (SliceTy->isArrayTy() &&
4863 SliceTy->getArrayElementType()->isIntegerTy())) &&
4864 DL.isLegalInteger(P.size() * 8)) {
4865 SliceTy = Type::getIntNTy(*C, P.size() * 8);
4866 }
4867
4868 // If the common use types are not viable for promotion then attempt to find
4869 // another type that is viable.
4870 if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL))
4871 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4872 P.beginOffset(), P.size())) {
4873 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4874 if (TypePartitionVecTy &&
4875 checkVectorTypeForPromotion(P, TypePartitionVecTy, DL))
4876 SliceTy = TypePartitionTy;
4877 }
4878
4879 if (!SliceTy)
4880 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
4881 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
4882
4883 bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
4884
4885 VectorType *VecTy =
4886 IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
4887 if (VecTy)
4888 SliceTy = VecTy;
4889
4890 // Check for the case where we're going to rewrite to a new alloca of the
4891 // exact same type as the original, and with the same access offsets. In that
4892 // case, re-use the existing alloca, but still run through the rewriter to
4893 // perform phi and select speculation.
4894 // P.beginOffset() can be non-zero even with the same type in a case with
4895 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
4896 AllocaInst *NewAI;
4897 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
4898 NewAI = &AI;
4899 // FIXME: We should be able to bail at this point with "nothing changed".
4900 // FIXME: We might want to defer PHI speculation until after here.
4901 // FIXME: return nullptr;
4902 } else {
4903 // Make sure the alignment is compatible with P.beginOffset().
4904 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
4905 // If we will get at least this much alignment from the type alone, leave
4906 // the alloca's alignment unconstrained.
4907 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
4908 NewAI = new AllocaInst(
4909 SliceTy, AI.getAddressSpace(), nullptr,
4910 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
4911 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
4912 AI.getIterator());
4913 // Copy the old AI debug location over to the new one.
4914 NewAI->setDebugLoc(AI.getDebugLoc());
4915 ++NumNewAllocas;
4916 }
4917
4918 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
4919 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
4920
4921 // Track the high watermark on the worklist as it is only relevant for
4922 // promoted allocas. We will reset it to this point if the alloca is not in
4923 // fact scheduled for promotion.
4924 unsigned PPWOldSize = PostPromotionWorklist.size();
4925 unsigned NumUses = 0;
4926 SmallSetVector<PHINode *, 8> PHIUsers;
4927 SmallSetVector<SelectInst *, 8> SelectUsers;
4928
4929 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
4930 P.endOffset(), IsIntegerPromotable, VecTy,
4931 PHIUsers, SelectUsers);
4932 bool Promotable = true;
4933 for (Slice *S : P.splitSliceTails()) {
4934 Promotable &= Rewriter.visit(S);
4935 ++NumUses;
4936 }
4937 for (Slice &S : P) {
4938 Promotable &= Rewriter.visit(&S);
4939 ++NumUses;
4940 }
4941
4942 NumAllocaPartitionUses += NumUses;
4943 MaxUsesPerAllocaPartition.updateMax(NumUses);
4944
4945 // Now that we've processed all the slices in the new partition, check if any
4946 // PHIs or Selects would block promotion.
4947 for (PHINode *PHI : PHIUsers)
4948 if (!isSafePHIToSpeculate(*PHI)) {
4949 Promotable = false;
4950 PHIUsers.clear();
4951 SelectUsers.clear();
4952 break;
4953 }
4954
4955 SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
4956 NewSelectsToRewrite;
4957 NewSelectsToRewrite.reserve(SelectUsers.size());
4958 for (SelectInst *Sel : SelectUsers) {
4959 std::optional<RewriteableMemOps> Ops =
4960 isSafeSelectToSpeculate(*Sel, PreserveCFG);
4961 if (!Ops) {
4962 Promotable = false;
4963 PHIUsers.clear();
4964 SelectUsers.clear();
4965 NewSelectsToRewrite.clear();
4966 break;
4967 }
4968 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
4969 }
4970
4971 if (Promotable) {
4972 for (Use *U : AS.getDeadUsesIfPromotable()) {
4973 auto *OldInst = dyn_cast<Instruction>(U->get());
4974 Value::dropDroppableUse(*U);
4975 if (OldInst)
4976 if (isInstructionTriviallyDead(OldInst))
4977 DeadInsts.push_back(OldInst);
4978 }
4979 if (PHIUsers.empty() && SelectUsers.empty()) {
4980 // Promote the alloca.
4981 PromotableAllocas.push_back(NewAI);
4982 } else {
4983 // If we have either PHIs or Selects to speculate, add them to those
4984 // worklists and re-queue the new alloca so that we promote it on the
4985 // next iteration.
4986 for (PHINode *PHIUser : PHIUsers)
4987 SpeculatablePHIs.insert(PHIUser);
4988 SelectsToRewrite.reserve(SelectsToRewrite.size() +
4989 NewSelectsToRewrite.size());
4990 for (auto &&KV : llvm::make_range(
4991 std::make_move_iterator(NewSelectsToRewrite.begin()),
4992 std::make_move_iterator(NewSelectsToRewrite.end())))
4993 SelectsToRewrite.insert(std::move(KV));
4994 Worklist.insert(NewAI);
4995 }
4996 } else {
4997 // Drop any post-promotion work items if promotion didn't happen.
4998 while (PostPromotionWorklist.size() > PPWOldSize)
4999 PostPromotionWorklist.pop_back();
5000
5001 // We couldn't promote and we didn't create a new partition, nothing
5002 // happened.
5003 if (NewAI == &AI)
5004 return nullptr;
5005
5006 // If we can't promote the alloca, iterate on it to check for new
5007 // refinements exposed by splitting the current alloca. Don't iterate on an
5008 // alloca which didn't actually change and didn't get promoted.
5009 Worklist.insert(NewAI);
5010 }
5011
5012 return NewAI;
5013}
5014
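// Insert a new dbg.declare or dbg.assign (or the equivalent DPValue record)
// that describes NewAddr with the given fragment expression. The overloads
// below differ only in the kind of debug marker being migrated from the
// original alloca.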
5015static void insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig,
5016 AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
5017 Instruction *BeforeInst) {
5018 DIB.insertDeclare(NewAddr, Orig->getVariable(), NewFragmentExpr,
5019 Orig->getDebugLoc(), BeforeInst);
5020}
5021static void insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig,
5022 AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
5023 Instruction *BeforeInst) {
5024 (void)BeforeInst;
5025 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5026 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5027 DIAssignID::getDistinct(NewAddr->getContext()));
5028 }
5029 Instruction *NewAssign =
5030 DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(),
5031 NewFragmentExpr, NewAddr,
5032 Orig->getAddressExpression(), Orig->getDebugLoc())
5033 .get<Instruction *>();
5034 LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
5035 (void)NewAssign;
5036}
5037static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr,
5038 DIExpression *NewFragmentExpr,
5039 Instruction *BeforeInst) {
5040 (void)DIB;
5041 if (Orig->isDbgDeclare()) {
5042 DPValue *DPV = DPValue::createDPVDeclare(
5043 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5044 BeforeInst->getParent()->insertDbgRecordBefore(DPV,
5045 BeforeInst->getIterator());
5046 return;
5047 }
5048 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5049 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5050 DIAssignID::getDistinct(NewAddr->getContext()));
5051 }
5052 DPValue *NewAssign = DPValue::createLinkedDPVAssign(
5053 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5054 Orig->getAddressExpression(), Orig->getDebugLoc());
5055 LLVM_DEBUG(dbgs() << "Created new DPVAssign: " << *NewAssign << "\n");
5056 (void)NewAssign;
5057}
5058
5059/// Walks the slices of an alloca and forms partitions based on them,
5060/// rewriting each of their uses.
5061bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5062 if (AS.begin() == AS.end())
5063 return false;
5064
5065 unsigned NumPartitions = 0;
5066 bool Changed = false;
5067 const DataLayout &DL = AI.getModule()->getDataLayout();
5068
5069 // First try to pre-split loads and stores.
5070 Changed |= presplitLoadsAndStores(AI, AS);
5071
5072 // Now that we have identified any pre-splitting opportunities,
5073 // mark loads and stores unsplittable except for the following case.
5074 // We leave a slice splittable if all other slices are disjoint or fully
5075 // included in the slice, such as whole-alloca loads and stores.
5076 // If we fail to split these during pre-splitting, we want to force them
5077 // to be rewritten into a partition.
5078 bool IsSorted = true;
5079
5080 uint64_t AllocaSize =
5081 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5082 const uint64_t MaxBitVectorSize = 1024;
5083 if (AllocaSize <= MaxBitVectorSize) {
5084 // If a byte boundary is included in any load or store, a slice starting or
5085 // ending at the boundary is not splittable.
5086 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5087 for (Slice &S : AS)
5088 for (unsigned O = S.beginOffset() + 1;
5089 O < S.endOffset() && O < AllocaSize; O++)
5090 SplittableOffset.reset(O);
5091
5092 for (Slice &S : AS) {
5093 if (!S.isSplittable())
5094 continue;
5095
5096 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5097 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5098 continue;
5099
5100 if (isa<LoadInst>(S.getUse()->getUser()) ||
5101 isa<StoreInst>(S.getUse()->getUser())) {
5102 S.makeUnsplittable();
5103 IsSorted = false;
5104 }
5105 }
5106 } else {
5107 // We only allow whole-alloca splittable loads and stores
5108 // for a large alloca to avoid creating a BitVector that is too large.
5109 for (Slice &S : AS) {
5110 if (!S.isSplittable())
5111 continue;
5112
5113 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5114 continue;
5115
5116 if (isa<LoadInst>(S.getUse()->getUser()) ||
5117 isa<StoreInst>(S.getUse()->getUser())) {
5118 S.makeUnsplittable();
5119 IsSorted = false;
5120 }
5121 }
5122 }
5123
5124 if (!IsSorted)
5125 llvm::sort(AS);
5126
5127 /// Describes the allocas introduced by rewritePartition in order to migrate
5128 /// the debug info.
5129 struct Fragment {
5130 AllocaInst *Alloca;
5131 uint64_t Offset;
5132 uint64_t Size;
5133 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5134 : Alloca(AI), Offset(O), Size(S) {}
5135 };
5136 SmallVector<Fragment, 4> Fragments;
5137
5138 // Rewrite each partition.
5139 for (auto &P : AS.partitions()) {
5140 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5141 Changed = true;
5142 if (NewAI != &AI) {
5143 uint64_t SizeOfByte = 8;
5144 uint64_t AllocaSize =
5145 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5146 // Don't include any padding.
5147 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5148 Fragments.push_back(
5149 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5150 }
5151 }
5152 ++NumPartitions;
5153 }
5154
5155 NumAllocaPartitions += NumPartitions;
5156 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5157
5158 // Migrate debug information from the old alloca to the new alloca(s)
5159 // and the individual partitions.
5160 auto MigrateOne = [&](auto *DbgVariable) {
5161 auto *Expr = DbgVariable->getExpression();
5162 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5163 uint64_t AllocaSize =
5164 DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedValue();
5165 for (auto Fragment : Fragments) {
5166 // Create a fragment expression describing the new partition or reuse AI's
5167 // expression if there is only one partition.
5168 auto *FragmentExpr = Expr;
5169 if (Fragment.Size < AllocaSize || Expr->isFragment()) {
5170 // If this alloca is already a scalar replacement of a larger aggregate,
5171 // Fragment.Offset describes the offset inside the scalar.
5172 auto ExprFragment = Expr->getFragmentInfo();
5173 uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
5174 uint64_t Start = Offset + Fragment.Offset;
5175 uint64_t Size = Fragment.Size;
5176 if (ExprFragment) {
5177 uint64_t AbsEnd =
5178 ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
5179 if (Start >= AbsEnd) {
5180 // No need to describe a SROAed padding.
5181 continue;
5182 }
5183 Size = std::min(Size, AbsEnd - Start);
5184 }
5185 // The new, smaller fragment is stenciled out from the old fragment.
5186 if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
5187 assert(Start >= OrigFragment->OffsetInBits &&
5188 "new fragment is outside of original fragment");
5189 Start -= OrigFragment->OffsetInBits;
5190 }
5191
5192 // The alloca may be larger than the variable.
5193 auto VarSize = DbgVariable->getVariable()->getSizeInBits();
5194 if (VarSize) {
5195 if (Size > *VarSize)
5196 Size = *VarSize;
5197 if (Size == 0 || Start + Size > *VarSize)
5198 continue;
5199 }
5200
5201 // Avoid creating a fragment expression that covers the entire variable.
5202 if (!VarSize || *VarSize != Size) {
5203 if (auto E =
5204 DIExpression::createFragmentExpression(Expr, Start, Size))
5205 FragmentExpr = *E;
5206 else
5207 continue;
5208 }
5209 }
5210
5211 // Remove any existing intrinsics on the new alloca describing
5212 // the variable fragment.
5213 auto RemoveOne = [DbgVariable](auto *OldDII) {
5214 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5215 return LHS->getVariable() == RHS->getVariable() &&
5216 LHS->getDebugLoc()->getInlinedAt() ==
5217 RHS->getDebugLoc()->getInlinedAt();
5218 };
5219 if (SameVariableFragment(OldDII, DbgVariable))
5220 OldDII->eraseFromParent();
5221 };
5222 for_each(findDbgDeclares(Fragment.Alloca), RemoveOne);
5223 for_each(findDPVDeclares(Fragment.Alloca), RemoveOne);
5224
5225 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, FragmentExpr, &AI);
5226 }
5227 };
5228
5229 // Migrate debug information from the old alloca to the new alloca(s)
5230 // and the individual partitions.
5231 for_each(findDbgDeclares(&AI), MigrateOne);
5232 for_each(findDPVDeclares(&AI), MigrateOne);
5233 for_each(at::getAssignmentMarkers(&AI), MigrateOne);
5234 for_each(at::getDPVAssignmentMarkers(&AI), MigrateOne);
5235
5236 return Changed;
5237}
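
// The fragment arithmetic in MigrateOne above composes the variable's existing
// DW_OP_LLVM_fragment (if any) with the bit range covered by a new partition.
// Below is a minimal standalone sketch of that composition (illustration only;
// the struct and helper names are ours, and the later clamping to the
// variable's size is intentionally omitted).
#include <algorithm>
#include <cstdint>

struct SketchFragment {
  uint64_t OffsetInBits;
  uint64_t SizeInBits;
};

// Returns false when the partition lies entirely in padding past the end of
// the original fragment, in which case no new debug record is needed.
inline bool composeSketchFragment(const SketchFragment *OrigFrag,
                                  uint64_t PartOffsetInBits,
                                  uint64_t PartSizeInBits,
                                  SketchFragment &Result) {
  uint64_t Start = (OrigFrag ? OrigFrag->OffsetInBits : 0) + PartOffsetInBits;
  uint64_t Size = PartSizeInBits;
  if (OrigFrag) {
    uint64_t AbsEnd = OrigFrag->OffsetInBits + OrigFrag->SizeInBits;
    if (Start >= AbsEnd)
      return false; // Entirely SROA'd padding; nothing to describe.
    Size = std::min(Size, AbsEnd - Start);
  }
  Result = {Start, Size};
  return true;
}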
5238
5239/// Clobber a use with poison, deleting the used value if it becomes dead.
5240void SROA::clobberUse(Use &U) {
5241 Value *OldV = U;
5242 // Replace the use with a poison value.
5243 U = PoisonValue::get(OldV->getType());
5244
5245 // Check for this making an instruction dead. We have to garbage collect
5246 // all the dead instructions to ensure the uses of any alloca end up being
5247 // minimal.
5248 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5249 if (isInstructionTriviallyDead(OldI)) {
5250 DeadInsts.push_back(OldI);
5251 }
5252}
5253
5254/// Analyze an alloca for SROA.
5255///
5256/// This analyzes the alloca to ensure we can reason about it, builds
5257/// the slices of the alloca, and then hands it off to be split and
5258/// rewritten as needed.
5259std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5260SROA::runOnAlloca(AllocaInst &AI) {
5261 bool Changed = false;
5262 bool CFGChanged = false;
5263
5264 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5265 ++NumAllocasAnalyzed;
5266
5267 // Special case dead allocas, as they're trivial.
5268 if (AI.use_empty()) {
5269 AI.eraseFromParent();
5270 Changed = true;
5271 return {Changed, CFGChanged};
5272 }
5273 const DataLayout &DL = AI.getModule()->getDataLayout();
5274
5275 // Skip alloca forms that this analysis can't handle.
5276 auto *AT = AI.getAllocatedType();
5277 TypeSize Size = DL.getTypeAllocSize(AT);
5278 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5279 Size.getFixedValue() == 0)
5280 return {Changed, CFGChanged};
5281
5282 // First, split any FCA loads and stores touching this alloca to promote
5283 // better splitting and promotion opportunities.
5284 IRBuilderTy IRB(&AI);
5285 AggLoadStoreRewriter AggRewriter(DL, IRB);
5286 Changed |= AggRewriter.rewrite(AI);
5287
5288 // Build the slices using a recursive instruction-visiting builder.
5289 AllocaSlices AS(DL, AI);
5290 LLVM_DEBUG(AS.print(dbgs()));
5291 if (AS.isEscaped())
5292 return {Changed, CFGChanged};
5293
5294 // Delete all the dead users of this alloca before splitting and rewriting it.
5295 for (Instruction *DeadUser : AS.getDeadUsers()) {
5296 // Free up everything used by this instruction.
5297 for (Use &DeadOp : DeadUser->operands())
5298 clobberUse(DeadOp);
5299
5300 // Now replace the uses of this instruction.
5301 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5302
5303 // And mark it for deletion.
5304 DeadInsts.push_back(DeadUser);
5305 Changed = true;
5306 }
5307 for (Use *DeadOp : AS.getDeadOperands()) {
5308 clobberUse(*DeadOp);
5309 Changed = true;
5310 }
5311
5312 // No slices to split. Leave the dead alloca for a later pass to clean up.
5313 if (AS.begin() == AS.end())
5314 return {Changed, CFGChanged};
5315
5316 Changed |= splitAlloca(AI, AS);
5317
5318 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
5319 while (!SpeculatablePHIs.empty())
5320 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
5321
5322 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
5323 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5324 while (!RemainingSelectsToRewrite.empty()) {
5325 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5326 CFGChanged |=
5327 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
5328 }
5329
5330 return {Changed, CFGChanged};
5331}
5332
5333/// Delete the dead instructions accumulated in this run.
5334///
5335/// Recursively deletes the dead instructions we've accumulated. This is done
5336/// at the very end to maximize locality of the recursive delete and to
5337/// minimize the problems of invalidated instruction pointers as such pointers
5338/// are used heavily in the intermediate stages of the algorithm.
5339///
5340/// We also record the alloca instructions deleted here so that they aren't
5341/// subsequently handed to mem2reg to promote.
5342bool SROA::deleteDeadInstructions(
5343 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5344 bool Changed = false;
5345 while (!DeadInsts.empty()) {
5346 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
5347 if (!I)
5348 continue;
5349 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
5350
5351 // If the instruction is an alloca, find the possible dbg.declare connected
5352 // to it, and remove it too. We must do this before calling RAUW or we will
5353 // not be able to find it.
5354 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5355 DeletedAllocas.insert(AI);
5356 for (DbgDeclareInst *OldDII : findDbgDeclares(AI))
5357 OldDII->eraseFromParent();
5358 for (DPValue *OldDII : findDPVDeclares(AI))
5359 OldDII->eraseFromParent();
5360 }
5361
5362 at::deleteAssignmentMarkers(I);
5363 I->replaceAllUsesWith(UndefValue::get(I->getType()));
5364
5365 for (Use &Operand : I->operands())
5366 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
5367 // Zero out the operand and see if it becomes trivially dead.
5368 Operand = nullptr;
5369 if (isInstructionTriviallyDead(U))
5370 DeadInsts.push_back(U);
5371 }
5372
5373 ++NumDeleted;
5374 I->eraseFromParent();
5375 Changed = true;
5376 }
5377 return Changed;
5378}
5379
5380/// Promote the allocas, using the best available technique.
5381///
5382/// This attempts to promote whatever allocas have been identified as viable in
5383/// the PromotableAllocas list. If that list is empty, there is nothing to do.
5384/// This function returns whether any promotion occurred.
5385bool SROA::promoteAllocas(Function &F) {
5386 if (PromotableAllocas.empty())
5387 return false;
5388
5389 NumPromoted += PromotableAllocas.size();
5390
5391 if (SROASkipMem2Reg) {
5392 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
5393 } else {
5394 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
5395 PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC);
5396 }
5397
5398 PromotableAllocas.clear();
5399 return true;
5400}
5401
5402std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
5403 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
5404
5405 const DataLayout &DL = F.getParent()->getDataLayout();
5406 BasicBlock &EntryBB = F.getEntryBlock();
5407 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
5408 I != E; ++I) {
5409 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5410 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
5411 isAllocaPromotable(AI))
5412 PromotableAllocas.push_back(AI);
5413 else
5414 Worklist.insert(AI);
5415 }
5416 }
5417
5418 bool Changed = false;
5419 bool CFGChanged = false;
5420 // A set of deleted alloca instruction pointers which should be removed from
5421 // the list of promotable allocas.
5422 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
5423
5424 do {
5425 while (!Worklist.empty()) {
5426 auto [IterationChanged, IterationCFGChanged] =
5427 runOnAlloca(*Worklist.pop_back_val());
5428 Changed |= IterationChanged;
5429 CFGChanged |= IterationCFGChanged;
5430
5431 Changed |= deleteDeadInstructions(DeletedAllocas);
5432
5433 // Remove the deleted allocas from various lists so that we don't try to
5434 // continue processing them.
5435 if (!DeletedAllocas.empty()) {
5436 auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
5437 Worklist.remove_if(IsInSet);
5438 PostPromotionWorklist.remove_if(IsInSet);
5439 llvm::erase_if(PromotableAllocas, IsInSet);
5440 DeletedAllocas.clear();
5441 }
5442 }
5443
5444 Changed |= promoteAllocas(F);
5445
5446 Worklist = PostPromotionWorklist;
5447 PostPromotionWorklist.clear();
5448 } while (!Worklist.empty());
5449
5450 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
5451 assert((!CFGChanged || !PreserveCFG) &&
5452 "Should not have modified the CFG when told to preserve it.");
5453
5454 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
5455 for (auto &BB : F) {
5456 RemoveRedundantDbgInstrs(&BB);
5457 }
5458 }
5459
5460 return {Changed, CFGChanged};
5461}
5462
5463PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
5464 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
5465 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
5466 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5467 auto [Changed, CFGChanged] =
5468 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
5469 if (!Changed)
5470 return PreservedAnalyses::all();
5471 PreservedAnalyses PA;
5472 if (!CFGChanged)
5473 PA.preserveSet<CFGAnalyses>();
5474 PA.preserve<DominatorTreeAnalysis>();
5475 return PA;
5476}
5477
5478void SROAPass::printPipeline(
5479 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5480 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
5481 OS, MapClassName2PassName);
5482 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
5483 : "<modify-cfg>");
5484}
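
// A minimal sketch of how a client could schedule this pass through the new
// pass manager. This is illustrative only: the helper function below is ours,
// and it assumes the standard PassBuilder setup from
// "llvm/Passes/PassBuilder.h" with no extra analysis registration.
#include "llvm/Passes/PassBuilder.h"

static void runSROAOnFunctionForIllustration(Function &F) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);

  FunctionPassManager FPM;
  // Equivalent to the textual pipeline "sroa<preserve-cfg>"; use
  // SROAOptions::ModifyCFG for "sroa<modify-cfg>".
  FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
  FPM.run(F, FAM);
}
// From the command line the same configuration can be requested as, e.g.,
//   opt -passes='sroa<preserve-cfg>' -S input.ll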
5485
5486SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
5487
5488namespace {
5489
5490/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
5491class SROALegacyPass : public FunctionPass {
5492 SROAOptions PreserveCFG;
5493
5494public:
5495 static char ID;
5496
5497 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
5498 : FunctionPass(ID), PreserveCFG(PreserveCFG) {
5499 initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
5500 }
5501
5502 bool runOnFunction(Function &F) override {
5503 if (skipFunction(F))
5504 return false;
5505
5506 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5507 AssumptionCache &AC =
5508 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5509 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5510 auto [Changed, _] =
5511 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
5512 return Changed;
5513 }
5514
5515 void getAnalysisUsage(AnalysisUsage &AU) const override {
5516 AU.addRequired<AssumptionCacheTracker>();
5517 AU.addRequired<DominatorTreeWrapperPass>();
5518 AU.addPreserved<GlobalsAAWrapperPass>();
5519 AU.setPreservesCFG();
5520 }
5521
5522 StringRef getPassName() const override { return "SROA"; }
5523};
5524
5525} // end anonymous namespace
5526
5527char SROALegacyPass::ID = 0;
5528
5529FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
5530 return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
5531 : SROAOptions::ModifyCFG);
5532}
5533
5534INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
5535 "Scalar Replacement Of Aggregates", false, false)
5536INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5537INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5538INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
5539 false, false)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
Rewrite Partial Register Uses
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
static std::optional< uint64_t > getSizeInBytes(std::optional< uint64_t > SizeInBits)
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
#define P(N)
if(VerifyEach)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
static bool rewrite(Function &F)
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
Definition: SROA.cpp:353
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
Definition: SROA.cpp:1939
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
Definition: SROA.cpp:1506
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
Definition: SROA.cpp:4179
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy)
Test whether we can convert a value from the old to the new type.
Definition: SROA.cpp:1949
static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI)
Definition: SROA.cpp:318
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy)
Definition: SROA.cpp:2255
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
Definition: SROA.cpp:272
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
Definition: SROA.cpp:2539
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
Definition: SROA.cpp:1928
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
Definition: SROA.cpp:2380
Scalar Replacement Of Aggregates
Definition: SROA.cpp:5538
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
Definition: SROA.cpp:2572
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
Definition: SROA.cpp:1005
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
Definition: SROA.cpp:1894
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
Definition: SROA.cpp:1827
sroa
Definition: SROA.cpp:5538
static Value * foldSelectInst(SelectInst &SI)
Definition: SROA.cpp:992
static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, const DataLayout &DL)
Test whether a vector type is viable for promotion.
Definition: SROA.cpp:2139
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
Definition: SROA.cpp:2594
DPValue * UnwrapDbgInstPtr(DbgInstPtr P, DPValue *Unused)
Helpers for handling new and old debug info modes in migrateDebugInfo.
Definition: SROA.cpp:330
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
Definition: SROA.cpp:2473
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
Definition: SROA.cpp:1646
static void insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig, AllocaInst *NewAddr, DIExpression *NewFragmentExpr, Instruction *BeforeInst)
Definition: SROA.cpp:5015
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
Definition: SROA.cpp:1572
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
Definition: SROA.cpp:2514
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL)
Test whether the given slice use can be promoted to a vector.
Definition: SROA.cpp:2065
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
Definition: SROA.cpp:1789
static cl::opt< bool > SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden)
Hidden option to experiment with completely strict handling of inbounds GEPs.
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
Definition: SROA.cpp:4217
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
Definition: SROA.cpp:1727
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
Definition: SROA.cpp:2011
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
Definition: SROA.cpp:2299
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy)
Test whether any vector type in CandidateTys is viable for promotion.
Definition: SROA.cpp:2168
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This defines the Use class.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
Builder for the alloca slices.
Definition: SROA.cpp:1017
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
Definition: SROA.cpp:1033
An iterator over partitions of the alloca's slices.
Definition: SROA.cpp:805
bool operator==(const partition_iterator &RHS) const
Definition: SROA.cpp:952
partition_iterator & operator++()
Definition: SROA.cpp:972
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:132
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:107
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:125
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:112
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
void insertDbgRecordBefore(DbgRecord *DPV, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
This class represents a no-op cast from one type to another.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
This class represents a function call, abstracting a target machine's calling convention.
ConstantFolder - Create constants with minimum, target independent, folding.
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:153
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
Definition: DIBuilder.cpp:944
DWARF expression.
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
std::optional< uint64_t > getSizeInBits() const
Determines the size of the variable's type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Value * getValue(unsigned OpIdx=0) const
static DPValue * createDPVDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static DPValue * createLinkedDPVAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
DIExpression * getAddressExpression() const
DILocalVariable * getVariable() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool typeSizeEqualsStoreSize(Type *Ty) const
Returns true if no extra padding bits are needed when storing the specified type.
Definition: DataLayout.h:492
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:720
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
This represents the llvm.dbg.assign instruction.
DIExpression * getAddressExpression() const
This represents the llvm.dbg.declare instruction.
DebugLoc getDebugLoc() const
Value * getValue(unsigned OpIdx=0) const
This is the common base class for debug info intrinsics for variables.
DILocalVariable * getVariable() const
This class is used to track local variable information.
Definition: DwarfDebug.h:214
const DILocalVariable * getVariable() const
Definition: DwarfDebug.h:246
DILocation * getInlinedAt() const
Definition: DebugLoc.cpp:39
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:782
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Definition: Operator.cpp:97
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
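accumulateConstantOffset is how an analysis can collapse a chain of constant GEP indices into a single byte offset from the base pointer. A hedged sketch of that pattern; the helper name is made up for illustration and a GetElementPtrInst plus DataLayout are assumed to be in hand:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative: returns true and sets ByteOffset if every GEP index is constant.
static bool getConstantGEPOffset(const GetElementPtrInst *GEP,
                                 const DataLayout &DL, int64_t &ByteOffset) {
  unsigned IdxWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt Offset(IdxWidth, 0);
  if (!GEP->accumulateConstantOffset(DL, Offset))
    return false;              // at least one index is not a constant
  ByteOffset = Offset.getSExtValue();
  return true;
}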
Legacy wrapper pass to provide the GlobalsAAResult object.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
Definition: IRBuilder.h:61
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const
Definition: IRBuilder.h:65
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
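IRBuilder is the uniform instruction-creation API that rewriting passes use to emit replacement code. A minimal sketch of typical use, assuming a basic block to append to; the value names are illustrative:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void buildLoadStore(BasicBlock *BB, Value *Ptr) {
  IRBuilder<> B(BB);                               // insert at the end of BB
  Type *I32 = B.getInt32Ty();
  Value *L = B.CreateLoad(I32, Ptr, "reload");     // typed load from Ptr
  Value *Sum = B.CreateAdd(L, B.getInt32(1));
  B.CreateStore(Sum, Ptr);
}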
Base class for instruction visitors.
Definition: InstVisitor.h:78
bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:80
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1718
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:340
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:148
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:84
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:358
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1633
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1704
bool isLaunderOrStripInvariantGroup() const LLVM_READONLY
Return true if the instruction is a llvm.launder.invariant.group or llvm.strip.invariant....
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:52
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
void setAlignment(Align Align)
Definition: Instructions.h:240
Value * getPointerOperand()
Definition: Instructions.h:280
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:230
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:266
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:245
Type * getPointerOperandType() const
Definition: Instructions.h:283
static unsigned getPointerOperandIndex()
Definition: Instructions.h:282
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:255
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
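The LoadInst accessors above are the usual way a rewriter checks whether a load is plain enough to fold or widen. A small hedged sketch of such a check; the predicate name is made up for illustration:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative: accept only plain, sufficiently aligned loads.
static bool isPlainAlignedLoad(const LoadInst &LI, Align Required) {
  if (!LI.isSimple())                      // rejects volatile and atomic loads
    return false;
  return LI.getAlign().value() >= Required.value();
}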
const Use & getRawDestUse() const
Value * getLength() const
Value * getRawDest() const
void setLength(Value *L)
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
void setDestAlignment(MaybeAlign Alignment)
void setDest(Value *Ptr)
Set the specified arguments of the instruction.
MaybeAlign getDestAlign() const
unsigned getDestAddressSpace() const
This is the common base class for memset/memcpy/memmove.
bool isVolatile() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
void setSource(Value *Ptr)
Value * getRawSource() const
Return the arguments to the instruction.
unsigned getSourceAddressSpace() const
MaybeAlign getSourceAlign() const
void setSourceAlignment(MaybeAlign Alignment)
This class wraps the llvm.memcpy/memmove intrinsics.
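The memset/memcpy/memmove wrapper classes above give typed access to the intrinsic operands, which is how whole-alloca copies get inspected before being split or retargeted. A hedged sketch of inspecting a transfer, assuming an arbitrary instruction that may or may not be one:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

void inspectTransfer(Instruction &I) {
  auto *MTI = dyn_cast<MemTransferInst>(&I);   // memcpy or memmove
  if (!MTI || MTI->isVolatile())
    return;
  Value *Src = MTI->getRawSource();
  Value *Dst = MTI->getRawDest();
  Value *Len = MTI->getLength();
  MaybeAlign SrcAlign = MTI->getSourceAlign();
  (void)Src; (void)Dst; (void)Len; (void)SrcAlign;
}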
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
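PHI construction with addIncoming is how values get merged back together after new control flow is introduced. A minimal sketch that creates the PHI through IRBuilder; the block and value names are illustrative:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Merge two incoming values at the start of MergeBB.
static PHINode *mergeValues(BasicBlock *MergeBB, Value *ThenV, BasicBlock *ThenBB,
                            Value *ElseV, BasicBlock *ElseBB) {
  IRBuilder<> B(MergeBB, MergeBB->begin());
  PHINode *PN = B.CreatePHI(ThenV->getType(), /*NumReservedValues=*/2, "merge");
  PN->addIncoming(ThenV, ThenBB);
  PN->addIncoming(ElseV, ElseBB);
  return PN;
}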
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
PointerIntPair - This class implements a pair of a pointer and small integer.
void setPointer(PointerTy PtrVal) &
IntType getInt() const
void setInt(IntType IntVal) &
PointerTy getPointer() const
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
Definition: PointerUnion.h:118
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
A base class for visitors over the uses of a pointer value.
void visitGetElementPtrInst(GetElementPtrInst &GEPI)
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC)
void visitBitCastInst(BitCastInst &BC)
void visitIntrinsicInst(IntrinsicInst &II)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: SROA.cpp:5463
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SROA.cpp:5478
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Definition: SROA.cpp:5486
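The run, printPipeline, and constructor entries above are the new-pass-manager interface to this pass. A hedged sketch of scheduling it programmatically; the surrounding pass-manager setup is assumed to exist already:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SROA.h"
using namespace llvm;

void addSROA(FunctionPassManager &FPM) {
  // Roughly what the textual pipeline option sroa<preserve-cfg> selects.
  FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
}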
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getTrueValue() const
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:591
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
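SmallVector and the SetVector variants above are the workhorse worklist containers in this file: SmallVector for plain ordered storage, SetVector when insertion order must be kept while duplicates are rejected. A small illustrative sketch:

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

void worklistDemo() {
  SmallVector<int, 8> Vec;          // no heap allocation up to 8 elements
  Vec.push_back(1);
  Vec.emplace_back(2);

  SmallSetVector<int, 8> Work;      // deduplicating, order-preserving
  Work.insert(1);
  bool Inserted = Work.insert(1);   // false: already present
  (void)Inserted;
}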
An instruction for storing to memory.
Definition: Instructions.h:317
void setAlignment(Align Align)
Definition: Instructions.h:373
Value * getValueOperand()
Definition: Instructions.h:414
static unsigned getPointerOperandIndex()
Definition: Instructions.h:419
Value * getPointerOperand()
Definition: Instructions.h:417
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:400
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:567
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition: StringRef.h:343
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:293
static constexpr size_t npos
Definition: StringRef.h:52
size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C, or npos if not found.
Definition: StringRef.cpp:251
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
TypeSize getSizeInBytes() const
Definition: DataLayout.h:629
unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
Definition: DataLayout.cpp:92
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
TypeSize getSizeInBits() const
Definition: DataLayout.h:631
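StructLayout is what maps a byte offset inside an aggregate back to a member, which is the core of deciding which field a given slice touches. A hedged sketch, assuming a DataLayout and a StructType are available; the helper name is illustrative:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Illustrative: which element of STy contains byte ByteOff, and at what offset?
static Type *elementAtOffset(const DataLayout &DL, StructType *STy,
                             uint64_t ByteOff, uint64_t &OffsetInElement) {
  const StructLayout *SL = DL.getStructLayout(STy);
  if (ByteOff >= SL->getSizeInBytes().getFixedValue())
    return nullptr;
  unsigned Idx = SL->getElementContainingOffset(ByteOff);
  OffsetInElement = ByteOff - SL->getElementOffset(Idx).getFixedValue();
  return STy->getElementType(Idx);
}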
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
element_iterator element_end() const
Definition: DerivedTypes.h:332
element_iterator element_begin() const
Definition: DerivedTypes.h:331
bool isPacked() const
Definition: DerivedTypes.h:278
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:342
Type::subtype_iterator element_iterator
Definition: DerivedTypes.h:329
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:252
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
Type * getArrayElementType() const
Definition: Type.h:404
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:287
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:207
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
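The Type predicates above are how a rewriter classifies a candidate type before choosing an integer, vector, or element-wise treatment. A small illustrative sketch of that kind of classification; the function name is made up:

#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative classification: is Ty something a single register could hold?
static bool isRegisterFriendly(Type *Ty) {
  if (Ty->isIntegerTy() || Ty->isPointerTy())
    return true;
  // Aggregates and target extension types need element-wise handling.
  if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isTargetExtTy())
    return false;
  return Ty->isSingleValueType();
}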
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
op_iterator op_begin()
Definition: User.h:234
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
op_iterator op_end()
Definition: User.h:236
bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition: User.cpp:115
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition: Value.cpp:785
iterator_range< user_iterator > users()
Definition: Value.h:421
static void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition: Value.cpp:217
void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
Definition: Value.cpp:209
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
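replaceAllUsesWith plus takeName is the standard way a rewritten value supplants the original. A minimal sketch, assuming Old and New already exist and have the same type:

#include "llvm/IR/Value.h"
#include <cassert>
using namespace llvm;

void replaceValue(Value *Old, Value *New) {
  assert(Old->getType() == New->getType() && "type mismatch");
  New->takeName(Old);              // keep the old name for readability
  Old->replaceAllUsesWith(New);    // every use of Old now points at New
}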
void setEscapedAndAborted(Instruction *I=nullptr)
Mark the pointer as escaped, and the visit as aborted.
void setAborted(Instruction *I=nullptr)
Mark the visit as aborted.
Definition: PtrUseVisitor.h:84
APInt Offset
The constant offset of the use if that is known.
void enqueueUsers(Instruction &I)
Enqueue the users of this instruction in the visit worklist.
bool IsOffsetKnown
True if we have a known constant offset for the use currently being visited.
PtrInfo PI
The info collected about the pointer being visited thus far.
Use * U
The use currently being visited.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:109
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:227
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1522
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1787
SmallVector< DPValue * > getDPVAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:234
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1801
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1724
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
TinyPtrVector< DbgDeclareInst * > findDbgDeclares(Value *V)
Finds dbg.declare intrinsics declaring local variables as living in the memory that 'V' points to.
Definition: DebugInfo.cpp:47
void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
auto successors(const MachineBasicBlock *BB)
void * PointerTy
Definition: GenericValue.h:21
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
Definition: Local.cpp:3331
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1356
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
gep_type_iterator gep_type_end(const User *GEP)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
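isAllocaPromotable and PromoteMemToReg together form the mem2reg utility this pass relies on once an alloca's uses have been rewritten. A hedged sketch of the usual calling pattern, assuming a DominatorTree and (optionally) an AssumptionCache are available; the function name is illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

void promoteEntryAllocas(Function &F, DominatorTree &DT, AssumptionCache *AC) {
  SmallVector<AllocaInst *, 8> Allocas;
  for (Instruction &I : F.getEntryBlock())
    if (auto *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))      // only direct load/store uses, no escapes
        Allocas.push_back(AI);
  if (!Allocas.empty())
    PromoteMemToReg(Allocas, DT, AC);  // inserts PHIs and deletes the allocas
}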
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2068
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
TinyPtrVector< DPValue * > findDPVDeclares(Value *V)
As above, for DPVDeclares.
Definition: DebugInfo.cpp:66
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
void initializeSROALegacyPassPass(PassRegistry &)
bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
Definition: DebugInfo.cpp:2322
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2060
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:350
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
FunctionPass * createSROAPass(bool PreserveCFG=true)
Definition: SROA.cpp:5529
SROAOptions
Definition: SROA.h:24
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NDEBUG
Definition: regutils.h:48
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
Definition: Metadata.h:814
AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Describes an element of a Bitfield.
Definition: Bitfields.h:223
Holds functions to get, set or test bitfields.
Definition: Bitfields.h:212
Holds the characteristics of one fragment of a larger variable.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:91
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254