SROA.cpp
1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well-known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
24
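// Illustrative sketch (editorial, hedged): a typical input and the result SROA
// aims for. The IR below is a hypothetical example, not taken from a test in
// this repository.
//
//   %pair = alloca { i32, i32 }
//   store i32 %a, ptr %pair
//   %f1 = getelementptr inbounds i8, ptr %pair, i64 4
//   store i32 %b, ptr %f1
//   %v0 = load i32, ptr %pair
//   %v1 = load i32, ptr %f1
//
// After slicing the alloca into its two i32 elements and promoting each slice,
// the alloca, stores, and loads disappear and the uses of %v0 and %v1 are
// rewritten to use %a and %b directly.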
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DIBuilder.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/Metadata.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/PassManager.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/User.h"
74#include "llvm/IR/Value.h"
75#include "llvm/IR/ValueHandle.h"
77#include "llvm/Pass.h"
81#include "llvm/Support/Debug.h"
89#include <algorithm>
90#include <cassert>
91#include <cstddef>
92#include <cstdint>
93#include <cstring>
94#include <iterator>
95#include <queue>
96#include <string>
97#include <tuple>
98#include <utility>
99#include <variant>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "sroa"
105
106STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
107STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
108STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
109STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
110STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
111STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
112STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
113STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
114STATISTIC(NumLoadsPredicated,
115 "Number of loads rewritten into predicated loads to allow promotion");
116STATISTIC(
117 NumStoresPredicated,
118 "Number of stores rewritten into predicated stores to allow promotion");
119STATISTIC(NumDeleted, "Number of instructions deleted");
120STATISTIC(NumVectorized, "Number of vectorized aggregates");
121
122namespace llvm {
123/// Disable running mem2reg during SROA in order to test or debug SROA.
124static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
125 cl::Hidden);
127} // namespace llvm
128
129namespace {
130
131class AllocaSliceRewriter;
132class AllocaSlices;
133class Partition;
134
135class SelectHandSpeculativity {
136 unsigned char Storage = 0; // None are speculatable by default.
137 using TrueVal = Bitfield::Element<bool, 0, 1>; // Bit 0 (lowest).
138 using FalseVal = Bitfield::Element<bool, 1, 1>; // Bit 1.
139public:
140 SelectHandSpeculativity() = default;
141 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
142 bool isSpeculatable(bool isTrueVal) const;
143 bool areAllSpeculatable() const;
144 bool areAnySpeculatable() const;
145 bool areNoneSpeculatable() const;
146 // For interop as int half of PointerIntPair.
147 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
148 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
149};
150static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
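// For illustration (editorial note): the two Bitfield elements pack into the
// single Storage byte. For example, given
//   SelectHandSpeculativity S;
//   S.setAsSpeculatable(/*isTrueVal=*/true);
// only bit 0 is set, so isSpeculatable(true) is true, isSpeculatable(false) is
// false, areAnySpeculatable() is true, and areAllSpeculatable() is false.
// Marking the false hand as well would additionally set bit 1.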
151
152using PossiblySpeculatableLoad =
153 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
154using UnspeculatableStore = StoreInst *;
155using RewriteableMemOp =
156 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
157using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
158
159/// An optimization pass providing Scalar Replacement of Aggregates.
160///
161/// This pass takes allocations which can be completely analyzed (that is, they
162/// don't escape) and tries to turn them into scalar SSA values. There are
163/// a few steps to this process.
164///
165/// 1) It takes allocations of aggregates and analyzes the ways in which they
166/// are used to try to split them into smaller allocations, ideally of
167/// a single scalar data type. It will split up memcpy and memset accesses
168/// as necessary and try to isolate individual scalar accesses.
169/// 2) It will transform accesses into forms which are suitable for SSA value
170/// promotion. This can be replacing a memset with a scalar store of an
171/// integer value, or it can involve speculating operations on a PHI or
172/// select to be a PHI or select of the results.
173/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
174/// onto insert and extract operations on a vector value, and convert them to
175/// this form. By doing so, it will enable promotion of vector aggregates to
176/// SSA vector values.
177class SROA {
178 LLVMContext *const C;
179 DomTreeUpdater *const DTU;
180 AssumptionCache *const AC;
181 const bool PreserveCFG;
182
183 /// Worklist of alloca instructions to simplify.
184 ///
185 /// Each alloca in the function is added to this. Each new alloca formed gets
186 /// added to it as well to recursively simplify unless that alloca can be
187 /// directly promoted. Finally, each time we rewrite a use of an alloca other
188 /// than the one being actively rewritten, we add it back onto the list if not
189 /// already present to ensure it is re-visited.
190 SmallSetVector<AllocaInst *, 16> Worklist;
191
192 /// A collection of instructions to delete.
193 /// We try to batch deletions to simplify code and make things a bit more
194 /// efficient. We also make sure there are no dangling pointers.
195 SmallVector<WeakVH, 8> DeadInsts;
196
197 /// Post-promotion worklist.
198 ///
199 /// Sometimes we discover an alloca which has a high probability of becoming
200 /// viable for SROA after a round of promotion takes place. In those cases,
201 /// the alloca is enqueued here for re-processing.
202 ///
203 /// Note that we have to be very careful to clear allocas out of this list in
204 /// the event they are deleted.
205 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
206
207 /// A collection of alloca instructions we can directly promote.
208 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
209 SmallPtrSet<AllocaInst *, 16>, 16>
210 PromotableAllocas;
211
212 /// A worklist of PHIs to speculate prior to promoting allocas.
213 ///
214 /// All of these PHIs have been checked for the safety of speculation and by
215 /// being speculated will allow promoting allocas currently in the promotable
216 /// queue.
217 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
218
219 /// A worklist of select instructions to rewrite prior to promoting
220 /// allocas.
221 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
222
223 /// Select instructions that use an alloca and are subsequently loaded can be
224 /// rewritten to load both input pointers and then select between the result,
225 /// allowing the load of the alloca to be promoted.
226 /// From this:
227 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
228 /// %V = load <type>, ptr %P2
229 /// to:
230 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
231 /// %V2 = load <type>, ptr %Other
232 /// %V = select i1 %cond, <type> %V1, <type> %V2
233 ///
234 /// We can do this to a select if its only uses are loads
235 /// and if either the operand to the select can be loaded unconditionally,
236 /// or if we are allowed to perform CFG modifications.
237 /// If an intervening bitcast with a single use of the load is found, we
238 /// still allow the promotion.
239 static std::optional<RewriteableMemOps>
240 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
241
242public:
243 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 SROAOptions PreserveCFG_)
245 : C(C), DTU(DTU), AC(AC),
246 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
247
248 /// Main run method used by both the SROAPass and by the legacy pass.
249 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
250
251private:
252 friend class AllocaSliceRewriter;
253
254 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
255 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
256 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
257 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
258 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
259 void clobberUse(Use &U);
260 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
261 bool promoteAllocas();
262};
263
264} // end anonymous namespace
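// Usage sketch (editorial, hedged): outside this file the pass is normally
// scheduled through the new pass manager, for example from a pipeline built
// with PassBuilder. A minimal sketch, assuming an already-initialized
// FunctionAnalysisManager FAM:
//
//   FunctionPassManager FPM;
//   FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); // or SROAOptions::ModifyCFG
//   FPM.run(F, FAM);
//
// PreserveCFG restricts the pass to rewrites that leave the CFG intact, while
// ModifyCFG additionally allows select rewriting that introduces new control
// flow.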
265
266/// Calculate the fragment of a variable to use when slicing a store
267/// based on the slice dimensions, existing fragment, and base storage
268/// fragment.
269/// Results:
270/// UseFrag - Use Target as the new fragment.
271/// UseNoFrag - The new slice already covers the whole variable.
272/// Skip - The new alloca slice doesn't include this variable.
273/// FIXME: Can we use calculateFragmentIntersect instead?
274namespace {
275enum FragCalcResult { UseFrag, UseNoFrag, Skip };
276}
277static FragCalcResult
278calculateFragment(DILocalVariable *Variable,
279 uint64_t NewStorageSliceOffsetInBits,
280 uint64_t NewStorageSliceSizeInBits,
281 std::optional<DIExpression::FragmentInfo> StorageFragment,
282 std::optional<DIExpression::FragmentInfo> CurrentFragment,
283 DIExpression::FragmentInfo &Target) {
284 // If the base storage describes part of the variable, apply the offset and
285 // the size constraint.
286 if (StorageFragment) {
287 Target.SizeInBits =
288 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
289 Target.OffsetInBits =
290 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
291 } else {
292 Target.SizeInBits = NewStorageSliceSizeInBits;
293 Target.OffsetInBits = NewStorageSliceOffsetInBits;
294 }
295
296 // If this slice extracts the entirety of an independent variable from a
297 // larger alloca, do not produce a fragment expression, as the variable is
298 // not fragmented.
299 if (!CurrentFragment) {
300 if (auto Size = Variable->getSizeInBits()) {
301 // Treat the current fragment as covering the whole variable.
302 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
303 if (Target == CurrentFragment)
304 return UseNoFrag;
305 }
306 }
307
308 // No additional work to do if there isn't a fragment already, or there is
309 // but it already exactly describes the new assignment.
310 if (!CurrentFragment || *CurrentFragment == Target)
311 return UseFrag;
312
313 // Reject the target fragment if it doesn't fit wholly within the current
314 // fragment. TODO: We could instead chop up the target to fit in the case of
315 // a partial overlap.
316 if (Target.startInBits() < CurrentFragment->startInBits() ||
317 Target.endInBits() > CurrentFragment->endInBits())
318 return Skip;
319
320 // Target fits within the current fragment, return it.
321 return UseFrag;
322}
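// Worked example (editorial): suppose Variable is 64 bits wide, the base
// storage fragment covers bits [0, 64), and the new slice writes 32 bits at
// bit offset 32 of the old alloca. Target becomes the 32-bit fragment at
// offset 32. With no pre-existing fragment on the expression, the synthesized
// CurrentFragment covers the whole 64-bit variable, Target fits inside it, and
// the result is UseFrag with fragment bits [32, 64). If instead the slice
// covered all 64 bits, Target would equal the whole variable and the result
// would be UseNoFrag.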
323
324static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
325 return DebugVariable(DVR->getVariable(), std::nullopt,
326 DVR->getDebugLoc().getInlinedAt());
327}
328
329/// Find linked dbg.assign and generate a new one with the correct
330/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
331/// value component is copied from the old dbg.assign to the new.
332/// \param OldAlloca Alloca for the variable before splitting.
333/// \param IsSplit True if the store (not necessarily alloca)
334/// is being split.
335/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
336/// \param SliceSizeInBits New number of bits being written to.
337/// \param OldInst Instruction that is being split.
338/// \param Inst New instruction performing this part of the
339/// split store.
340/// \param Dest Store destination.
341/// \param Value Stored value.
342/// \param DL Datalayout.
343static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
344 uint64_t OldAllocaOffsetInBits,
345 uint64_t SliceSizeInBits, Instruction *OldInst,
346 Instruction *Inst, Value *Dest, Value *Value,
347 const DataLayout &DL) {
348 // If we want allocas to be migrated using this helper then we need to ensure
349 // that the BaseFragments map code still works. A simple solution would be
350 // to choose to always clone alloca dbg_assigns (rather than sometimes
351 // "stealing" them).
352 assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
353
354 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
355 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
356 if (DVRAssignMarkerRange.empty())
357 return;
358
359 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
360 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
361 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
362 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
363 << "\n");
364 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
365 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
366 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
367 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
368 if (Value)
369 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
370
371 /// Map of aggregate variables to their fragment associated with OldAlloca.
372 SmallDenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
373 BaseFragments;
374 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
375 BaseFragments[getAggregateVariable(DVR)] =
376 DVR->getExpression()->getFragmentInfo();
377
378 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
379 // one). It shouldn't already have one: assert this assumption.
380 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
381 DIAssignID *NewID = nullptr;
382 auto &Ctx = Inst->getContext();
383 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
384 assert(OldAlloca->isStaticAlloca());
385
386 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
387 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
388 << "\n");
389 auto *Expr = DbgAssign->getExpression();
390 bool SetKillLocation = false;
391
392 if (IsSplit) {
393 std::optional<DIExpression::FragmentInfo> BaseFragment;
394 {
395 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
396 if (R == BaseFragments.end())
397 return;
398 BaseFragment = R->second;
399 }
400 std::optional<DIExpression::FragmentInfo> CurrentFragment =
401 Expr->getFragmentInfo();
402 DIExpression::FragmentInfo NewFragment;
403 FragCalcResult Result = calculateFragment(
404 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
405 BaseFragment, CurrentFragment, NewFragment);
406
407 if (Result == Skip)
408 return;
409 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
410 if (CurrentFragment) {
411 // Rewrite NewFragment to be relative to the existing one (this is
412 // what createFragmentExpression wants). CalculateFragment has
413 // already resolved the size for us. FIXME: Should it return the
414 // relative fragment too?
415 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
416 }
417 // Add the new fragment info to the existing expression if possible.
418 if (auto E = DIExpression::createFragmentExpression(
419 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
420 Expr = *E;
421 } else {
422 // Otherwise, add the new fragment info to an empty expression and
423 // discard the value component of this dbg.assign as the value cannot
424 // be computed with the new fragment.
425 Expr = *DIExpression::createFragmentExpression(
426 DIExpression::get(Expr->getContext(), {}),
427 NewFragment.OffsetInBits, NewFragment.SizeInBits);
428 SetKillLocation = true;
429 }
430 }
431 }
432
433 // If we haven't created a DIAssignID ID do that now and attach it to Inst.
434 if (!NewID) {
435 NewID = DIAssignID::getDistinct(Ctx);
436 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
437 }
438
439 DbgVariableRecord *NewAssign;
440 if (IsSplit) {
441 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
442 NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
443 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
444 Dest, DIExpression::get(Expr->getContext(), {}),
445 DbgAssign->getDebugLoc())));
446 } else {
447 // The store is not split, simply steal the existing dbg_assign.
448 NewAssign = DbgAssign;
449 NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
450 NewAssign->setAddress(Dest);
451 if (Value)
452 NewAssign->replaceVariableLocationOp(0u, Value);
453 assert(Expr == NewAssign->getExpression());
454 }
455
456 // If we've updated the value but the original dbg.assign has an arglist
457 // then kill it now - we can't use the requested new value.
458 // We can't replace the DIArgList with the new value as it'd leave
459 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
460 // an arglist). And we can't keep the DIArgList in case the linked store
461 // is being split - in which case the DIArgList + expression may no longer
462 // be computing the correct value.
463 // This should be a very rare situation as it requires the value being
464 // stored to differ from the dbg.assign (i.e., the value has been
465 // represented differently in the debug intrinsic for some reason).
466 SetKillLocation |=
467 Value && (DbgAssign->hasArgList() ||
468 !DbgAssign->getExpression()->isSingleLocationExpression());
469 if (SetKillLocation)
470 NewAssign->setKillLocation();
471
472 // We could use more precision here at the cost of some additional (code)
473 // complexity - if the original dbg.assign was adjacent to its store, we
474 // could position this new dbg.assign adjacent to its store rather than the
475 // old dbg.assign. That would result in interleaved dbg.assigns rather than
476 // what we get now:
477 // split store !1
478 // split store !2
479 // dbg.assign !1
480 // dbg.assign !2
481 // This (current behaviour) results in debug assignments being
482 // noted as slightly offset (in code) from the store. In practice this
483 // should have little effect on the debugging experience due to the fact
484 // that all the split stores should get the same line number.
485 if (NewAssign != DbgAssign) {
486 NewAssign->moveBefore(DbgAssign->getIterator());
487 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
488 }
489 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
490 };
491
492 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
493}
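// Example (editorial sketch): when a 16-byte store with a linked dbg_assign is
// split into two 8-byte stores, each new store goes through a separate call
// here and so receives its own distinct DIAssignID, and MigrateDbgAssign emits
// a dbg_assign for each half whose expression carries a fragment covering the
// corresponding 64 bits of the variable (or the location is killed if the
// value can no longer be described with the new fragment).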
494
495namespace {
496
497/// A custom IRBuilder inserter which prefixes all names, but only in
498/// Assert builds.
499class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
500 std::string Prefix;
501
502 Twine getNameWithPrefix(const Twine &Name) const {
503 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
504 }
505
506public:
507 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
508
509 void InsertHelper(Instruction *I, const Twine &Name,
510 BasicBlock::iterator InsertPt) const override {
511 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
512 InsertPt);
513 }
514};
515
516/// Provide a type for IRBuilder that drops names in release builds.
517using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
518
519/// A used slice of an alloca.
520///
521/// This structure represents a slice of an alloca used by some instruction. It
522/// stores both the begin and end offsets of this use, a pointer to the use
523/// itself, and a flag indicating whether we can classify the use as splittable
524/// or not when forming partitions of the alloca.
525class Slice {
526 /// The beginning offset of the range.
527 uint64_t BeginOffset = 0;
528
529 /// The ending offset, not included in the range.
530 uint64_t EndOffset = 0;
531
532 /// Storage for both the use of this slice and whether it can be
533 /// split.
534 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
535
536public:
537 Slice() = default;
538
539 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
540 Value *ProtectedFieldDisc)
541 : BeginOffset(BeginOffset), EndOffset(EndOffset),
542 UseAndIsSplittable(U, IsSplittable),
543 ProtectedFieldDisc(ProtectedFieldDisc) {}
544
545 uint64_t beginOffset() const { return BeginOffset; }
546 uint64_t endOffset() const { return EndOffset; }
547
548 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
549 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
550
551 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
552
553 bool isDead() const { return getUse() == nullptr; }
554 void kill() { UseAndIsSplittable.setPointer(nullptr); }
555
556 // When this access is via an llvm.protected.field.ptr intrinsic, contains
557 // the second argument to the intrinsic, the discriminator.
558 Value *ProtectedFieldDisc;
559
560 /// Support for ordering ranges.
561 ///
562 /// This provides an ordering over ranges such that start offsets are
563 /// always increasing, and within equal start offsets, the end offsets are
564 /// decreasing. Thus the spanning range comes first in a cluster with the
565 /// same start position.
566 bool operator<(const Slice &RHS) const {
567 if (beginOffset() < RHS.beginOffset())
568 return true;
569 if (beginOffset() > RHS.beginOffset())
570 return false;
571 if (isSplittable() != RHS.isSplittable())
572 return !isSplittable();
573 if (endOffset() > RHS.endOffset())
574 return true;
575 return false;
576 }
577
578 /// Support comparison with a single offset to allow binary searches.
579 [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
580 return LHS.beginOffset() < RHSOffset;
581 }
582 [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
583 return LHSOffset < RHS.beginOffset();
584 }
585
586 bool operator==(const Slice &RHS) const {
587 return isSplittable() == RHS.isSplittable() &&
588 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
589 }
590 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
591};
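// Ordering example (editorial): given slices A = [0, 16) unsplittable,
// B = [0, 16) splittable, and C = [0, 8) unsplittable, the comparison above
// places unsplittable slices before splittable ones at the same start offset
// and larger end offsets first, so the sorted order is A, C, B.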
592
593/// Representation of the alloca slices.
594///
595/// This class represents the slices of an alloca which are formed by its
596/// various uses. If a pointer escapes, we can't fully build a representation
597/// for the slices used and we reflect that in this structure. The uses are
598/// stored, sorted by increasing beginning offset and with unsplittable slices
599/// starting at a particular offset before splittable slices.
600class AllocaSlices {
601public:
602 /// Construct the slices of a particular alloca.
603 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
604
605 /// Test whether a pointer to the allocation escapes our analysis.
606 ///
607 /// If this is true, the slices are never fully built and should be
608 /// ignored.
609 bool isEscaped() const { return PointerEscapingInstr; }
610 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
611
612 /// Support for iterating over the slices.
613 /// @{
614 using iterator = SmallVectorImpl<Slice>::iterator;
615 using range = iterator_range<iterator>;
616
617 iterator begin() { return Slices.begin(); }
618 iterator end() { return Slices.end(); }
619
620 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
621 using const_range = iterator_range<const_iterator>;
622
623 const_iterator begin() const { return Slices.begin(); }
624 const_iterator end() const { return Slices.end(); }
625 /// @}
626
627 /// Erase a range of slices.
628 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
629
630 /// Insert new slices for this alloca.
631 ///
632 /// This moves the slices into the alloca's slices collection, and re-sorts
633 /// everything so that the usual ordering properties of the alloca's slices
634 /// hold.
635 void insert(ArrayRef<Slice> NewSlices) {
636 int OldSize = Slices.size();
637 Slices.append(NewSlices.begin(), NewSlices.end());
638 auto SliceI = Slices.begin() + OldSize;
639 std::stable_sort(SliceI, Slices.end());
640 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
641 }
642
643 // Forward declare the iterator and range accessor for walking the
644 // partitions.
645 class partition_iterator;
646 iterator_range<partition_iterator> partitions();
647
648 /// Access the dead users for this alloca.
649 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
650
651 /// Access the users for this alloca that are llvm.protected.field.ptr
652 /// intrinsics.
653 ArrayRef<IntrinsicInst *> getPFPUsers() const { return PFPUsers; }
654
655 /// Access Uses that should be dropped if the alloca is promotable.
656 ArrayRef<Use *> getDeadUsesIfPromotable() const {
657 return DeadUseIfPromotable;
658 }
659
660 /// Access the dead operands referring to this alloca.
661 ///
662 /// These are operands which cannot actually be used to refer to the
663 /// alloca as they are outside its range and the user doesn't correct for
664 /// that. These mostly consist of PHI node inputs and the like which we just
665 /// need to replace with undef.
666 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
667
668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
669 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
670 void printSlice(raw_ostream &OS, const_iterator I,
671 StringRef Indent = " ") const;
672 void printUse(raw_ostream &OS, const_iterator I,
673 StringRef Indent = " ") const;
674 void print(raw_ostream &OS) const;
675 void dump(const_iterator I) const;
676 void dump() const;
677#endif
678
679private:
680 template <typename DerivedT, typename RetT = void> class BuilderBase;
681 class SliceBuilder;
682
683 friend class AllocaSlices::SliceBuilder;
684
685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
686 /// Handle to alloca instruction to simplify method interfaces.
687 AllocaInst &AI;
688#endif
689
690 /// The instruction responsible for this alloca not having a known set
691 /// of slices.
692 ///
693 /// When an instruction (potentially) escapes the pointer to the alloca, we
694 /// store a pointer to that here and abort trying to form slices of the
695 /// alloca. This will be null if the alloca slices are analyzed successfully.
696 Instruction *PointerEscapingInstr;
697 Instruction *PointerEscapingInstrReadOnly;
698
699 /// The slices of the alloca.
700 ///
701 /// We store a vector of the slices formed by uses of the alloca here. This
702 /// vector is sorted by increasing begin offset, and then the unsplittable
703 /// slices before the splittable ones. See the Slice inner class for more
704 /// details.
705 SmallVector<Slice, 8> Slices;
706
707 /// Instructions which will become dead if we rewrite the alloca.
708 ///
709 /// Note that these are not separated by slice. This is because we expect an
710 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
711 /// all these instructions can simply be removed and replaced with poison as
712 /// they come from outside of the allocated space.
713 SmallVector<Instruction *, 8> DeadUsers;
714
715 /// Users that are llvm.protected.field.ptr intrinsics. These will be RAUW'd
716 /// to their first argument if we rewrite the alloca.
717 SmallVector<IntrinsicInst *, 8> PFPUsers;
718
719 /// Uses which will become dead if can promote the alloca.
720 SmallVector<Use *, 8> DeadUseIfPromotable;
721
722 /// Operands which will become dead if we rewrite the alloca.
723 ///
724 /// These are operands that in their particular use can be replaced with
725 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
726 /// to PHI nodes and the like. They aren't entirely dead (there might be
727 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
728 /// want to swap this particular input for poison to simplify the use lists of
729 /// the alloca.
730 SmallVector<Use *, 8> DeadOperands;
731};
732
733/// A partition of the slices.
734///
735/// An ephemeral representation for a range of slices which can be viewed as
736/// a partition of the alloca. This range represents a span of the alloca's
737/// memory which cannot be split, and provides access to all of the slices
738/// overlapping some part of the partition.
739///
740/// Objects of this type are produced by traversing the alloca's slices, but
741/// are only ephemeral and not persistent.
742class Partition {
743private:
744 friend class AllocaSlices;
745 friend class AllocaSlices::partition_iterator;
746
747 using iterator = AllocaSlices::iterator;
748
749 /// The beginning and ending offsets of the alloca for this
750 /// partition.
751 uint64_t BeginOffset = 0, EndOffset = 0;
752
753 /// The start and end iterators of this partition.
754 iterator SI, SJ;
755
756 /// A collection of split slice tails overlapping the partition.
757 SmallVector<Slice *, 4> SplitTails;
758
759 /// Raw constructor builds an empty partition starting and ending at
760 /// the given iterator.
761 Partition(iterator SI) : SI(SI), SJ(SI) {}
762
763public:
764 /// The start offset of this partition.
765 ///
766 /// All of the contained slices start at or after this offset.
767 uint64_t beginOffset() const { return BeginOffset; }
768
769 /// The end offset of this partition.
770 ///
771 /// All of the contained slices end at or before this offset.
772 uint64_t endOffset() const { return EndOffset; }
773
774 /// The size of the partition.
775 ///
776 /// Note that this can never be zero.
777 uint64_t size() const {
778 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
779 return EndOffset - BeginOffset;
780 }
781
782 /// Test whether this partition contains no slices, and merely spans
783 /// a region occupied by split slices.
784 bool empty() const { return SI == SJ; }
785
786 /// \name Iterate slices that start within the partition.
787 /// These may be splittable or unsplittable. They have a begin offset >= the
788 /// partition begin offset.
789 /// @{
790 // FIXME: We should probably define a "concat_iterator" helper and use that
791 // to stitch together pointee_iterators over the split tails and the
792 // contiguous iterators of the partition. That would give a much nicer
793 // interface here. We could then additionally expose filtered iterators for
794 // split, unsplit, and unsplittable slices based on the usage patterns.
795 iterator begin() const { return SI; }
796 iterator end() const { return SJ; }
797 /// @}
798
799 /// Get the sequence of split slice tails.
800 ///
801 /// These tails are of slices which start before this partition but are
802 /// split and overlap into the partition. We accumulate these while forming
803 /// partitions.
804 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
805};
806
807} // end anonymous namespace
808
809/// An iterator over partitions of the alloca's slices.
810///
811/// This iterator implements the core algorithm for partitioning the alloca's
812/// slices. It is a forward iterator as we don't support backtracking for
813/// efficiency reasons, and re-use a single storage area to maintain the
814/// current set of split slices.
815///
816/// It is templated on the slice iterator type to use so that it can operate
817/// with either const or non-const slice iterators.
818class AllocaSlices::partition_iterator
819 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
820 Partition> {
821 friend class AllocaSlices;
822
823 /// Most of the state for walking the partitions is held in a class
824 /// with a nice interface for examining them.
825 Partition P;
826
827 /// We need to keep the end of the slices to know when to stop.
828 AllocaSlices::iterator SE;
829
830 /// We also need to keep track of the maximum split end offset seen.
831 /// FIXME: Do we really?
832 uint64_t MaxSplitSliceEndOffset = 0;
833
834 /// Sets the partition to be empty at given iterator, and sets the
835 /// end iterator.
836 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
837 : P(SI), SE(SE) {
838 // If not already at the end, advance our state to form the initial
839 // partition.
840 if (SI != SE)
841 advance();
842 }
843
844 /// Advance the iterator to the next partition.
845 ///
846 /// Requires that the iterator not be at the end of the slices.
847 void advance() {
848 assert((P.SI != SE || !P.SplitTails.empty()) &&
849 "Cannot advance past the end of the slices!");
850
851 // Clear out any split uses which have ended.
852 if (!P.SplitTails.empty()) {
853 if (P.EndOffset >= MaxSplitSliceEndOffset) {
854 // If we've finished all splits, this is easy.
855 P.SplitTails.clear();
856 MaxSplitSliceEndOffset = 0;
857 } else {
858 // Remove the uses which have ended in the prior partition. This
859 // cannot change the max split slice end because we just checked that
860 // the prior partition ended prior to that max.
861 llvm::erase_if(P.SplitTails,
862 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
863 assert(llvm::any_of(P.SplitTails,
864 [&](Slice *S) {
865 return S->endOffset() == MaxSplitSliceEndOffset;
866 }) &&
867 "Could not find the current max split slice offset!");
868 assert(llvm::all_of(P.SplitTails,
869 [&](Slice *S) {
870 return S->endOffset() <= MaxSplitSliceEndOffset;
871 }) &&
872 "Max split slice end offset is not actually the max!");
873 }
874 }
875
876 // If P.SI is already at the end, then we've cleared the split tail and
877 // now have an end iterator.
878 if (P.SI == SE) {
879 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
880 return;
881 }
882
883 // If we had a non-empty partition previously, set up the state for
884 // subsequent partitions.
885 if (P.SI != P.SJ) {
886 // Accumulate all the splittable slices which started in the old
887 // partition into the split list.
888 for (Slice &S : P)
889 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
890 P.SplitTails.push_back(&S);
891 MaxSplitSliceEndOffset =
892 std::max(S.endOffset(), MaxSplitSliceEndOffset);
893 }
894
895 // Start from the end of the previous partition.
896 P.SI = P.SJ;
897
898 // If P.SI is now at the end, we at most have a tail of split slices.
899 if (P.SI == SE) {
900 P.BeginOffset = P.EndOffset;
901 P.EndOffset = MaxSplitSliceEndOffset;
902 return;
903 }
904
905 // If we have split slices and the next slice is after a gap and is
906 // not splittable, immediately form an empty partition for the split
907 // slices up until the next slice begins.
908 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
909 !P.SI->isSplittable()) {
910 P.BeginOffset = P.EndOffset;
911 P.EndOffset = P.SI->beginOffset();
912 return;
913 }
914 }
915
916 // OK, we need to consume new slices. Set the end offset based on the
917 // current slice, and step SJ past it. The beginning offset of the
918 // partition is the beginning offset of the next slice unless we have
919 // pre-existing split slices that are continuing, in which case we begin
920 // at the prior end offset.
921 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
922 P.EndOffset = P.SI->endOffset();
923 ++P.SJ;
924
925 // There are two strategies to form a partition based on whether the
926 // partition starts with an unsplittable slice or a splittable slice.
927 if (!P.SI->isSplittable()) {
928 // When we're forming an unsplittable region, it must always start at
929 // the first slice and will extend through its end.
930 assert(P.BeginOffset == P.SI->beginOffset());
931
932 // Form a partition including all of the overlapping slices with this
933 // unsplittable slice.
934 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
935 if (!P.SJ->isSplittable())
936 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
937 ++P.SJ;
938 }
939
940 // We have a partition across a set of overlapping unsplittable
941 // partitions.
942 return;
943 }
944
945 // If we're starting with a splittable slice, then we need to form
946 // a synthetic partition spanning it and any other overlapping splittable
947 // slices.
948 assert(P.SI->isSplittable() && "Forming a splittable partition!");
949
950 // Collect all of the overlapping splittable slices.
951 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
952 P.SJ->isSplittable()) {
953 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
954 ++P.SJ;
955 }
956
957 // Back up P.EndOffset if we ended the span early when encountering an
958 // unsplittable slice. This synthesizes the early end offset of
959 // a partition spanning only splittable slices.
960 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
961 assert(!P.SJ->isSplittable());
962 P.EndOffset = P.SJ->beginOffset();
963 }
964 }
965
966public:
967 bool operator==(const partition_iterator &RHS) const {
968 assert(SE == RHS.SE &&
969 "End iterators don't match between compared partition iterators!");
970
971 // The observed positions of partitions are marked by the P.SI iterator and
972 // the emptiness of the split slices. The latter is only relevant when
973 // P.SI == SE, as the end iterator will additionally have an empty split
974 // slices list, but the prior may have the same P.SI and a tail of split
975 // slices.
976 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
977 assert(P.SJ == RHS.P.SJ &&
978 "Same set of slices formed two different sized partitions!");
979 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
980 "Same slice position with differently sized non-empty split "
981 "slice tails!");
982 return true;
983 }
984 return false;
985 }
986
987 partition_iterator &operator++() {
988 advance();
989 return *this;
990 }
991
992 Partition &operator*() { return P; }
993};
994
995/// A forward range over the partitions of the alloca's slices.
996///
997/// This accesses an iterator range over the partitions of the alloca's
998/// slices. It computes these partitions on the fly based on the overlapping
999/// offsets of the slices and the ability to split them. It will visit "empty"
1000/// partitions to cover regions of the alloca only accessed via split
1001/// slices.
1002iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
1003 return make_range(partition_iterator(begin(), end()),
1004 partition_iterator(end(), end()));
1005}
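// Partitioning example (editorial): given sorted slices S0 = [0, 8)
// unsplittable, S1 = [0, 16) splittable, and S2 = [12, 16) unsplittable, the
// iterator produces three partitions: [0, 8) containing S0 and S1, an "empty"
// partition [8, 12) covering only the split tail of S1, and [12, 16)
// containing S2 plus the tail of S1. Code walking them typically looks like:
//
//   for (Partition &P : AS.partitions()) {
//     // P.beginOffset(), P.endOffset(), P.splitSliceTails(), ...
//   }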
1006
1007static Value *foldSelectInst(SelectInst &SI) {
1008 // If the condition being selected on is a constant or the same value is
1009 // being selected between, fold the select. Yes this does (rarely) happen
1010 // early on.
1011 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
1012 return SI.getOperand(1 + CI->isZero());
1013 if (SI.getOperand(1) == SI.getOperand(2))
1014 return SI.getOperand(1);
1015
1016 return nullptr;
1017}
1018
1019/// A helper that folds a PHI node or a select.
1020static Value *foldPHINodeOrSelectInst(Instruction &I) {
1021 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1022 // If PN merges together the same value, return that value.
1023 return PN->hasConstantValue();
1024 }
1025 return foldSelectInst(cast<SelectInst>(I));
1026}
1027
1028/// Builder for the alloca slices.
1029///
1030/// This class builds a set of alloca slices by recursively visiting the uses
1031/// of an alloca and making a slice for each load and store at each offset.
1032class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1033 friend class PtrUseVisitor<SliceBuilder>;
1034 friend class InstVisitor<SliceBuilder>;
1035
1036 using Base = PtrUseVisitor<SliceBuilder>;
1037
1038 const uint64_t AllocSize;
1039 AllocaSlices &AS;
1040
1041 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1042 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1043
1044 /// Set to de-duplicate dead instructions found in the use walk.
1045 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1046
1047 // When this access is via an llvm.protected.field.ptr intrinsic, contains
1048 // the second argument to the intrinsic, the discriminator.
1049 Value *ProtectedFieldDisc = nullptr;
1050
1051public:
1052 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1053 : PtrUseVisitor<SliceBuilder>(DL),
1054 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1055 AS(AS) {}
1056
1057private:
1058 void markAsDead(Instruction &I) {
1059 if (VisitedDeadInsts.insert(&I).second)
1060 AS.DeadUsers.push_back(&I);
1061 }
1062
1063 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1064 bool IsSplittable = false) {
1065 // Completely skip uses which have a zero size or start either before or
1066 // past the end of the allocation.
1067 if (Size == 0 || Offset.uge(AllocSize)) {
1068 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1069 << Offset
1070 << " which has zero size or starts outside of the "
1071 << AllocSize << " byte alloca:\n"
1072 << " alloca: " << AS.AI << "\n"
1073 << " use: " << I << "\n");
1074 return markAsDead(I);
1075 }
1076
1077 uint64_t BeginOffset = Offset.getZExtValue();
1078 uint64_t EndOffset = BeginOffset + Size;
1079
1080 // Clamp the end offset to the end of the allocation. Note that this is
1081 // formulated to handle even the case where "BeginOffset + Size" overflows.
1082 // This may appear superficially to be something we could ignore entirely,
1083 // but that is not so! There may be widened loads or PHI-node uses where
1084 // some instructions are dead but not others. We can't completely ignore
1085 // them, and so have to record at least the information here.
1086 assert(AllocSize >= BeginOffset); // Established above.
1087 if (Size > AllocSize - BeginOffset) {
1088 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1089 << Offset << " to remain within the " << AllocSize
1090 << " byte alloca:\n"
1091 << " alloca: " << AS.AI << "\n"
1092 << " use: " << I << "\n");
1093 EndOffset = AllocSize;
1094 }
1095
1096 AS.Slices.push_back(
1097 Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
1098 }
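// Clamping example (editorial): for an 8-byte alloca, a 16-byte use starting
// at offset 4 is recorded as the slice [4, 8) rather than being dropped, while
// a use that starts at or past offset 8 (or has zero size) is marked dead.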
1099
1100 void visitBitCastInst(BitCastInst &BC) {
1101 if (BC.use_empty())
1102 return markAsDead(BC);
1103
1104 return Base::visitBitCastInst(BC);
1105 }
1106
1107 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1108 if (ASC.use_empty())
1109 return markAsDead(ASC);
1110
1111 return Base::visitAddrSpaceCastInst(ASC);
1112 }
1113
1114 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1115 if (GEPI.use_empty())
1116 return markAsDead(GEPI);
1117
1118 return Base::visitGetElementPtrInst(GEPI);
1119 }
1120
1121 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1122 uint64_t Size, bool IsVolatile) {
1123 // We allow splitting of non-volatile loads and stores where the type is an
1124 // integer type. These may be used to implement 'memcpy' or other "transfer
1125 // of bits" patterns.
1126 bool IsSplittable =
1127 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1128
1129 insertUse(I, Offset, Size, IsSplittable);
1130 }
1131
1132 void visitLoadInst(LoadInst &LI) {
1133 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1134 "All simple FCA loads should have been pre-split");
1135
1136 // If there is a load with an unknown offset, we can still perform store
1137 // to load forwarding for other known-offset loads.
1138 if (!IsOffsetKnown)
1139 return PI.setEscapedReadOnly(&LI);
1140
1141 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1142 if (Size.isScalable()) {
1143 unsigned VScale = LI.getFunction()->getVScaleValue();
1144 if (!VScale)
1145 return PI.setAborted(&LI);
1146
1147 Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
1148 }
1149
1150 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1151 LI.isVolatile());
1152 }
1153
1154 void visitStoreInst(StoreInst &SI) {
1155 Value *ValOp = SI.getValueOperand();
1156 if (ValOp == *U)
1157 return PI.setEscapedAndAborted(&SI);
1158 if (!IsOffsetKnown)
1159 return PI.setAborted(&SI);
1160
1161 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1162 if (StoreSize.isScalable()) {
1163 unsigned VScale = SI.getFunction()->getVScaleValue();
1164 if (!VScale)
1165 return PI.setAborted(&SI);
1166
1167 StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
1168 }
1169
1170 uint64_t Size = StoreSize.getFixedValue();
1171
1172 // If this memory access can be shown to *statically* extend outside the
1173 // bounds of the allocation, its behavior is undefined, so simply
1174 // ignore it. Note that this is more strict than the generic clamping
1175 // behavior of insertUse. We also try to handle cases which might run the
1176 // risk of overflow.
1177 // FIXME: We should instead consider the pointer to have escaped if this
1178 // function is being instrumented for addressing bugs or race conditions.
1179 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1180 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1181 << Offset << " which extends past the end of the "
1182 << AllocSize << " byte alloca:\n"
1183 << " alloca: " << AS.AI << "\n"
1184 << " use: " << SI << "\n");
1185 return markAsDead(SI);
1186 }
1187
1188 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1189 "All simple FCA stores should have been pre-split");
1190 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1191 }
1192
1193 void visitMemSetInst(MemSetInst &II) {
1194 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1195 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1196 if ((Length && Length->getValue() == 0) ||
1197 (IsOffsetKnown && Offset.uge(AllocSize)))
1198 // Zero-length mem transfer intrinsics can be ignored entirely.
1199 return markAsDead(II);
1200
1201 if (!IsOffsetKnown)
1202 return PI.setAborted(&II);
1203
1204 insertUse(II, Offset,
1205 Length ? Length->getLimitedValue()
1206 : AllocSize - Offset.getLimitedValue(),
1207 (bool)Length);
1208 }
1209
1210 void visitMemTransferInst(MemTransferInst &II) {
1211 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1212 if (Length && Length->getValue() == 0)
1213 // Zero-length mem transfer intrinsics can be ignored entirely.
1214 return markAsDead(II);
1215
1216 // Because we can visit these intrinsics twice, also check to see if the
1217 // first time marked this instruction as dead. If so, skip it.
1218 if (VisitedDeadInsts.count(&II))
1219 return;
1220
1221 if (!IsOffsetKnown)
1222 return PI.setAborted(&II);
1223
1224 // This side of the transfer is completely out-of-bounds, and so we can
1225 // nuke the entire transfer. However, we also need to nuke the other side
1226 // if already added to our partitions.
1227 // FIXME: Yet another place we really should bypass this when
1228 // instrumenting for ASan.
1229 if (Offset.uge(AllocSize)) {
1230 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1231 MemTransferSliceMap.find(&II);
1232 if (MTPI != MemTransferSliceMap.end())
1233 AS.Slices[MTPI->second].kill();
1234 return markAsDead(II);
1235 }
1236
1237 uint64_t RawOffset = Offset.getLimitedValue();
1238 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1239
1240 // Check for the special case where the same exact value is used for both
1241 // source and dest.
1242 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1243 // For non-volatile transfers this is a no-op.
1244 if (!II.isVolatile())
1245 return markAsDead(II);
1246
1247 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1248 }
1249
1250 // If we have seen both source and destination for a mem transfer, then
1251 // they both point to the same alloca.
1252 bool Inserted;
1253 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1254 std::tie(MTPI, Inserted) =
1255 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1256 unsigned PrevIdx = MTPI->second;
1257 if (!Inserted) {
1258 Slice &PrevP = AS.Slices[PrevIdx];
1259
1260 // Check if the begin offsets match and this is a non-volatile transfer.
1261 // In that case, we can completely elide the transfer.
1262 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1263 PrevP.kill();
1264 return markAsDead(II);
1265 }
1266
1267 // Otherwise we have an offset transfer within the same alloca. We can't
1268 // split those.
1269 PrevP.makeUnsplittable();
1270 }
1271
1272 // Insert the use now that we've fixed up the splittable nature.
1273 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1274
1275 // Check that we ended up with a valid index in the map.
1276 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1277 "Map index doesn't point back to a slice with this user.");
1278 }
1279
1280 // Disable SROA for any intrinsics except for lifetime markers.
1281 // FIXME: What about debug intrinsics? This matches old behavior, but
1282 // doesn't make sense.
1283 void visitIntrinsicInst(IntrinsicInst &II) {
1284 if (II.isDroppable()) {
1285 AS.DeadUseIfPromotable.push_back(U);
1286 return;
1287 }
1288
1289 if (!IsOffsetKnown)
1290 return PI.setAborted(&II);
1291
1292 if (II.isLifetimeStartOrEnd()) {
1293 insertUse(II, Offset, AllocSize, true);
1294 return;
1295 }
1296
1297 if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
1298 // We only handle loads and stores as users of llvm.protected.field.ptr.
1299 // Other uses may add items to the worklist, which will cause
1300 // ProtectedFieldDisc to be tracked incorrectly.
1301 AS.PFPUsers.push_back(&II);
1302 ProtectedFieldDisc = II.getArgOperand(1);
1303 for (Use &U : II.uses()) {
1304 this->U = &U;
1305 if (auto *LI = dyn_cast<LoadInst>(U.getUser()))
1306 visitLoadInst(*LI);
1307 else if (auto *SI = dyn_cast<StoreInst>(U.getUser()))
1308 visitStoreInst(*SI);
1309 else
1310 PI.setAborted(&II);
1311 if (PI.isAborted())
1312 break;
1313 }
1314 ProtectedFieldDisc = nullptr;
1315 return;
1316 }
1317
1318 Base::visitIntrinsicInst(II);
1319 }
1320
1321 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1322 // We consider any PHI or select that results in a direct load or store of
1323 // the same offset to be a viable use for slicing purposes. These uses
1324 // are considered unsplittable and the size is the maximum loaded or stored
1325 // size.
1326 SmallPtrSet<Instruction *, 4> Visited;
1327 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1328 Visited.insert(Root);
1329 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1330 const DataLayout &DL = Root->getDataLayout();
1331 // If there are no loads or stores, the access is dead. We mark that as
1332 // a size zero access.
1333 Size = 0;
1334 do {
1335 Instruction *I, *UsedI;
1336 std::tie(UsedI, I) = Uses.pop_back_val();
1337
1338 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1339 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1340 if (LoadSize.isScalable()) {
1341 PI.setAborted(LI);
1342 return nullptr;
1343 }
1344 Size = std::max(Size, LoadSize.getFixedValue());
1345 continue;
1346 }
1347 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1348 Value *Op = SI->getOperand(0);
1349 if (Op == UsedI)
1350 return SI;
1351 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1352 if (StoreSize.isScalable()) {
1353 PI.setAborted(SI);
1354 return nullptr;
1355 }
1356 Size = std::max(Size, StoreSize.getFixedValue());
1357 continue;
1358 }
1359
1360 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1361 if (!GEP->hasAllZeroIndices())
1362 return GEP;
1363 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1364 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1365 return I;
1366 }
1367
1368 for (User *U : I->users())
1369 if (Visited.insert(cast<Instruction>(U)).second)
1370 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1371 } while (!Uses.empty());
1372
1373 return nullptr;
1374 }
1375
1376 void visitPHINodeOrSelectInst(Instruction &I) {
1377 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1378 if (I.use_empty())
1379 return markAsDead(I);
1380
1381 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1382 // instructions in this BB, which may be required during rewriting. Bail out
1383 // on these cases.
1384 if (isa<PHINode>(I) && !I.getParent()->hasInsertionPt())
1385 return PI.setAborted(&I);
1386
1387 // TODO: We could use simplifyInstruction here to fold PHINodes and
1388 // SelectInsts. However, doing so requires to change the current
1389 // dead-operand-tracking mechanism. For instance, suppose neither loading
1390 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1391 // trap either. However, if we simply replace %U with undef using the
1392 // current dead-operand-tracking mechanism, "load (select undef, undef,
1393 // %other)" may trap because the select may return the first operand
1394 // "undef".
1395 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1396 if (Result == *U)
1397 // If the result of the constant fold will be the pointer, recurse
1398 // through the PHI/select as if we had RAUW'ed it.
1399 enqueueUsers(I);
1400 else
1401 // Otherwise the operand to the PHI/select is dead, and we can replace
1402 // it with poison.
1403 AS.DeadOperands.push_back(U);
1404
1405 return;
1406 }
1407
1408 if (!IsOffsetKnown)
1409 return PI.setAborted(&I);
1410
1411 // See if we already have computed info on this node.
1412 uint64_t &Size = PHIOrSelectSizes[&I];
1413 if (!Size) {
1414 // This is a new PHI/Select, check for an unsafe use of it.
1415 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1416 return PI.setAborted(UnsafeI);
1417 }
1418
1419 // For PHI and select operands outside the alloca, we can't nuke the entire
1420 // phi or select -- the other side might still be relevant, so we special
1421 // case them here and use a separate structure to track the operands
1422 // themselves which should be replaced with poison.
1423 // FIXME: This should instead be escaped in the event we're instrumenting
1424 // for address sanitization.
1425 if (Offset.uge(AllocSize)) {
1426 AS.DeadOperands.push_back(U);
1427 return;
1428 }
1429
1430 insertUse(I, Offset, Size);
1431 }
1432
1433 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1434
1435 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1436
1437 /// Disable SROA entirely if there are unhandled users of the alloca.
1438 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1439
1440 void visitCallBase(CallBase &CB) {
1441 // If the call operand is read-only and only does a read-only or address
1442 // capture, then we mark it as EscapedReadOnly.
1443 if (CB.isDataOperand(U) &&
1444 !capturesFullProvenance(CB.getCaptureInfo(U->getOperandNo())) &&
1445 CB.onlyReadsMemory(U->getOperandNo())) {
1446 PI.setEscapedReadOnly(&CB);
1447 return;
1448 }
1449
1450 Base::visitCallBase(CB);
1451 }
1452};
1453
1454AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1455 :
1456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1457 AI(AI),
1458#endif
1459 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1460 SliceBuilder PB(DL, AI, *this);
1461 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1462 if (PtrI.isEscaped() || PtrI.isAborted()) {
1463 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1464 // possibly by just storing the PtrInfo in the AllocaSlices.
1465 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1466 : PtrI.getAbortingInst();
1467 assert(PointerEscapingInstr && "Did not track a bad instruction");
1468 return;
1469 }
1470 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1471
1472 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1473
1474 // Sort the uses. This arranges for the offsets to be in ascending order,
1475 // and the sizes to be in descending order.
1476 llvm::stable_sort(Slices);
1477}
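// Example of the resulting slice list (editorial, hypothetical IR):
//
//   %a = alloca { i32, i32 }
//   store i32 0, ptr %a
//   %g = getelementptr inbounds i8, ptr %a, i64 4
//   %v = load i32, ptr %g
//
// yields two slices after sorting: [0, 4) for the store and [4, 8) for the
// load, both splittable since they are simple, non-volatile integer accesses.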
1478
1479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1480
1481void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1482 StringRef Indent) const {
1483 printSlice(OS, I, Indent);
1484 OS << "\n";
1485 printUse(OS, I, Indent);
1486}
1487
1488void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1489 StringRef Indent) const {
1490 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1491 << " slice #" << (I - begin())
1492 << (I->isSplittable() ? " (splittable)" : "");
1493}
1494
1495void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1496 StringRef Indent) const {
1497 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1498}
1499
1500void AllocaSlices::print(raw_ostream &OS) const {
1501 if (PointerEscapingInstr) {
1502 OS << "Can't analyze slices for alloca: " << AI << "\n"
1503 << " A pointer to this alloca escaped by:\n"
1504 << " " << *PointerEscapingInstr << "\n";
1505 return;
1506 }
1507
1508 if (PointerEscapingInstrReadOnly)
1509 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1510
1511 OS << "Slices of alloca: " << AI << "\n";
1512 for (const_iterator I = begin(), E = end(); I != E; ++I)
1513 print(OS, I);
1514}
1515
1516LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1517 print(dbgs(), I);
1518}
1519LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1520
1521#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522
1523/// Walk the range of a partitioning looking for a common type to cover this
1524/// sequence of slices.
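/// As a rough illustration: if a [0, 4) partition is fully covered by a
/// "load float" and a "store i32", the user types disagree, so no common type
/// is returned, but i32 is still reported as the widest integer type seen; if
/// both users were float, the common type would be float and no integer type
/// would be reported.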
1525static std::pair<Type *, IntegerType *>
1526findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1527 uint64_t EndOffset) {
1528 Type *Ty = nullptr;
1529 bool TyIsCommon = true;
1530 IntegerType *ITy = nullptr;
1531
1532 // Note that we need to look at *every* alloca slice's Use to ensure we
1533 // always get consistent results regardless of the order of slices.
1534 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1535 Use *U = I->getUse();
1536 if (isa<IntrinsicInst>(*U->getUser()))
1537 continue;
1538 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1539 continue;
1540
1541 Type *UserTy = nullptr;
1542 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1543 UserTy = LI->getType();
1544 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1545 UserTy = SI->getValueOperand()->getType();
1546 }
1547
1548 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1549 // If the type is larger than the partition, skip it. We only encounter
1550 // this for split integer operations where we want to use the type of the
1551 // entity causing the split. Also skip if the type is not a byte width
1552 // multiple.
1553 if (UserITy->getBitWidth() % 8 != 0 ||
1554 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1555 continue;
1556
1557 // Track the largest bitwidth integer type used in this way in case there
1558 // is no common type.
1559 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1560 ITy = UserITy;
1561 }
1562
1563 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1564 // depend on types skipped above.
1565 if (!UserTy || (Ty && Ty != UserTy))
1566 TyIsCommon = false; // Give up on anything but an iN type.
1567 else
1568 Ty = UserTy;
1569 }
1570
1571 return {TyIsCommon ? Ty : nullptr, ITy};
1572}
1573
1574/// PHI instructions that use an alloca and are subsequently loaded can be
1575/// rewritten to load both input pointers in the pred blocks and then PHI the
1576/// results, allowing the load of the alloca to be promoted.
1577/// From this:
1578/// %P2 = phi [i32* %Alloca, i32* %Other]
1579/// %V = load i32* %P2
1580/// to:
1581/// %V1 = load i32* %Alloca -> will be mem2reg'd
1582/// ...
1583/// %V2 = load i32* %Other
1584/// ...
1585/// %V = phi [i32 %V1, i32 %V2]
1586///
1587/// We can do this to a select if its only uses are loads and if the operands
1588/// to the select can be loaded unconditionally.
1589///
1590/// FIXME: This should be hoisted into a generic utility, likely in
1591/// Transforms/Util/Local.h
1592 static bool isSafePHIToSpeculate(PHINode &PN) {
1593 const DataLayout &DL = PN.getDataLayout();
1594
1595 // For now, we can only do this promotion if the load is in the same block
1596 // as the PHI, and if there are no stores between the phi and load.
1597 // TODO: Allow recursive phi users.
1598 // TODO: Allow stores.
1599 BasicBlock *BB = PN.getParent();
1600 Align MaxAlign;
1601 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1602 Type *LoadType = nullptr;
1603 for (User *U : PN.users()) {
1604 LoadInst *LI = dyn_cast<LoadInst>(U);
1605 if (!LI || !LI->isSimple())
1606 return false;
1607
1608 // For now we only allow loads in the same block as the PHI. This is
1609 // a common case that happens when instcombine merges two loads through
1610 // a PHI.
1611 if (LI->getParent() != BB)
1612 return false;
1613
1614 if (LoadType) {
1615 if (LoadType != LI->getType())
1616 return false;
1617 } else {
1618 LoadType = LI->getType();
1619 }
1620
1621 // Ensure that there are no instructions between the PHI and the load that
1622 // could store.
1623 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1624 if (BBI->mayWriteToMemory())
1625 return false;
1626
1627 MaxAlign = std::max(MaxAlign, LI->getAlign());
1628 }
1629
1630 if (!LoadType)
1631 return false;
1632
1633 APInt LoadSize =
1634 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1635
1636 // We can only transform this if it is safe to push the loads into the
1637 // predecessor blocks. The only thing to watch out for is that we can't put
1638 // a possibly trapping load in the predecessor if it is a critical edge.
1639 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1640 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1641 Value *InVal = PN.getIncomingValue(Idx);
1642
1643 // If the value is produced by the terminator of the predecessor (an
1644 // invoke) or it has side-effects, there is no valid place to put a load
1645 // in the predecessor.
1646 if (TI == InVal || TI->mayHaveSideEffects())
1647 return false;
1648
1649 // If the predecessor has a single successor, then the edge isn't
1650 // critical.
1651 if (TI->getNumSuccessors() == 1)
1652 continue;
1653
1654 // If this pointer is always safe to load, or if we can prove that there
1655 // is already a load in the block, then we can move the load to the pred
1656 // block.
1657 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1658 continue;
1659
1660 return false;
1661 }
1662
1663 return true;
1664}
1665
1666static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1667 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1668
1669 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1670 Type *LoadTy = SomeLoad->getType();
1671 IRB.SetInsertPoint(&PN);
1672 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1673 PN.getName() + ".sroa.speculated");
1674
1675 // Get the AA tags and alignment to use from one of the loads. It does not
1676 // matter which one we get or whether any of them differ.
1677 AAMDNodes AATags = SomeLoad->getAAMetadata();
1678 Align Alignment = SomeLoad->getAlign();
1679
1680 // Rewrite all loads of the PN to use the new PHI.
1681 while (!PN.use_empty()) {
1682 LoadInst *LI = cast<LoadInst>(PN.user_back());
1683 LI->replaceAllUsesWith(NewPN);
1684 LI->eraseFromParent();
1685 }
1686
1687 // Inject loads into all of the pred blocks.
1688 DenseMap<BasicBlock *, Value *> InjectedLoads;
1689 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1690 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1691 Value *InVal = PN.getIncomingValue(Idx);
1692
1693 // A PHI node is allowed to have multiple (duplicated) entries for the same
1694 // basic block, as long as the value is the same. So if we already injected
1695 // a load in the predecessor, then we should reuse the same load for all
1696 // duplicated entries.
1697 if (Value *V = InjectedLoads.lookup(Pred)) {
1698 NewPN->addIncoming(V, Pred);
1699 continue;
1700 }
1701
1702 Instruction *TI = Pred->getTerminator();
1703 IRB.SetInsertPoint(TI);
1704
1705 LoadInst *Load = IRB.CreateAlignedLoad(
1706 LoadTy, InVal, Alignment,
1707 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1708 ++NumLoadsSpeculated;
1709 if (AATags)
1710 Load->setAAMetadata(AATags);
1711 NewPN->addIncoming(Load, Pred);
1712 InjectedLoads[Pred] = Load;
1713 }
1714
1715 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1716 PN.eraseFromParent();
1717}
1718
1719SelectHandSpeculativity &
1720SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1721 if (isTrueVal)
1722 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1723 else
1724 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1725 return *this;
1726}
1727
1728bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1729 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1730 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1731}
1732
1733bool SelectHandSpeculativity::areAllSpeculatable() const {
1734 return isSpeculatable(/*isTrueVal=*/true) &&
1735 isSpeculatable(/*isTrueVal=*/false);
1736}
1737
1738bool SelectHandSpeculativity::areAnySpeculatable() const {
1739 return isSpeculatable(/*isTrueVal=*/true) ||
1740 isSpeculatable(/*isTrueVal=*/false);
1741}
1742bool SelectHandSpeculativity::areNoneSpeculatable() const {
1743 return !areAnySpeculatable();
1744}
1745
1746static SelectHandSpeculativity
1747 isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1748 assert(LI.isSimple() && "Only for simple loads");
1749 SelectHandSpeculativity Spec;
1750
1751 const DataLayout &DL = SI.getDataLayout();
1752 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1753 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1754 &LI))
1755 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1756 else if (PreserveCFG)
1757 return Spec;
1758
1759 return Spec;
1760}
1761
1762std::optional<RewriteableMemOps>
1763SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1764 RewriteableMemOps Ops;
1765
1766 for (User *U : SI.users()) {
1767 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1768 U = *BC->user_begin();
1769
1770 if (auto *Store = dyn_cast<StoreInst>(U)) {
1771 // Note that atomic stores can be transformed; atomic semantics do not
1772 // have any meaning for a local alloca. Stores are not speculatable,
1773 // however, so if we can't turn it into a predicated store, we are done.
1774 if (Store->isVolatile() || PreserveCFG)
1775 return {}; // Give up on this `select`.
1776 Ops.emplace_back(Store);
1777 continue;
1778 }
1779
1780 auto *LI = dyn_cast<LoadInst>(U);
1781
1782 // Note that atomic loads can be transformed;
1783 // atomic semantics do not have any meaning for a local alloca.
1784 if (!LI || LI->isVolatile())
1785 return {}; // Give up on this `select`.
1786
1787 PossiblySpeculatableLoad Load(LI);
1788 if (!LI->isSimple()) {
1789 // If the `load` is not simple, we can't speculatively execute it,
1790 // but we could handle this via a CFG modification. But can we?
1791 if (PreserveCFG)
1792 return {}; // Give up on this `select`.
1793 Ops.emplace_back(Load);
1794 continue;
1795 }
1796
1797 SelectHandSpeculativity Spec =
1798 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1799 if (PreserveCFG && !Spec.areAllSpeculatable())
1800 return {}; // Give up on this `select`.
1801
1802 Load.setInt(Spec);
1803 Ops.emplace_back(Load);
1804 }
1805
1806 return Ops;
1807}
1808
1809 static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1810 IRBuilderTy &IRB) {
1811 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1812
1813 Value *TV = SI.getTrueValue();
1814 Value *FV = SI.getFalseValue();
1815 // Replace the given load of the select with a select of two loads.
1816
1817 assert(LI.isSimple() && "We only speculate simple loads");
1818
1819 IRB.SetInsertPoint(&LI);
1820
1821 LoadInst *TL =
1822 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1823 LI.getName() + ".sroa.speculate.load.true");
1824 LoadInst *FL =
1825 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1826 LI.getName() + ".sroa.speculate.load.false");
1827 NumLoadsSpeculated += 2;
1828
1829 // Transfer alignment and AA info if present.
1830 TL->setAlignment(LI.getAlign());
1831 FL->setAlignment(LI.getAlign());
1832
1833 AAMDNodes Tags = LI.getAAMetadata();
1834 if (Tags) {
1835 TL->setAAMetadata(Tags);
1836 FL->setAAMetadata(Tags);
1837 }
1838
1839 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1840 LI.getName() + ".sroa.speculated",
1841 ProfcheckDisableMetadataFixes ? nullptr : &SI);
1842
1843 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1844 LI.replaceAllUsesWith(V);
1845}
1846
1847template <typename T>
1848 static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1849 SelectHandSpeculativity Spec,
1850 DomTreeUpdater &DTU) {
1851 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1852 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1853 BasicBlock *Head = I.getParent();
1854 Instruction *ThenTerm = nullptr;
1855 Instruction *ElseTerm = nullptr;
1856 if (Spec.areNoneSpeculatable())
1857 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1858 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1859 else {
1860 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1861 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1862 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1863 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1864 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1865 }
1866 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1867 Spec = {}; // Do not use `Spec` beyond this point.
1868 BasicBlock *Tail = I.getParent();
1869 Tail->setName(Head->getName() + ".cont");
1870 PHINode *PN;
1871 if (isa<LoadInst>(I))
1872 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1873 for (BasicBlock *SuccBB : successors(Head)) {
1874 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1875 int SuccIdx = IsThen ? 0 : 1;
1876 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1877 auto &CondMemOp = cast<T>(*I.clone());
1878 if (NewMemOpBB != Head) {
1879 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1880 if (isa<LoadInst>(I))
1881 ++NumLoadsPredicated;
1882 else
1883 ++NumStoresPredicated;
1884 } else {
1885 CondMemOp.dropUBImplyingAttrsAndMetadata();
1886 ++NumLoadsSpeculated;
1887 }
1888 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1889 Value *Ptr = SI.getOperand(1 + SuccIdx);
1890 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1891 if (isa<LoadInst>(I)) {
1892 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1893 PN->addIncoming(&CondMemOp, NewMemOpBB);
1894 } else
1895 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1896 }
1897 if (isa<LoadInst>(I)) {
1898 PN->takeName(&I);
1899 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1900 I.replaceAllUsesWith(PN);
1901 }
1902}
1903
1904 static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1905 SelectHandSpeculativity Spec,
1906 DomTreeUpdater &DTU) {
1907 if (auto *LI = dyn_cast<LoadInst>(&I))
1908 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1909 else if (auto *SI = dyn_cast<StoreInst>(&I))
1910 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1911 else
1912 llvm_unreachable_internal("Only for load and store.");
1913}
1914
1915 static bool rewriteSelectInstMemOps(SelectInst &SI,
1916 const RewriteableMemOps &Ops,
1917 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1918 bool CFGChanged = false;
1919 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1920
1921 for (const RewriteableMemOp &Op : Ops) {
1922 SelectHandSpeculativity Spec;
1923 Instruction *I;
1924 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1925 I = *US;
1926 } else {
1927 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1928 I = PSL.getPointer();
1929 Spec = PSL.getInt();
1930 }
1931 if (Spec.areAllSpeculatable()) {
1932 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1933 } else {
1934 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1935 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1936 CFGChanged = true;
1937 }
1938 I->eraseFromParent();
1939 }
1940
1941 for (User *U : make_early_inc_range(SI.users()))
1942 cast<BitCastInst>(U)->eraseFromParent();
1943 SI.eraseFromParent();
1944 return CFGChanged;
1945}
1946
1947/// Compute an adjusted pointer from Ptr by Offset bytes where the
1948/// resulting pointer has PointerTy.
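/// As an illustrative sketch, for a non-zero Offset of 8 this typically emits
/// something like
///   %x.sroa_idx = getelementptr inbounds i8, ptr %x, i64 8
/// followed by a pointer cast only when the type or address space of
/// PointerTy differs from that of Ptr.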
1949static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1950 APInt Offset, Type *PointerTy,
1951 const Twine &NamePrefix) {
1952 if (Offset != 0)
1953 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1954 NamePrefix + "sroa_idx");
1955 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1956 NamePrefix + "sroa_cast");
1957}
1958
1959/// Compute the adjusted alignment for a load or store from an offset.
1960 static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1961 return commonAlignment(getLoadStoreAlignment(I), Offset);
1962 }
1963
1964/// Test whether we can convert a value from the old to the new type.
1965///
1966/// This predicate should be used to guard calls to convertValue in order to
1967/// ensure that we only try to convert viable values. The strategy is that we
1968/// will peel off single element struct and array wrappings to get to an
1969/// underlying value, and convert that value.
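/// As illustrative examples of the rules below: assuming 64-bit pointers,
/// i64 and a ptr in an integral address space are mutually convertible, as
/// are <2 x i32> and i64 (same size in bits); i32 and i64 are not (different
/// bit widths), and integers cannot be converted to pointers in non-integral
/// address spaces.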
1970static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1971 unsigned VScale = 0) {
1972 if (OldTy == NewTy)
1973 return true;
1974
1975 // For integer types, we can't handle any bit-width differences. This would
1976 // break both vector conversions with extension and introduce endianness
1977 // issues when used in conjunction with loads and stores.
1978 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1979 assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1980 cast<IntegerType>(NewTy)->getBitWidth() &&
1981 "We can't have the same bitwidth for different int types");
1982 return false;
1983 }
1984
1985 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1986 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1987
1988 if ((isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) ||
1989 (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy))) {
1990 // Conversion is only possible when the size of scalable vectors is known.
1991 if (!VScale)
1992 return false;
1993
1994 // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
1995 // a single domain (either fixed or scalable). Any additional conversion
1996 // between fixed and scalable types is handled through integer types.
1997 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1998 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
1999
2000 if (isa<ScalableVectorType>(NewTy)) {
2002 return false;
2003
2004 NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
2005 } else {
2007 return false;
2008
2009 OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
2010 }
2011 }
2012
2013 if (NewSize != OldSize)
2014 return false;
2015 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
2016 return false;
2017
2018 // We can convert pointers to integers and vice-versa. Same for vectors
2019 // of pointers and integers.
2020 OldTy = OldTy->getScalarType();
2021 NewTy = NewTy->getScalarType();
2022 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
2023 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
2024 unsigned OldAS = OldTy->getPointerAddressSpace();
2025 unsigned NewAS = NewTy->getPointerAddressSpace();
2026 // Convert pointers if they are pointers from the same address space or
2027 // different integral (not non-integral) address spaces with the same
2028 // pointer size.
2029 return OldAS == NewAS ||
2030 (!DL.isNonIntegralAddressSpace(OldAS) &&
2031 !DL.isNonIntegralAddressSpace(NewAS) &&
2032 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2033 }
2034
2035 // We can convert integers to integral pointers, but not to non-integral
2036 // pointers.
2037 if (OldTy->isIntegerTy())
2038 return !DL.isNonIntegralPointerType(NewTy);
2039
2040 // We can convert integral pointers to integers, but non-integral pointers
2041 // need to remain pointers.
2042 if (!DL.isNonIntegralPointerType(OldTy))
2043 return NewTy->isIntegerTy();
2044
2045 return false;
2046 }
2047
2048 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
2049 return false;
2050
2051 return true;
2052}
2053
2054/// Generic routine to convert an SSA value to a value of a different
2055/// type.
2056///
2057/// This will try various different casting techniques, such as bitcasts,
2058/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
2059/// two types for viability with this routine.
2060static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2061 Type *NewTy) {
2062 Type *OldTy = V->getType();
2063
2064#ifndef NDEBUG
2065 BasicBlock *BB = IRB.GetInsertBlock();
2066 assert(BB && BB->getParent() && "VScale unknown!");
2067 unsigned VScale = BB->getParent()->getVScaleValue();
2068 assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
2069 "Value not convertable to type");
2070#endif
2071
2072 if (OldTy == NewTy)
2073 return V;
2074
2075 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
2076 "Integer types must be the exact same to convert.");
2077
2078 // A variant of bitcast that supports a mixture of fixed and scalable types
2079 // that are known to have the same size.
2080 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
2081 Type *InTy = In->getType();
2082 if (InTy == Ty)
2083 return In;
2084
2086 // For vscale_range(2) expand <4 x i32> to <vscale x 4 x i16> -->
2087 // <4 x i32> to <vscale x 2 x i32> to <vscale x 4 x i16>
2089 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2090 PoisonValue::get(VTy), In,
2091 IRB.getInt64(0)),
2092 Ty);
2093 }
2094
2096 // For vscale_range(2) expand <vscale x 4 x i16> to <4 x i32> -->
2097 // <vscale x 4 x i16> to <vscale x 2 x i32> to <4 x i32>
2099 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2100 IRB.getInt64(0));
2101 }
2102
2103 return IRB.CreateBitCast(In, Ty);
2104 };
2105
2106 // See if we need inttoptr for this type pair. May require additional bitcast.
2107 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2108 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
2109 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
2110 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
2111 // Directly handle i64 to i8*
2112 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
2113 NewTy);
2114 }
2115
2116 // See if we need ptrtoint for this type pair. May require additional bitcast.
2117 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2118 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
2119 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
2120 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
2121 // Expand i8* to i64 --> i8* to i64 to i64
2122 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2123 NewTy);
2124 }
2125
2126 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2127 unsigned OldAS = OldTy->getPointerAddressSpace();
2128 unsigned NewAS = NewTy->getPointerAddressSpace();
2129 // To convert pointers with different address spaces (they are already
2130 // checked convertible, i.e. they have the same pointer size), so far we
2131 // cannot use `bitcast` (which has restrict on the same address space) or
2132 // `addrspacecast` (which is not always no-op casting). Instead, use a pair
2133 // of no-op `ptrtoint`/`inttoptr` casts through an integer with the same bit
2134 // size.
2135 if (OldAS != NewAS) {
2136 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2137 return IRB.CreateIntToPtr(
2138 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2139 DL.getIntPtrType(NewTy)),
2140 NewTy);
2141 }
2142 }
2143
2144 return CreateBitCastLike(V, NewTy);
2145}
2146
2147/// Test whether the given slice use can be promoted to a vector.
2148///
2149/// This function is called to test each entry in a partition which is slated
2150/// for a single slice.
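/// As a rough worked example: for a candidate type <4 x i32> (ElementSize 4)
/// and a slice that is a simple "store i64" covering bytes [8, 16) of the
/// partition, BeginIndex is 2 and EndIndex is 4, so the slice maps onto a
/// <2 x i32> subvector and the check reduces to whether i64 is convertible to
/// <2 x i32>, which it is.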
2151static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2152 VectorType *Ty,
2153 uint64_t ElementSize,
2154 const DataLayout &DL,
2155 unsigned VScale) {
2156 // First validate the slice offsets.
2157 uint64_t BeginOffset =
2158 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2159 uint64_t BeginIndex = BeginOffset / ElementSize;
2160 if (BeginIndex * ElementSize != BeginOffset ||
2161 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2162 return false;
2163 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2164 uint64_t EndIndex = EndOffset / ElementSize;
2165 if (EndIndex * ElementSize != EndOffset ||
2166 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2167 return false;
2168
2169 assert(EndIndex > BeginIndex && "Empty vector!");
2170 uint64_t NumElements = EndIndex - BeginIndex;
2171 Type *SliceTy = (NumElements == 1)
2172 ? Ty->getElementType()
2173 : FixedVectorType::get(Ty->getElementType(), NumElements);
2174
2175 Type *SplitIntTy =
2176 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2177
2178 Use *U = S.getUse();
2179
2180 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2181 if (MI->isVolatile())
2182 return false;
2183 if (!S.isSplittable())
2184 return false; // Skip any unsplittable intrinsics.
2185 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2186 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2187 return false;
2188 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2189 if (LI->isVolatile())
2190 return false;
2191 Type *LTy = LI->getType();
2192 // Disable vector promotion when there are loads or stores of an FCA.
2193 if (LTy->isStructTy())
2194 return false;
2195 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2196 assert(LTy->isIntegerTy());
2197 LTy = SplitIntTy;
2198 }
2199 if (!canConvertValue(DL, SliceTy, LTy, VScale))
2200 return false;
2201 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2202 if (SI->isVolatile())
2203 return false;
2204 Type *STy = SI->getValueOperand()->getType();
2205 // Disable vector promotion when there are loads or stores of an FCA.
2206 if (STy->isStructTy())
2207 return false;
2208 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2209 assert(STy->isIntegerTy());
2210 STy = SplitIntTy;
2211 }
2212 if (!canConvertValue(DL, STy, SliceTy, VScale))
2213 return false;
2214 } else {
2215 return false;
2216 }
2217
2218 return true;
2219}
2220
2221/// Test whether any vector type in \p CandidateTys is viable for promotion.
2222///
2223/// This implements the necessary checking for \c isVectorPromotionViable over
2224/// all slices of the alloca for the given VectorType.
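/// As a rough illustration of the selection below: candidates <4 x float> and
/// <2 x i64> with no common element type are first integer-ified to <4 x i32>
/// and <2 x i64>, then sorted by ascending element count, so <2 x i64> is
/// tried first and is chosen if every slice of the partition is viable for it.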
2225static VectorType *
2226 checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
2227 SmallVectorImpl<VectorType *> &CandidateTys,
2228 bool HaveCommonEltTy, Type *CommonEltTy,
2229 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2230 VectorType *CommonVecPtrTy, unsigned VScale) {
2231 // If we didn't find a vector type, nothing to do here.
2232 if (CandidateTys.empty())
2233 return nullptr;
2234
2235 // Pointer-ness is sticky, if we had a vector-of-pointers candidate type,
2236 // then we should choose it, not some other alternative.
2237 // But, we can't perform a no-op pointer address space change via bitcast,
2238 // so if we didn't have a common pointer element type, bail.
2239 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2240 return nullptr;
2241
2242 // Try to pick the "best" element type out of the choices.
2243 if (!HaveCommonEltTy && HaveVecPtrTy) {
2244 // If there was a pointer element type, there's really only one choice.
2245 CandidateTys.clear();
2246 CandidateTys.push_back(CommonVecPtrTy);
2247 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2248 // Integer-ify vector types.
2249 for (VectorType *&VTy : CandidateTys) {
2250 if (!VTy->getElementType()->isIntegerTy())
2251 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2252 VTy->getContext(), VTy->getScalarSizeInBits())));
2253 }
2254
2255 // Rank the remaining candidate vector types. This is easy because we know
2256 // they're all integer vectors. We sort by ascending number of elements.
2257 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2258 (void)DL;
2259 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2260 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2261 "Cannot have vector types of different sizes!");
2262 assert(RHSTy->getElementType()->isIntegerTy() &&
2263 "All non-integer types eliminated!");
2264 assert(LHSTy->getElementType()->isIntegerTy() &&
2265 "All non-integer types eliminated!");
2266 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2267 cast<FixedVectorType>(LHSTy)->getNumElements();
2268 };
2269 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2270 (void)DL;
2271 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2272 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2273 "Cannot have vector types of different sizes!");
2274 assert(RHSTy->getElementType()->isIntegerTy() &&
2275 "All non-integer types eliminated!");
2276 assert(LHSTy->getElementType()->isIntegerTy() &&
2277 "All non-integer types eliminated!");
2278 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2279 cast<FixedVectorType>(LHSTy)->getNumElements();
2280 };
2281 llvm::sort(CandidateTys, RankVectorTypesComp);
2282 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2283 CandidateTys.end());
2284 } else {
2285// The only way to have the same element type in every vector type is to
2286// have the same vector type. Check that and remove all but one.
2287#ifndef NDEBUG
2288 for (VectorType *VTy : CandidateTys) {
2289 assert(VTy->getElementType() == CommonEltTy &&
2290 "Unaccounted for element type!");
2291 assert(VTy == CandidateTys[0] &&
2292 "Different vector types with the same element type!");
2293 }
2294#endif
2295 CandidateTys.resize(1);
2296 }
2297
2298 // FIXME: hack. Do we have a named constant for this?
2299 // SDAG SDNode can't have more than 65535 operands.
2300 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2301 return cast<FixedVectorType>(VTy)->getNumElements() >
2302 std::numeric_limits<unsigned short>::max();
2303 });
2304
2305 // Find a vector type viable for promotion by iterating over all slices.
2306 auto *VTy = llvm::find_if(CandidateTys, [&](VectorType *VTy) -> bool {
2307 uint64_t ElementSize =
2308 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2309
2310 // While LLVM vectors are defined as bit-packed, we don't support element
2311 // sizes that aren't byte-sized.
2312 if (ElementSize % 8)
2313 return false;
2314 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2315 "vector size not a multiple of element size?");
2316 ElementSize /= 8;
2317
2318 for (const Slice &S : P)
2319 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
2320 return false;
2321
2322 for (const Slice *S : P.splitSliceTails())
2323 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
2324 return false;
2325
2326 return true;
2327 });
2328 return VTy != CandidateTys.end() ? *VTy : nullptr;
2329}
2330
2331 static VectorType *createAndCheckVectorTypesForPromotion(
2332 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2333 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2334 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2335 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2336 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2337 [[maybe_unused]] VectorType *OriginalElt =
2338 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2339 // Consider additional vector types where the element type size is a
2340 // multiple of load/store element size.
2341 for (Type *Ty : OtherTys) {
2342 if (!VectorType::isValidElementType(Ty))
2343 continue;
2344 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2345 // Make a copy of CandidateTys and iterate through it, because we
2346 // might append to CandidateTys in the loop.
2347 for (VectorType *const VTy : CandidateTysCopy) {
2348 // The elements in the copy should remain invariant throughout the loop
2349 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2350 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2351 unsigned ElementSize =
2352 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2353 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2354 VectorSize % TypeSize == 0) {
2355 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2356 CheckCandidateType(NewVTy);
2357 }
2358 }
2359 }
2360
2361 return checkVectorTypesForPromotion(
2362 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2363 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2364}
2365
2366/// Test whether the given alloca partitioning and range of slices can be
2367/// promoted to a vector.
2368///
2369/// This is a quick test to check whether we can rewrite a particular alloca
2370/// partition (and its newly formed alloca) into a vector alloca with only
2371/// whole-vector loads and stores such that it could be promoted to a vector
2372/// SSA value. We only can ensure this for a limited set of operations, and we
2373/// don't want to do the rewrites unless we are confident that the result will
2374/// be promotable, so we have an early test here.
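/// As an illustrative sketch of an accepted pattern:
///   %a = alloca <4 x float>
///   store <4 x float> %v, ptr %a
///   %p = getelementptr inbounds i8, ptr %a, i64 4
///   store float %f, ptr %p
///   %r = load <4 x float>, ptr %a
/// Every access is either the whole vector or a single element, so the
/// partition can be rewritten as pure vector operations.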
2375 static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
2376 unsigned VScale) {
2377 // Collect the candidate types for vector-based promotion. Also track whether
2378 // we have different element types.
2379 SmallVector<VectorType *, 4> CandidateTys;
2380 SetVector<Type *> LoadStoreTys;
2381 SetVector<Type *> DeferredTys;
2382 Type *CommonEltTy = nullptr;
2383 VectorType *CommonVecPtrTy = nullptr;
2384 bool HaveVecPtrTy = false;
2385 bool HaveCommonEltTy = true;
2386 bool HaveCommonVecPtrTy = true;
2387 auto CheckCandidateType = [&](Type *Ty) {
2388 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
2389 // Bail out if this candidate's total bit size differs from prior candidates.
2390 if (!CandidateTys.empty()) {
2391 VectorType *V = CandidateTys[0];
2392 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2393 DL.getTypeSizeInBits(V).getFixedValue()) {
2394 CandidateTys.clear();
2395 return;
2396 }
2397 }
2398 CandidateTys.push_back(VTy);
2399 Type *EltTy = VTy->getElementType();
2400
2401 if (!CommonEltTy)
2402 CommonEltTy = EltTy;
2403 else if (CommonEltTy != EltTy)
2404 HaveCommonEltTy = false;
2405
2406 if (EltTy->isPointerTy()) {
2407 HaveVecPtrTy = true;
2408 if (!CommonVecPtrTy)
2409 CommonVecPtrTy = VTy;
2410 else if (CommonVecPtrTy != VTy)
2411 HaveCommonVecPtrTy = false;
2412 }
2413 }
2414 };
2415
2416 // Put load and store types into a set for de-duplication.
2417 for (const Slice &S : P) {
2418 Type *Ty;
2419 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2420 Ty = LI->getType();
2421 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2422 Ty = SI->getValueOperand()->getType();
2423 else
2424 continue;
2425
2426 auto CandTy = Ty->getScalarType();
2427 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2428 S.endOffset() != P.endOffset())) {
2429 DeferredTys.insert(Ty);
2430 continue;
2431 }
2432
2433 LoadStoreTys.insert(Ty);
2434 // Consider any loads or stores that are the exact size of the slice.
2435 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2436 CheckCandidateType(Ty);
2437 }
2438
2439 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2440 if (VectorType *VTy = createAndCheckVectorTypesForPromotion(
2441 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2442 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2443 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2444 return VTy;
2445
2446 CandidateTys.clear();
2447 return createAndCheckVectorTypesForPromotion(
2448 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2449 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2450 CommonVecPtrTy, VScale);
2451}
2452
2453/// Test whether a slice of an alloca is valid for integer widening.
2454///
2455/// This implements the necessary checking for the \c isIntegerWideningViable
2456/// test below on a single slice of the alloca.
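/// As a rough worked example with AllocaTy == i64 (Size == 8): a "store i32"
/// at relative offset 0 or 4 is acceptable but does not set \p WholeAllocaOp,
/// a "load i64" covering [0, 8) is acceptable and does set it, and any access
/// extending past offset 8 rejects the slice.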
2457static bool isIntegerWideningViableForSlice(const Slice &S,
2458 uint64_t AllocBeginOffset,
2459 Type *AllocaTy,
2460 const DataLayout &DL,
2461 bool &WholeAllocaOp) {
2462 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2463
2464 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2465 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2466
2467 Use *U = S.getUse();
2468
2469 // Lifetime intrinsics operate over the whole alloca whose sizes are usually
2470 // larger than other load/store slices (RelEnd > Size). But lifetime intrinsics
2471 // are always promotable and should not impact the promotability of other
2472 // slices in the partition.
2473 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2474 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2475 return true;
2476 }
2477
2478 // We can't reasonably handle cases where the load or store extends past
2479 // the end of the alloca's type and into its padding.
2480 if (RelEnd > Size)
2481 return false;
2482
2483 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2484 if (LI->isVolatile())
2485 return false;
2486 // We can't handle loads that extend past the allocated memory.
2487 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
2488 if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size)
2489 return false;
2490 // So far, AllocaSliceRewriter does not support widening split slice tails
2491 // in rewriteIntegerLoad.
2492 if (S.beginOffset() < AllocBeginOffset)
2493 return false;
2494 // Note that we don't count vector loads or stores as whole-alloca
2495 // operations which enable integer widening because we would prefer to use
2496 // vector widening instead.
2497 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2498 WholeAllocaOp = true;
2499 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2500 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2501 return false;
2502 } else if (RelBegin != 0 || RelEnd != Size ||
2503 !canConvertValue(DL, AllocaTy, LI->getType())) {
2504 // Non-integer loads need to be convertible from the alloca type so that
2505 // they are promotable.
2506 return false;
2507 }
2508 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2509 Type *ValueTy = SI->getValueOperand()->getType();
2510 if (SI->isVolatile())
2511 return false;
2512 // We can't handle stores that extend past the allocated memory.
2513 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2514 if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size)
2515 return false;
2516 // So far, AllocaSliceRewriter does not support widening split slice tails
2517 // in rewriteIntegerStore.
2518 if (S.beginOffset() < AllocBeginOffset)
2519 return false;
2520 // Note that we don't count vector loads or stores as whole-alloca
2521 // operations which enable integer widening because we would prefer to use
2522 // vector widening instead.
2523 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2524 WholeAllocaOp = true;
2525 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2526 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2527 return false;
2528 } else if (RelBegin != 0 || RelEnd != Size ||
2529 !canConvertValue(DL, ValueTy, AllocaTy)) {
2530 // Non-integer stores need to be convertible to the alloca type so that
2531 // they are promotable.
2532 return false;
2533 }
2534 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2535 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2536 return false;
2537 if (!S.isSplittable())
2538 return false; // Skip any unsplittable intrinsics.
2539 } else {
2540 return false;
2541 }
2542
2543 return true;
2544}
2545
2546/// Test whether the given alloca partition's integer operations can be
2547/// widened to promotable ones.
2548///
2549/// This is a quick test to check whether we can rewrite the integer loads and
2550/// stores to a particular alloca into wider loads and stores and be able to
2551/// promote the resulting alloca.
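/// As an illustrative sketch of a pattern this accepts:
///   %a = alloca i64
///   store i32 %lo, ptr %a
///   %hi.ptr = getelementptr inbounds i8, ptr %a, i64 4
///   store i32 %hi, ptr %hi.ptr
///   %v = load i64, ptr %a
/// The i64 load covers the whole alloca, and the two i32 stores can later be
/// rewritten as shift-and-mask updates of a single widened i64 value.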
2552static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2553 const DataLayout &DL) {
2554 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2555 // Don't create integer types larger than the maximum bitwidth.
2556 if (SizeInBits > IntegerType::MAX_INT_BITS)
2557 return false;
2558
2559 // Don't try to handle allocas with bit-padding.
2560 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2561 return false;
2562
2563 // We need to ensure that an integer type with the appropriate bitwidth can
2564 // be converted to the alloca type, whatever that is. We don't want to force
2565 // the alloca itself to have an integer type if there is a more suitable one.
2566 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2567 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2568 !canConvertValue(DL, IntTy, AllocaTy))
2569 return false;
2570
2571 // While examining uses, we ensure that the alloca has a covering load or
2572 // store. We don't want to widen the integer operations only to fail to
2573 // promote due to some other unsplittable entry (which we may make splittable
2574 // later). However, if there are only splittable uses, go ahead and assume
2575 // that we cover the alloca.
2576 // FIXME: We shouldn't consider split slices that happen to start in the
2577 // partition here...
2578 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2579
2580 for (const Slice &S : P)
2581 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2582 WholeAllocaOp))
2583 return false;
2584
2585 for (const Slice *S : P.splitSliceTails())
2586 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2587 WholeAllocaOp))
2588 return false;
2589
2590 return WholeAllocaOp;
2591}
2592
2593static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2594 IntegerType *Ty, uint64_t Offset,
2595 const Twine &Name) {
2596 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2597 IntegerType *IntTy = cast<IntegerType>(V->getType());
2598 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2599 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2600 "Element extends past full value");
2601 uint64_t ShAmt = 8 * Offset;
2602 if (DL.isBigEndian())
2603 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2604 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2605 if (ShAmt) {
2606 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2607 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2608 }
2609 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2610 "Cannot extract to a larger integer!");
2611 if (Ty != IntTy) {
2612 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2613 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2614 }
2615 return V;
2616}
2617
2618static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2619 Value *V, uint64_t Offset, const Twine &Name) {
2620 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2621 IntegerType *Ty = cast<IntegerType>(V->getType());
2622 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2623 "Cannot insert a larger integer!");
2624 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2625 if (Ty != IntTy) {
2626 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2627 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2628 }
2629 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2630 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2631 "Element store outside of alloca store");
2632 uint64_t ShAmt = 8 * Offset;
2633 if (DL.isBigEndian())
2634 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2635 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2636 if (ShAmt) {
2637 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2638 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2639 }
2640
2641 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2642 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2643 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2644 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2645 V = IRB.CreateOr(Old, V, Name + ".insert");
2646 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2647 }
2648 return V;
2649}
2650
2651static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2652 unsigned EndIndex, const Twine &Name) {
2653 auto *VecTy = cast<FixedVectorType>(V->getType());
2654 unsigned NumElements = EndIndex - BeginIndex;
2655 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2656
2657 if (NumElements == VecTy->getNumElements())
2658 return V;
2659
2660 if (NumElements == 1) {
2661 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2662 Name + ".extract");
2663 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2664 return V;
2665 }
2666
2667 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2668 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2669 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2670 return V;
2671}
2672
2673static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2674 unsigned BeginIndex, const Twine &Name) {
2675 VectorType *VecTy = cast<VectorType>(Old->getType());
2676 assert(VecTy && "Can only insert a vector into a vector");
2677
2678 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2679 if (!Ty) {
2680 // Single element to insert.
2681 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2682 Name + ".insert");
2683 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2684 return V;
2685 }
2686
2687 unsigned NumSubElements = cast<FixedVectorType>(Ty)->getNumElements();
2688 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
2689
2690 assert(NumSubElements <= NumElements && "Too many elements!");
2691 if (NumSubElements == NumElements) {
2692 assert(V->getType() == VecTy && "Vector type mismatch");
2693 return V;
2694 }
2695 unsigned EndIndex = BeginIndex + NumSubElements;
2696
2697 // When inserting a smaller vector into the larger one, we first use a
2698 // shufflevector to widen it with poison elements, and then a second
2699 // shufflevector to select between the loaded (old) vector and the
2700 // incoming vector.
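// As a rough worked example: inserting a <2 x i32> value at BeginIndex 1 of a
// <4 x i32> alloca first widens it with mask <-1, 0, 1, -1> (poison where the
// old value will be kept), then blends with mask <4, 1, 2, 7>, where indices
// 4-7 select elements of the old vector.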
2701 SmallVector<int, 8> Mask;
2702 Mask.reserve(NumElements);
2703 for (unsigned Idx = 0; Idx != NumElements; ++Idx)
2704 if (Idx >= BeginIndex && Idx < EndIndex)
2705 Mask.push_back(Idx - BeginIndex);
2706 else
2707 Mask.push_back(-1);
2708 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2709 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2710
2711 Mask.clear();
2712 for (unsigned Idx = 0; Idx != NumElements; ++Idx)
2713 if (Idx >= BeginIndex && Idx < EndIndex)
2714 Mask.push_back(Idx);
2715 else
2716 Mask.push_back(Idx + NumElements);
2717 V = IRB.CreateShuffleVector(V, Old, Mask, Name + "blend");
2718 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2719 return V;
2720}
2721
2722/// This function takes two vector values and combines them into a single vector
2723/// by concatenating their elements. The function handles:
2724///
2725/// 1. Element type mismatch: If either vector's element type differs from
2726/// NewAIEltType, the function bitcasts the vector to use NewAIEltType while
2727/// preserving the total bit width (adjusting the number of elements
2728/// accordingly).
2729///
2730/// 2. Size mismatch: After transforming the vectors to have the desired element
2731/// type, if the two vectors have different numbers of elements, the smaller
2732/// vector is extended with poison values to match the size of the larger
2733/// vector before concatenation.
2734///
2735/// 3. Concatenation: The vectors are merged using a shuffle operation that
2736/// places all elements of V0 first, followed by all elements of V1.
2737///
2738/// \param V0 The first vector to merge (must be a vector type)
2739/// \param V1 The second vector to merge (must be a vector type)
2740/// \param DL The data layout for size calculations
2741/// \param NewAIEltTy The desired element type for the result vector
2742/// \param Builder IRBuilder for creating new instructions
2743/// \return A new vector containing all elements from V0 followed by all
2744/// elements from V1
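/// As an illustrative sketch: merging V0 = <2 x float> and V1 = <1 x i64> with
/// NewAIEltTy = float first bitcasts V1 to <2 x float>; the element counts
/// then match, so the result is a single shuffle with mask <0, 1, 2, 3>
/// producing a <4 x float> holding V0's elements followed by V1's
/// reinterpreted elements.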
2746 Type *NewAIEltTy, IRBuilder<> &Builder) {
2747 // V0 and V1 are vectors
2748 // Create a new vector type with combined elements
2749 // Use ShuffleVector to concatenate the vectors
2750 auto *VecType0 = cast<FixedVectorType>(V0->getType());
2751 auto *VecType1 = cast<FixedVectorType>(V1->getType());
2752
2753 // If V0/V1 element types are different from NewAllocaElementType,
2754 // we need to introduce bitcasts before merging them
2755 auto BitcastIfNeeded = [&](Value *&V, FixedVectorType *&VecType,
2756 const char *DebugName) {
2757 Type *EltType = VecType->getElementType();
2758 if (EltType != NewAIEltTy) {
2759 // Calculate new number of elements to maintain same bit width
2760 unsigned TotalBits =
2761 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
2762 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
2763
2764 auto *NewVecType = FixedVectorType::get(NewAIEltTy, NewNumElts);
2765 V = Builder.CreateBitCast(V, NewVecType);
2766 VecType = NewVecType;
2767 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");
2768 }
2769 };
2770
2771 BitcastIfNeeded(V0, VecType0, "V0");
2772 BitcastIfNeeded(V1, VecType1, "V1");
2773
2774 unsigned NumElts0 = VecType0->getNumElements();
2775 unsigned NumElts1 = VecType1->getNumElements();
2776
2777 SmallVector<int, 16> ShuffleMask;
2778
2779 if (NumElts0 == NumElts1) {
2780 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2781 ShuffleMask.push_back(i);
2782 } else {
2783 // If two vectors have different sizes, we need to extend
2784 // the smaller vector to the size of the larger vector.
2785 unsigned SmallSize = std::min(NumElts0, NumElts1);
2786 unsigned LargeSize = std::max(NumElts0, NumElts1);
2787 bool IsV0Smaller = NumElts0 < NumElts1;
2788 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2789 SmallVector<int, 16> ExtendMask;
2790 for (unsigned i = 0; i < SmallSize; ++i)
2791 ExtendMask.push_back(i);
2792 for (unsigned i = SmallSize; i < LargeSize; ++i)
2793 ExtendMask.push_back(PoisonMaskElem);
2794 ExtendedVec = Builder.CreateShuffleVector(
2795 ExtendedVec, PoisonValue::get(ExtendedVec->getType()), ExtendMask);
2796 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
2797 for (unsigned i = 0; i < NumElts0; ++i)
2798 ShuffleMask.push_back(i);
2799 for (unsigned i = 0; i < NumElts1; ++i)
2800 ShuffleMask.push_back(LargeSize + i);
2801 }
2802
2803 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2804}
2805
2806namespace {
2807
2808 /// Visitor to rewrite instructions using a particular slice of an alloca
2809/// to use a new alloca.
2810///
2811/// Also implements the rewriting to vector-based accesses when the partition
2812/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2813/// lives here.
2814class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2815 // Befriend the base class so it can delegate to private visit methods.
2816 friend class InstVisitor<AllocaSliceRewriter, bool>;
2817
2818 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2819
2820 const DataLayout &DL;
2821 AllocaSlices &AS;
2822 SROA &Pass;
2823 AllocaInst &OldAI, &NewAI;
2824 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2825 Type *NewAllocaTy;
2826
2827 // This is a convenience and flag variable that will be null unless the new
2828 // alloca's integer operations should be widened to this integer type due to
2829 // passing isIntegerWideningViable above. If it is non-null, the desired
2830 // integer type will be stored here for easy access during rewriting.
2831 IntegerType *IntTy;
2832
2833 // If we are rewriting an alloca partition which can be written as pure
2834 // vector operations, we stash extra information here. When VecTy is
2835 // non-null, we have some strict guarantees about the rewritten alloca:
2836 // - The new alloca is exactly the size of the vector type here.
2837 // - The accesses all either map to the entire vector or to a single
2838 // element.
2839 // - The set of accessing instructions is only one of those handled above
2840 // in isVectorPromotionViable. Generally these are the same access kinds
2841 // which are promotable via mem2reg.
2842 VectorType *VecTy;
2843 Type *ElementTy;
2844 uint64_t ElementSize;
2845
2846 // The original offset of the slice currently being rewritten relative to
2847 // the original alloca.
2848 uint64_t BeginOffset = 0;
2849 uint64_t EndOffset = 0;
2850
2851 // The new offsets of the slice currently being rewritten relative to the
2852 // original alloca.
2853 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2854
2855 uint64_t SliceSize = 0;
2856 bool IsSplittable = false;
2857 bool IsSplit = false;
2858 Use *OldUse = nullptr;
2859 Instruction *OldPtr = nullptr;
2860
2861 // Track post-rewrite users which are PHI nodes and Selects.
2862 SmallSetVector<PHINode *, 8> &PHIUsers;
2863 SmallSetVector<SelectInst *, 8> &SelectUsers;
2864
2866 // Utility IR builder, whose name prefix is set up for each visited use, and
2866 // the insertion point is set to point to the user.
2867 IRBuilderTy IRB;
2868
2869 // Return the new alloca, addrspacecasted if required to avoid changing the
2870 // addrspace of a volatile access.
2871 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2872 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2873 return &NewAI;
2874
2875 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2876 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2877 }
2878
2879public:
2880 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2881 AllocaInst &OldAI, AllocaInst &NewAI,
2882 uint64_t NewAllocaBeginOffset,
2883 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2884 VectorType *PromotableVecTy,
2885 SmallSetVector<PHINode *, 8> &PHIUsers,
2886 SmallSetVector<SelectInst *, 8> &SelectUsers)
2887 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2888 NewAllocaBeginOffset(NewAllocaBeginOffset),
2889 NewAllocaEndOffset(NewAllocaEndOffset),
2890 NewAllocaTy(NewAI.getAllocatedType()),
2891 IntTy(
2892 IsIntegerPromotable
2893 ? Type::getIntNTy(NewAI.getContext(),
2894 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2895 .getFixedValue())
2896 : nullptr),
2897 VecTy(PromotableVecTy),
2898 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2899 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2900 : 0),
2901 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2902 IRB(NewAI.getContext(), ConstantFolder()) {
2903 if (VecTy) {
2904 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2905 "Only multiple-of-8 sized vector elements are viable");
2906 ++NumVectorized;
2907 }
2908 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2909 }
2910
2911 bool visit(AllocaSlices::const_iterator I) {
2912 bool CanSROA = true;
2913 BeginOffset = I->beginOffset();
2914 EndOffset = I->endOffset();
2915 IsSplittable = I->isSplittable();
2916 IsSplit =
2917 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2918 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2919 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2920 LLVM_DEBUG(dbgs() << "\n");
2921
2922 // Compute the intersecting offset range.
2923 assert(BeginOffset < NewAllocaEndOffset);
2924 assert(EndOffset > NewAllocaBeginOffset);
2925 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2926 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2927
2928 SliceSize = NewEndOffset - NewBeginOffset;
2929 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2930 << ") NewBegin:(" << NewBeginOffset << ", "
2931 << NewEndOffset << ") NewAllocaBegin:("
2932 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2933 << ")\n");
2934 assert(IsSplit || NewBeginOffset == BeginOffset);
2935 OldUse = I->getUse();
2936 OldPtr = cast<Instruction>(OldUse->get());
2937
2938 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2939 IRB.SetInsertPoint(OldUserI);
2940 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2941 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2942 Twine(BeginOffset) + ".");
2943
2944 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2945 if (VecTy || IntTy)
2946 assert(CanSROA);
2947 return CanSROA;
2948 }
2949
2950 /// Attempts to rewrite a partition using tree-structured merge optimization.
2951 ///
2952 /// This function analyzes a partition to determine if it can be optimized
2953 /// using a tree-structured merge pattern, where multiple non-overlapping
2954 /// stores completely fill an alloca, and there is no load from the alloca in
2955 /// the middle of the stores. Such patterns can be optimized by eliminating
2956 /// the intermediate stores and directly constructing the final vector by
2957 /// using shufflevectors.
2958 ///
2959 /// Example transformation:
2960 /// Before: (stores do not have to be in order)
2961 /// %alloca = alloca <8 x float>
2962 /// store <2 x float> %val0, ptr %alloca ; offset 0-1
2963 /// store <2 x float> %val2, ptr %alloca+16 ; offset 4-5
2964 /// store <2 x float> %val1, ptr %alloca+8 ; offset 2-3
2965 /// store <2 x float> %val3, ptr %alloca+24 ; offset 6-7
2966 ///
2967 /// After:
2968 /// %alloca = alloca <8 x float>
2969 /// %shuffle0 = shufflevector %val0, %val1, <4 x i32> <i32 0, i32 1, i32 2,
2970 /// i32 3>
2971 /// %shuffle1 = shufflevector %val2, %val3, <4 x i32> <i32 0, i32 1, i32 2,
2972 /// i32 3>
2973 /// %shuffle2 = shufflevector %shuffle0, %shuffle1, <8 x i32> <i32 0, i32 1,
2974 /// i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2975 /// store %shuffle2, ptr %alloca
2976 ///
2977 /// The optimization looks for partitions that:
2978 /// 1. Have no overlapping split slice tails
2979 /// 2. Contain non-overlapping stores that cover the entire alloca
2980 /// 3. Have exactly one load that reads the complete alloca structure and does
2981 /// not occur in the middle of the stores (TODO: maybe we can relax the
2982 /// constraint about reading the entire alloca structure)
2983 ///
2984 /// \param P The partition to analyze and potentially rewrite
2985 /// \return An optional vector of values that were deleted during the rewrite
2986 /// process, or std::nullopt if the partition cannot be optimized
2987 /// using tree-structured merge
2988 std::optional<SmallVector<Value *, 4>>
2989 rewriteTreeStructuredMerge(Partition &P) {
2990 // No tail slices that overlap with the partition
2991 if (P.splitSliceTails().size() > 0)
2992 return std::nullopt;
2993
2994 SmallVector<Value *, 4> DeletedValues;
2995 LoadInst *TheLoad = nullptr;
2996
2997 // Structure to hold store information
2998 struct StoreInfo {
2999 StoreInst *Store;
3000 uint64_t BeginOffset;
3001 uint64_t EndOffset;
3002 Value *StoredValue;
3003 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
3004 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
3005 };
3006
3007 SmallVector<StoreInfo, 4> StoreInfos;
3008
3009 // If the new alloca is a fixed vector type, we use its element type as the
3010 // allocated element type, otherwise we use i8 as the allocated element type.
3011 Type *AllocatedEltTy =
3012 isa<FixedVectorType>(NewAI.getAllocatedType())
3013 ? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
3014 : Type::getInt8Ty(NewAI.getContext());
3015 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
3016
3017 // Helper to check that a type is
3018 // 1. A fixed vector type
3019 // 2. The element type is not a pointer
3020 // 3. The element type size is a whole number of bytes (multiple of 8 bits)
3021 // We only handle loads and stores whose types meet these conditions.
3022 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
3023 auto *FixedVecTy = dyn_cast<FixedVectorType>(Ty);
3024 return FixedVecTy &&
3025 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
3026 !FixedVecTy->getElementType()->isPointerTy();
3027 };
3028
3029 for (Slice &S : P) {
3030 auto *User = cast<Instruction>(S.getUse()->getUser());
3031 if (auto *LI = dyn_cast<LoadInst>(User)) {
3032 // Do not handle the case if
3033 // 1. There is more than one load
3034 // 2. The load is volatile
3035 // 3. The load does not read the entire alloca structure
3036 // 4. The load does not meet the conditions in the helper function
3037 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
3038 S.beginOffset() != NewAllocaBeginOffset ||
3039 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
3040 return std::nullopt;
3041 TheLoad = LI;
3042 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
3043 // Do not handle the case if
3044 // 1. The store does not meet the conditions in the helper function
3045 // 2. The store is volatile
3046 // 3. The total store size is not a multiple of the allocated element
3047 // type size
3048 if (!IsTypeValidForTreeStructuredMerge(
3049 SI->getValueOperand()->getType()) ||
3050 SI->isVolatile())
3051 return std::nullopt;
3052 auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
3053 unsigned NumElts = VecTy->getNumElements();
3054 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
3055 if (NumElts * EltSize % AllocatedEltTySize != 0)
3056 return std::nullopt;
3057 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
3058 SI->getValueOperand());
3059 } else {
3060 // If we have instructions other than load and store, we cannot do the
3061 // tree structured merge
3062 return std::nullopt;
3063 }
3064 }
3065 // If we do not have any load, we cannot do the tree structured merge
3066 if (!TheLoad)
3067 return std::nullopt;
3068
3069 // If we do not have multiple stores, we cannot do the tree structured merge
3070 if (StoreInfos.size() < 2)
3071 return std::nullopt;
3072
3073 // Stores should not overlap and should cover the whole alloca
3074 // Sort by begin offset
3075 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
3076 return A.BeginOffset < B.BeginOffset;
3077 });
3078
3079 // Check for overlaps and coverage
3080 uint64_t ExpectedStart = NewAllocaBeginOffset;
3081 for (auto &StoreInfo : StoreInfos) {
3082 uint64_t BeginOff = StoreInfo.BeginOffset;
3083 uint64_t EndOff = StoreInfo.EndOffset;
3084
3085 // Check for gap or overlap
3086 if (BeginOff != ExpectedStart)
3087 return std::nullopt;
3088
3089 ExpectedStart = EndOff;
3090 }
3091 // Check that stores cover the entire alloca
3092 if (ExpectedStart != NewAllocaEndOffset)
3093 return std::nullopt;
3094
3095 // Stores should be in the same basic block
3096 // The load should not be in the middle of the stores
3097 // Note:
3098 // If the load is in a different basic block than the stores, we can still
3099 // do the tree structured merge. This is because we do not perform
3100 // store-to-load forwarding here. The merged vector will be stored back to
3101 // NewAI and the new load will load from NewAI. The forwarding will be
3102 // handled later when we try to promote NewAI.
3103 BasicBlock *LoadBB = TheLoad->getParent();
3104 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3105
3106 for (auto &StoreInfo : StoreInfos) {
3107 if (StoreInfo.Store->getParent() != StoreBB)
3108 return std::nullopt;
3109 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3110 return std::nullopt;
3111 }
3112
3113 // If we reach here, the partition can be rewritten with a tree structured
3114 // merge.
3115 LLVM_DEBUG({
3116 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
3117 << "\n Ordered stores:\n";
3118 for (auto [i, Info] : enumerate(StoreInfos))
3119 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
3120 << Info.EndOffset << ") \tStore: " << *Info.Store
3121 << "\tValue: " << *Info.StoredValue << "\n";
3122 });
3123
3124 // Instead of having these stores, we merge all the stored values into a
3125 // vector and store the merged value into the alloca
3126 std::queue<Value *> VecElements;
3127 IRBuilder<> Builder(StoreInfos.back().Store);
3128 for (const auto &Info : StoreInfos) {
3129 DeletedValues.push_back(Info.Store);
3130 VecElements.push(Info.StoredValue);
3131 }
3132
3133 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");
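// The queue is reduced pairwise, so the stored values are merged as a
// balanced tree of shufflevectors. For illustration, with five stored
// values v0..v4 (a hypothetical odd count) the rounds proceed roughly as:
//   [v0 v1 v2 v3 v4] -> [v01 v23 v4] -> [v0123 v4] -> [v01234]
// where the leftover value of an odd-sized round is rotated to the back so
// the element order of the final vector is preserved.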
3134 while (VecElements.size() > 1) {
3135 const auto NumElts = VecElements.size();
3136 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
3137 Value *V0 = VecElements.front();
3138 VecElements.pop();
3139 Value *V1 = VecElements.front();
3140 VecElements.pop();
3141 Value *Merged = mergeTwoVectors(V0, V1, DL, AllocatedEltTy, Builder);
3142 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");
3143 VecElements.push(Merged);
3144 }
3145 if (NumElts % 2 == 1) {
3146 Value *V = VecElements.front();
3147 VecElements.pop();
3148 VecElements.push(V);
3149 }
3150 }
3151
3152 // Store the merged value into the alloca
3153 Value *MergedValue = VecElements.front();
3154 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3155
3156 IRBuilder<> LoadBuilder(TheLoad);
3157 TheLoad->replaceAllUsesWith(LoadBuilder.CreateAlignedLoad(
3158 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),
3159 TheLoad->getName() + ".sroa.new.load"));
3160 DeletedValues.push_back(TheLoad);
3161
3162 return DeletedValues;
3163 }
3164
3165private:
3166 // Make sure the other visit overloads are visible.
3167 using Base::visit;
3168
3169 // Every instruction which can end up as a user must have a rewrite rule.
3170 bool visitInstruction(Instruction &I) {
3171 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
3172 llvm_unreachable("No rewrite rule for this instruction!");
3173 }
3174
3175 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
3176 // Note that the offset computation can use BeginOffset or NewBeginOffset
3177 // interchangeably for unsplit slices.
3178 assert(IsSplit || BeginOffset == NewBeginOffset);
3179 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3180
3181 StringRef OldName = OldPtr->getName();
3182 // Skip through the last '.sroa.' component of the name.
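// As a hypothetical example, an old pointer named "x.sroa.3.16.foo" is
// reduced to "foo", which is then used (with a trailing '.') as the name
// prefix for the new pointer.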
3183 size_t LastSROAPrefix = OldName.rfind(".sroa.");
3184 if (LastSROAPrefix != StringRef::npos) {
3185 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
3186 // Look for an SROA slice index.
3187 size_t IndexEnd = OldName.find_first_not_of("0123456789");
3188 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
3189 // Strip the index and look for the offset.
3190 OldName = OldName.substr(IndexEnd + 1);
3191 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
3192 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
3193 // Strip the offset.
3194 OldName = OldName.substr(OffsetEnd + 1);
3195 }
3196 }
3197 // Strip any SROA suffixes as well.
3198 OldName = OldName.substr(0, OldName.find(".sroa_"));
3199
3200 return getAdjustedPtr(IRB, DL, &NewAI,
3201 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
3202 PointerTy, Twine(OldName) + ".");
3203 }
3204
3205 /// Compute a suitable alignment to access this slice of the *new*
3206 /// alloca.
3207 ///
3208 /// The result is the alloca's alignment reduced to the largest alignment
3209 /// guaranteed at this slice's byte offset from the start of the new alloca.
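///
/// For illustration (hypothetical values): with NewAI aligned to 16 bytes and
/// a slice beginning 8 bytes into the new alloca, the usable alignment is
/// commonAlignment(16, 8) == 8.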
3210 Align getSliceAlign() {
3211 return commonAlignment(NewAI.getAlign(),
3212 NewBeginOffset - NewAllocaBeginOffset);
3213 }
3214
3215 unsigned getIndex(uint64_t Offset) {
3216 assert(VecTy && "Can only call getIndex when rewriting a vector");
3217 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
3218 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
3219 uint32_t Index = RelOffset / ElementSize;
3220 assert(Index * ElementSize == RelOffset);
3221 return Index;
3222 }
3223
3224 void deleteIfTriviallyDead(Value *V) {
3225 Instruction *I = cast<Instruction>(V);
3226 if (isInstructionTriviallyDead(I))
3227 Pass.DeadInsts.push_back(I);
3228 }
3229
3230 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3231 unsigned BeginIndex = getIndex(NewBeginOffset);
3232 unsigned EndIndex = getIndex(NewEndOffset);
3233 assert(EndIndex > BeginIndex && "Empty vector!");
3234
3235 LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3236 NewAI.getAlign(), "load");
3237
3238 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3239 LLVMContext::MD_access_group});
3240 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
3241 }
3242
3243 Value *rewriteIntegerLoad(LoadInst &LI) {
3244 assert(IntTy && "We cannot insert an integer to the alloca");
3245 assert(!LI.isVolatile());
3246 Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3247 NewAI.getAlign(), "load");
3248 V = convertValue(DL, IRB, V, IntTy);
3249 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3250 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3251 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3252 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
3253 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
3254 }
3255 // It is possible that the extracted type is not the load type. This
3256 // happens if there is a load past the end of the alloca, and as
3257 // a consequence the slice is narrower but still a candidate for integer
3258 // lowering. To handle this case, we just zero extend the extracted
3259 // integer.
3260 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
3261 "Can only handle an extract for an overly wide load");
3262 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
3263 V = IRB.CreateZExt(V, LI.getType());
3264 return V;
3265 }
3266
3267 bool visitLoadInst(LoadInst &LI) {
3268 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3269 Value *OldOp = LI.getOperand(0);
3270 assert(OldOp == OldPtr);
3271
3272 AAMDNodes AATags = LI.getAAMetadata();
3273
3274 unsigned AS = LI.getPointerAddressSpace();
3275
3276 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
3277 : LI.getType();
3278 bool IsPtrAdjusted = false;
3279 Value *V;
3280 if (VecTy) {
3281 V = rewriteVectorizedLoadInst(LI);
3282 } else if (IntTy && LI.getType()->isIntegerTy()) {
3283 V = rewriteIntegerLoad(LI);
3284 } else if (NewBeginOffset == NewAllocaBeginOffset &&
3285 NewEndOffset == NewAllocaEndOffset &&
3286 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
3287 (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() &&
3288 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3289 !LI.isVolatile()))) {
3290 Value *NewPtr =
3291 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3292 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
3293 NewAI.getAlign(), LI.isVolatile(),
3294 LI.getName());
3295 if (LI.isVolatile())
3296 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3297 if (NewLI->isAtomic())
3298 NewLI->setAlignment(LI.getAlign());
3299
3300 // Copy any metadata that is valid for the new load. This may require
3301 // conversion to a different kind of metadata, e.g. !nonnull might change
3302 // to !range or vice versa.
3303 copyMetadataForLoad(*NewLI, LI);
3304
3305 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
3306 if (AATags)
3307 NewLI->setAAMetadata(AATags.adjustForAccess(
3308 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3309
3310 // Try to preserve nonnull metadata
3311 V = NewLI;
3312
3313 // If this is an integer load past the end of the slice (which means the
3314 // bytes outside the slice are undef or this load is dead) just forcibly
3315 // fix the integer size with correct handling of endianness.
3316 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3317 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3318 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3319 V = IRB.CreateZExt(V, TITy, "load.ext");
3320 if (DL.isBigEndian())
3321 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3322 "endian_shift");
3323 }
3324 } else {
3325 Type *LTy = IRB.getPtrTy(AS);
3326 LoadInst *NewLI =
3327 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3328 getSliceAlign(), LI.isVolatile(), LI.getName());
3329
3330 if (AATags)
3331 NewLI->setAAMetadata(AATags.adjustForAccess(
3332 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3333
3334 if (LI.isVolatile())
3335 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3336 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3337 LLVMContext::MD_access_group});
3338
3339 V = NewLI;
3340 IsPtrAdjusted = true;
3341 }
3342 V = convertValue(DL, IRB, V, TargetTy);
3343
3344 if (IsSplit) {
3345 assert(!LI.isVolatile());
3346 assert(LI.getType()->isIntegerTy() &&
3347 "Only integer type loads and stores are split");
3348 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3349 "Split load isn't smaller than original load");
3350 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
3351 "Non-byte-multiple bit width");
3352 // Move the insertion point just past the load so that we can refer to it.
3353 BasicBlock::iterator LIIt = std::next(LI.getIterator());
3354 // Ensure the insertion point comes before any debug-info immediately
3355 // after the load, so that variable values referring to the load are
3356 // dominated by it.
3357 LIIt.setHeadBit(true);
3358 IRB.SetInsertPoint(LI.getParent(), LIIt);
3359 // Create a placeholder value with the same type as LI to use as the
3360 // basis for the new value. This allows us to replace the uses of LI with
3361 // the computed value, and then replace the placeholder with LI, leaving
3362 // LI only used for this computation.
3363 Value *Placeholder =
3364 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
3365 false, Align(1));
3366 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3367 "insert");
3368 LI.replaceAllUsesWith(V);
3369 Placeholder->replaceAllUsesWith(&LI);
3370 Placeholder->deleteValue();
3371 } else {
3372 LI.replaceAllUsesWith(V);
3373 }
3374
3375 Pass.DeadInsts.push_back(&LI);
3376 deleteIfTriviallyDead(OldOp);
3377 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
3378 return !LI.isVolatile() && !IsPtrAdjusted;
3379 }
3380
3381 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3382 AAMDNodes AATags) {
3383 // Capture V for the purpose of debug-info accounting once it's converted
3384 // to a vector store.
3385 Value *OrigV = V;
3386 if (V->getType() != VecTy) {
3387 unsigned BeginIndex = getIndex(NewBeginOffset);
3388 unsigned EndIndex = getIndex(NewEndOffset);
3389 assert(EndIndex > BeginIndex && "Empty vector!");
3390 unsigned NumElements = EndIndex - BeginIndex;
3391 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3392 "Too many elements!");
3393 Type *SliceTy = (NumElements == 1)
3394 ? ElementTy
3395 : FixedVectorType::get(ElementTy, NumElements);
3396 if (V->getType() != SliceTy)
3397 V = convertValue(DL, IRB, V, SliceTy);
3398
3399 // Mix in the existing elements.
3400 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3401 NewAI.getAlign(), "load");
3402 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3403 }
3404 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3405 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3406 LLVMContext::MD_access_group});
3407 if (AATags)
3408 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3409 V->getType(), DL));
3410 Pass.DeadInsts.push_back(&SI);
3411
3412 // NOTE: Careful to use OrigV rather than V.
3413 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3414 Store, Store->getPointerOperand(), OrigV, DL);
3415 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3416 return true;
3417 }
3418
3419 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3420 assert(IntTy && "We cannot extract an integer from the alloca");
3421 assert(!SI.isVolatile());
3422 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3423 IntTy->getBitWidth()) {
3424 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3425 NewAI.getAlign(), "oldload");
3426 Old = convertValue(DL, IRB, Old, IntTy);
3427 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3428 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3429 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3430 }
3431 V = convertValue(DL, IRB, V, NewAllocaTy);
3432 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3433 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3434 LLVMContext::MD_access_group});
3435 if (AATags)
3436 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3437 V->getType(), DL));
3438
3439 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3440 Store, Store->getPointerOperand(),
3441 Store->getValueOperand(), DL);
3442
3443 Pass.DeadInsts.push_back(&SI);
3444 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3445 return true;
3446 }
3447
3448 bool visitStoreInst(StoreInst &SI) {
3449 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3450 Value *OldOp = SI.getOperand(1);
3451 assert(OldOp == OldPtr);
3452
3453 AAMDNodes AATags = SI.getAAMetadata();
3454 Value *V = SI.getValueOperand();
3455
3456 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3457 // alloca that should be re-examined after promoting this alloca.
3458 if (V->getType()->isPointerTy())
3459 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3460 Pass.PostPromotionWorklist.insert(AI);
3461
3462 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3463 if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) {
3464 assert(!SI.isVolatile());
3465 assert(V->getType()->isIntegerTy() &&
3466 "Only integer type loads and stores are split");
3467 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3468 "Non-byte-multiple bit width");
3469 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3470 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3471 "extract");
3472 }
3473
3474 if (VecTy)
3475 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3476 if (IntTy && V->getType()->isIntegerTy())
3477 return rewriteIntegerStore(V, SI, AATags);
3478
3479 StoreInst *NewSI;
3480 if (NewBeginOffset == NewAllocaBeginOffset &&
3481 NewEndOffset == NewAllocaEndOffset &&
3482 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3483 V = convertValue(DL, IRB, V, NewAllocaTy);
3484 Value *NewPtr =
3485 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3486
3487 NewSI =
3488 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3489 } else {
3490 unsigned AS = SI.getPointerAddressSpace();
3491 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3492 NewSI =
3493 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3494 }
3495 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3496 LLVMContext::MD_access_group});
3497 if (AATags)
3498 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3499 V->getType(), DL));
3500 if (SI.isVolatile())
3501 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3502 if (NewSI->isAtomic())
3503 NewSI->setAlignment(SI.getAlign());
3504
3505 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3506 NewSI, NewSI->getPointerOperand(),
3507 NewSI->getValueOperand(), DL);
3508
3509 Pass.DeadInsts.push_back(&SI);
3510 deleteIfTriviallyDead(OldOp);
3511
3512 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3513 return NewSI->getPointerOperand() == &NewAI &&
3514 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3515 !SI.isVolatile();
3516 }
3517
3518 /// Compute an integer value from splatting an i8 across the given
3519 /// number of bytes.
3520 ///
3521 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3522 /// call this routine.
3523 /// FIXME: Heed the advice above.
3524 ///
3525 /// \param V The i8 value to splat.
3526 /// \param Size The number of bytes in the output (assuming i8 is one byte)
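/// For illustration (hypothetical values): splatting V = 0xAB across Size = 4
/// zero-extends V to i32 and multiplies it by 0xFFFFFFFF / 0xFF = 0x01010101,
/// producing 0xABABABAB.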
3527 Value *getIntegerSplat(Value *V, unsigned Size) {
3528 assert(Size > 0 && "Expected a positive number of bytes.");
3529 IntegerType *VTy = cast<IntegerType>(V->getType());
3530 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3531 if (Size == 1)
3532 return V;
3533
3534 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3535 V = IRB.CreateMul(
3536 IRB.CreateZExt(V, SplatIntTy, "zext"),
3537 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3538 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3539 SplatIntTy)),
3540 "isplat");
3541 return V;
3542 }
3543
3544 /// Compute a vector splat for a given element value.
3545 Value *getVectorSplat(Value *V, unsigned NumElements) {
3546 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3547 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3548 return V;
3549 }
3550
3551 bool visitMemSetInst(MemSetInst &II) {
3552 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3553 assert(II.getRawDest() == OldPtr);
3554
3555 AAMDNodes AATags = II.getAAMetadata();
3556
3557 // If the memset has a variable size, it cannot be split, just adjust the
3558 // pointer to the new alloca.
3559 if (!isa<ConstantInt>(II.getLength())) {
3560 assert(!IsSplit);
3561 assert(NewBeginOffset == BeginOffset);
3562 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3563 II.setDestAlignment(getSliceAlign());
3564 // In theory we should call migrateDebugInfo here. However, we do not
3565 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3566 // constant geps, or storing a variable number of bytes.
3568 "AT: Unexpected link to non-const GEP");
3569 deleteIfTriviallyDead(OldPtr);
3570 return false;
3571 }
3572
3573 // Record this instruction for deletion.
3574 Pass.DeadInsts.push_back(&II);
3575
3576 Type *AllocaTy = NewAI.getAllocatedType();
3577 Type *ScalarTy = AllocaTy->getScalarType();
3578
3579 const bool CanContinue = [&]() {
3580 if (VecTy || IntTy)
3581 return true;
3582 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3583 return false;
3584 // Length must be in range for FixedVectorType.
3585 auto *C = cast<ConstantInt>(II.getLength());
3586 const uint64_t Len = C->getLimitedValue();
3587 if (Len > std::numeric_limits<unsigned>::max())
3588 return false;
3589 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3590 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3591 return canConvertValue(DL, SrcTy, AllocaTy) &&
3592 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3593 }();
3594
3595 // If this doesn't map cleanly onto the alloca type, and that type isn't
3596 // a single value type, just emit a memset.
3597 if (!CanContinue) {
3598 Type *SizeTy = II.getLength()->getType();
3599 unsigned Sz = NewEndOffset - NewBeginOffset;
3600 Constant *Size = ConstantInt::get(SizeTy, Sz);
3601 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3602 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3603 MaybeAlign(getSliceAlign()), II.isVolatile()));
3604 if (AATags)
3605 New->setAAMetadata(
3606 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3607
3608 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3609 New, New->getRawDest(), nullptr, DL);
3610
3611 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3612 return false;
3613 }
3614
3615 // If we can represent this as a simple value, we have to build the actual
3616 // value to store, which requires expanding the byte present in memset to
3617 // a sensible representation for the alloca type. This is essentially
3618 // splatting the byte to a sufficiently wide integer, splatting it across
3619 // any desired vector width, and bitcasting to the final type.
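// For illustration (hypothetical values): a memset of byte 0xAB over a
// <4 x i32> alloca becomes the i32 splat 0xABABABAB broadcast to all four
// vector lanes and written back as a single vector store.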
3620 Value *V;
3621
3622 if (VecTy) {
3623 // If this is a memset of a vectorized alloca, insert it.
3624 assert(ElementTy == ScalarTy);
3625
3626 unsigned BeginIndex = getIndex(NewBeginOffset);
3627 unsigned EndIndex = getIndex(NewEndOffset);
3628 assert(EndIndex > BeginIndex && "Empty vector!");
3629 unsigned NumElements = EndIndex - BeginIndex;
3630 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3631 "Too many elements!");
3632
3633 Value *Splat = getIntegerSplat(
3634 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3635 Splat = convertValue(DL, IRB, Splat, ElementTy);
3636 if (NumElements > 1)
3637 Splat = getVectorSplat(Splat, NumElements);
3638
3639 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3640 NewAI.getAlign(), "oldload");
3641 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3642 } else if (IntTy) {
3643 // If this is a memset on an alloca where we can widen stores, insert the
3644 // set integer.
3645 assert(!II.isVolatile());
3646
3647 uint64_t Size = NewEndOffset - NewBeginOffset;
3648 V = getIntegerSplat(II.getValue(), Size);
3649
3650 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3651 EndOffset != NewAllocaEndOffset)) {
3652 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3653 NewAI.getAlign(), "oldload");
3654 Old = convertValue(DL, IRB, Old, IntTy);
3655 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3656 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3657 } else {
3658 assert(V->getType() == IntTy &&
3659 "Wrong type for an alloca wide integer!");
3660 }
3661 V = convertValue(DL, IRB, V, AllocaTy);
3662 } else {
3663 // Established these invariants above.
3664 assert(NewBeginOffset == NewAllocaBeginOffset);
3665 assert(NewEndOffset == NewAllocaEndOffset);
3666
3667 V = getIntegerSplat(II.getValue(),
3668 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3669 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3670 V = getVectorSplat(
3671 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3672
3673 V = convertValue(DL, IRB, V, AllocaTy);
3674 }
3675
3676 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3677 StoreInst *New =
3678 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3679 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3680 LLVMContext::MD_access_group});
3681 if (AATags)
3682 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3683 V->getType(), DL));
3684
3685 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3686 New, New->getPointerOperand(), V, DL);
3687
3688 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3689 return !II.isVolatile();
3690 }
3691
3692 bool visitMemTransferInst(MemTransferInst &II) {
3693 // Rewriting of memory transfer instructions can be a bit tricky. We break
3694 // them into two categories: split intrinsics and unsplit intrinsics.
3695
3696 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3697
3698 AAMDNodes AATags = II.getAAMetadata();
3699
3700 bool IsDest = &II.getRawDestUse() == OldUse;
3701 assert((IsDest && II.getRawDest() == OldPtr) ||
3702 (!IsDest && II.getRawSource() == OldPtr));
3703
3704 Align SliceAlign = getSliceAlign();
3705 // For unsplit intrinsics, we simply modify the source and destination
3706 // pointers in place. This isn't just an optimization, it is a matter of
3707 // correctness. With unsplit intrinsics we may be dealing with transfers
3708 // within a single alloca before SROA ran, or with transfers that have
3709 // a variable length. We may also be dealing with memmove instead of
3710 // memcpy, and so simply updating the pointers is necessary for us to
3711 // update both source and dest of a single call.
3712 if (!IsSplittable) {
3713 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3714 if (IsDest) {
3715 // Update the address component of linked dbg.assigns.
3716 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(&II)) {
3717 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3718 DbgAssign->getAddress() == II.getDest())
3719 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3720 }
3721 II.setDest(AdjustedPtr);
3722 II.setDestAlignment(SliceAlign);
3723 } else {
3724 II.setSource(AdjustedPtr);
3725 II.setSourceAlignment(SliceAlign);
3726 }
3727
3728 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3729 deleteIfTriviallyDead(OldPtr);
3730 return false;
3731 }
3732 // For split transfer intrinsics we have an incredibly useful assurance:
3733 // the source and destination do not reside within the same alloca, and at
3734 // least one of them does not escape. This means that we can replace
3735 // memmove with memcpy, and we don't need to worry about all manner of
3736 // downsides to splitting and transforming the operations.
3737
3738 // If this doesn't map cleanly onto the alloca type, and that type isn't
3739 // a single value type, just emit a memcpy.
3740 bool EmitMemCpy =
3741 !VecTy && !IntTy &&
3742 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3743 SliceSize !=
3744 DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
3745 !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
3746 !NewAI.getAllocatedType()->isSingleValueType());
3747 
3748 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3749 // size hasn't been shrunk based on analysis of the viable range, this is
3750 // a no-op.
3751 if (EmitMemCpy && &OldAI == &NewAI) {
3752 // Ensure the start lines up.
3753 assert(NewBeginOffset == BeginOffset);
3754
3755 // Rewrite the size as needed.
3756 if (NewEndOffset != EndOffset)
3757 II.setLength(NewEndOffset - NewBeginOffset);
3758 return false;
3759 }
3760 // Record this instruction for deletion.
3761 Pass.DeadInsts.push_back(&II);
3762
3763 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3764 // alloca that should be re-examined after rewriting this instruction.
3765 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3766 if (AllocaInst *AI =
3767 dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3768 assert(AI != &OldAI && AI != &NewAI &&
3769 "Splittable transfers cannot reach the same alloca on both ends.");
3770 Pass.Worklist.insert(AI);
3771 }
3772
3773 Type *OtherPtrTy = OtherPtr->getType();
3774 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3775
3776 // Compute the relative offset for the other pointer within the transfer.
3777 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3778 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3779 Align OtherAlign =
3780 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3781 OtherAlign =
3782 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3783
3784 if (EmitMemCpy) {
3785 // Compute the other pointer, folding as much as possible to produce
3786 // a single, simple GEP in most cases.
3787 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3788 OtherPtr->getName() + ".");
3789
3790 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3791 Type *SizeTy = II.getLength()->getType();
3792 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3793
3794 Value *DestPtr, *SrcPtr;
3795 MaybeAlign DestAlign, SrcAlign;
3796 // Note: IsDest is true iff we're copying into the new alloca slice
3797 if (IsDest) {
3798 DestPtr = OurPtr;
3799 DestAlign = SliceAlign;
3800 SrcPtr = OtherPtr;
3801 SrcAlign = OtherAlign;
3802 } else {
3803 DestPtr = OtherPtr;
3804 DestAlign = OtherAlign;
3805 SrcPtr = OurPtr;
3806 SrcAlign = SliceAlign;
3807 }
3808 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3809 Size, II.isVolatile());
3810 if (AATags)
3811 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3812
3813 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3814 if (IsDest) {
3815 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3816 &II, New, DestPtr, nullptr, DL);
3817 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3818 DestPtr->stripAndAccumulateConstantOffsets(
3819 DL, Offset, /*AllowNonInbounds*/ true))) {
3820 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3821 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3822 }
3823 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3824 return false;
3825 }
3826
3827 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3828 NewEndOffset == NewAllocaEndOffset;
3829 uint64_t Size = NewEndOffset - NewBeginOffset;
3830 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3831 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3832 unsigned NumElements = EndIndex - BeginIndex;
3833 IntegerType *SubIntTy =
3834 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3835
3836 // Reset the other pointer type to match the register type we're going to
3837 // use, but using the address space of the original other pointer.
3838 Type *OtherTy;
3839 if (VecTy && !IsWholeAlloca) {
3840 if (NumElements == 1)
3841 OtherTy = VecTy->getElementType();
3842 else
3843 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3844 } else if (IntTy && !IsWholeAlloca) {
3845 OtherTy = SubIntTy;
3846 } else {
3847 OtherTy = NewAllocaTy;
3848 }
3849
3850 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3851 OtherPtr->getName() + ".");
3852 MaybeAlign SrcAlign = OtherAlign;
3853 MaybeAlign DstAlign = SliceAlign;
3854 if (!IsDest)
3855 std::swap(SrcAlign, DstAlign);
3856
3857 Value *SrcPtr;
3858 Value *DstPtr;
3859
3860 if (IsDest) {
3861 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3862 SrcPtr = AdjPtr;
3863 } else {
3864 DstPtr = AdjPtr;
3865 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3866 }
3867
3868 Value *Src;
3869 if (VecTy && !IsWholeAlloca && !IsDest) {
3870 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3871 NewAI.getAlign(), "load");
3872 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3873 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3874 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3875 NewAI.getAlign(), "load");
3876 Src = convertValue(DL, IRB, Src, IntTy);
3877 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3878 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3879 } else {
3880 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3881 II.isVolatile(), "copyload");
3882 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3883 LLVMContext::MD_access_group});
3884 if (AATags)
3885 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3886 Load->getType(), DL));
3887 Src = Load;
3888 }
3889
3890 if (VecTy && !IsWholeAlloca && IsDest) {
3891 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3892 NewAI.getAlign(), "oldload");
3893 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3894 } else if (IntTy && !IsWholeAlloca && IsDest) {
3895 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3896 NewAI.getAlign(), "oldload");
3897 Old = convertValue(DL, IRB, Old, IntTy);
3898 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3899 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3900 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3901 }
3902
3903 StoreInst *Store = cast<StoreInst>(
3904 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3905 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3906 LLVMContext::MD_access_group});
3907 if (AATags)
3908 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3909 Src->getType(), DL));
3910
3911 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3912 if (IsDest) {
3913
3914 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3915 Store, DstPtr, Src, DL);
3916 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3917 DstPtr->stripAndAccumulateConstantOffsets(
3918 DL, Offset, /*AllowNonInbounds*/ true))) {
3919 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3920 &II, Store, DstPtr, Src, DL);
3921 }
3922
3923 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3924 return !II.isVolatile();
3925 }
3926
3927 bool visitIntrinsicInst(IntrinsicInst &II) {
3928 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3929 "Unexpected intrinsic!");
3930 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3931
3932 // Record this instruction for deletion.
3933 Pass.DeadInsts.push_back(&II);
3934
3935 if (II.isDroppable()) {
3936 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3937 // TODO For now we forget assumed information, this can be improved.
3938 OldPtr->dropDroppableUsesIn(II);
3939 return true;
3940 }
3941
3942 assert(II.getArgOperand(0) == OldPtr);
3943 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3944 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3945 Value *New;
3946 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3947 New = IRB.CreateLifetimeStart(Ptr);
3948 else
3949 New = IRB.CreateLifetimeEnd(Ptr);
3950
3951 (void)New;
3952 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3953
3954 return true;
3955 }
3956
3957 void fixLoadStoreAlign(Instruction &Root) {
3958 // This algorithm implements the same visitor loop as
3959 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3960 // or store found.
3961 SmallPtrSet<Instruction *, 4> Visited;
3962 SmallVector<Instruction *, 4> Uses;
3963 Visited.insert(&Root);
3964 Uses.push_back(&Root);
3965 do {
3966 Instruction *I = Uses.pop_back_val();
3967
3968 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3969 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3970 continue;
3971 }
3972 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3973 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3974 continue;
3975 }
3976 
3977 assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3978 isa<PHINode>(I) || isa<SelectInst>(I) ||
3979 isa<GetElementPtrInst>(I));
3980 for (User *U : I->users())
3981 if (Visited.insert(cast<Instruction>(U)).second)
3982 Uses.push_back(cast<Instruction>(U));
3983 } while (!Uses.empty());
3984 }
3985
3986 bool visitPHINode(PHINode &PN) {
3987 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3988 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3989 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3990
3991 // We would like to compute a new pointer in only one place, but have it be
3992 // as local as possible to the PHI. To do that, we re-use the location of
3993 // the old pointer, which necessarily must be in the right position to
3994 // dominate the PHI.
3995 IRBuilderBase::InsertPointGuard Guard(IRB);
3996 if (isa<PHINode>(OldPtr))
3997 IRB.SetInsertPoint(OldPtr->getParent(),
3998 OldPtr->getParent()->getFirstInsertionPt());
3999 else
4000 IRB.SetInsertPoint(OldPtr);
4001 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
4002
4003 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4004 // Replace the operands which were using the old pointer.
4005 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
4006
4007 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
4008 deleteIfTriviallyDead(OldPtr);
4009
4010 // Fix the alignment of any loads or stores using this PHI node.
4011 fixLoadStoreAlign(PN);
4012
4013 // PHIs can't be promoted on their own, but often can be speculated. We
4014 // check the speculation outside of the rewriter so that we see the
4015 // fully-rewritten alloca.
4016 PHIUsers.insert(&PN);
4017 return true;
4018 }
4019
4020 bool visitSelectInst(SelectInst &SI) {
4021 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4022 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
4023 "Pointer isn't an operand!");
4024 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
4025 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
4026
4027 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4028 // Replace the operands which were using the old pointer.
4029 if (SI.getOperand(1) == OldPtr)
4030 SI.setOperand(1, NewPtr);
4031 if (SI.getOperand(2) == OldPtr)
4032 SI.setOperand(2, NewPtr);
4033
4034 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
4035 deleteIfTriviallyDead(OldPtr);
4036
4037 // Fix the alignment of any loads or stores using this select.
4038 fixLoadStoreAlign(SI);
4039
4040 // Selects can't be promoted on their own, but often can be speculated. We
4041 // check the speculation outside of the rewriter so that we see the
4042 // fully-rewritten alloca.
4043 SelectUsers.insert(&SI);
4044 return true;
4045 }
4046};
4047
4048/// Visitor to rewrite aggregate loads and stores as scalar.
4049///
4050/// This pass aggressively rewrites all aggregate loads and stores on
4051/// a particular pointer (or any pointer derived from it which we can identify)
4052/// with scalar loads and stores.
4053class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
4054 // Befriend the base class so it can delegate to private visit methods.
4055 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4056
4057 /// Queue of pointer uses to analyze and potentially rewrite.
4058 SmallVector<Use *, 8> Queue;
4059 
4060 /// Set to prevent us from cycling with phi nodes and loops.
4061 SmallPtrSet<User *, 8> Visited;
4062
4063 /// The current pointer use being rewritten. This is used to dig up the used
4064 /// value (as opposed to the user).
4065 Use *U = nullptr;
4066
4067 /// Used to calculate offsets, and hence alignment, of subobjects.
4068 const DataLayout &DL;
4069
4070 IRBuilderTy &IRB;
4071
4072public:
4073 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
4074 : DL(DL), IRB(IRB) {}
4075
4076 /// Rewrite loads and stores through a pointer and all pointers derived from
4077 /// it.
4078 bool rewrite(Instruction &I) {
4079 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
4080 enqueueUsers(I);
4081 bool Changed = false;
4082 while (!Queue.empty()) {
4083 U = Queue.pop_back_val();
4084 Changed |= visit(cast<Instruction>(U->getUser()));
4085 }
4086 return Changed;
4087 }
4088
4089private:
4090 /// Enqueue all the users of the given instruction for further processing.
4091 /// This uses a set to de-duplicate users.
4092 void enqueueUsers(Instruction &I) {
4093 for (Use &U : I.uses())
4094 if (Visited.insert(U.getUser()).second)
4095 Queue.push_back(&U);
4096 }
4097
4098 // Conservative default is to not rewrite anything.
4099 bool visitInstruction(Instruction &I) { return false; }
4100
4101 /// Generic recursive split emission class.
4102 template <typename Derived> class OpSplitter {
4103 protected:
4104 /// The builder used to form new instructions.
4105 IRBuilderTy &IRB;
4106
4107 /// The indices to be used with insert- or extractvalue to select the
4108 /// appropriate value within the aggregate.
4109 SmallVector<unsigned, 4> Indices;
4110
4111 /// The indices to a GEP instruction which will move Ptr to the correct slot
4112 /// within the aggregate.
4113 SmallVector<Value *, 4> GEPIndices;
4114
4115 /// The base pointer of the original op, used as a base for GEPing the
4116 /// split operations.
4117 Value *Ptr;
4118
4119 /// The base pointee type being GEPed into.
4120 Type *BaseTy;
4121
4122 /// Known alignment of the base pointer.
4123 Align BaseAlign;
4124
4125 /// To calculate offset of each component so we can correctly deduce
4126 /// alignments.
4127 const DataLayout &DL;
4128
4129 /// Initialize the splitter with an insertion point, Ptr and start with a
4130 /// single zero GEP index.
4131 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4132 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
4133 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4134 BaseAlign(BaseAlign), DL(DL) {
4135 IRB.SetInsertPoint(InsertionPoint);
4136 }
4137
4138 public:
4139 /// Generic recursive split emission routine.
4140 ///
4141 /// This method recursively splits an aggregate op (load or store) into
4142 /// scalar or vector ops. It splits recursively until it hits a single value
4143 /// and emits that single value operation via the template argument.
4144 ///
4145 /// The logic of this routine relies on GEPs and insertvalue and
4146 /// extractvalue all operating with the same fundamental index list, merely
4147 /// formatted differently (GEPs need actual values).
4148 ///
4149 /// \param Ty The type being split recursively into smaller ops.
4150 /// \param Agg The aggregate value being built up or stored, depending on
4151 /// whether this is splitting a load or a store respectively.
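///
/// For illustration, splitting a load of a hypothetical {i32, float} aggregate
/// conceptually produces:
///   %p.0 = getelementptr inbounds {i32, float}, ptr %p, i32 0, i32 0
///   %v.0 = load i32, ptr %p.0
///   %agg.0 = insertvalue {i32, float} poison, i32 %v.0, 0
///   %p.1 = getelementptr inbounds {i32, float}, ptr %p, i32 0, i32 1
///   %v.1 = load float, ptr %p.1
///   %agg.1 = insertvalue {i32, float} %agg.0, float %v.1, 1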
4152 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
4153 if (Ty->isSingleValueType()) {
4154 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4155 return static_cast<Derived *>(this)->emitFunc(
4156 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
4157 }
4158
4159 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
4160 unsigned OldSize = Indices.size();
4161 (void)OldSize;
4162 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
4163 ++Idx) {
4164 assert(Indices.size() == OldSize && "Did not return to the old size");
4165 Indices.push_back(Idx);
4166 GEPIndices.push_back(IRB.getInt32(Idx));
4167 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
4168 GEPIndices.pop_back();
4169 Indices.pop_back();
4170 }
4171 return;
4172 }
4173
4174 if (StructType *STy = dyn_cast<StructType>(Ty)) {
4175 unsigned OldSize = Indices.size();
4176 (void)OldSize;
4177 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
4178 ++Idx) {
4179 assert(Indices.size() == OldSize && "Did not return to the old size");
4180 Indices.push_back(Idx);
4181 GEPIndices.push_back(IRB.getInt32(Idx));
4182 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
4183 GEPIndices.pop_back();
4184 Indices.pop_back();
4185 }
4186 return;
4187 }
4188
4189 llvm_unreachable("Only arrays and structs are aggregate loadable types");
4190 }
4191 };
4192
4193 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
4194 AAMDNodes AATags;
4195 // A vector to hold the split components that we want to emit
4196 // separate fake uses for.
4197 SmallVector<Value *, 4> Components;
4198 // A vector to hold all the fake uses of the struct that we are splitting.
4199 // Usually there should only be one, but we are handling the general case.
4200 SmallVector<Instruction *, 4> FakeUses;
4201 
4202 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4203 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
4204 IRBuilderTy &IRB)
4205 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
4206 IRB),
4207 AATags(AATags) {}
4208
4209 /// Emit a leaf load of a single value. This is called at the leaves of the
4210 /// recursive emission to actually load values.
4211 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4212 assert(Ty->isSingleValueType());
4213 // Load the single value and insert it using the indices.
4214 Value *GEP =
4215 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4216 LoadInst *Load =
4217 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
4218
4219 APInt Offset(
4220 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4221 if (AATags &&
4222 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
4223 Load->setAAMetadata(
4224 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
4225 // Record the load so we can generate a fake use for this aggregate
4226 // component.
4227 Components.push_back(Load);
4228
4229 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
4230 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
4231 }
4232
4233 // Stash the fake uses that use the value generated by this instruction.
4234 void recordFakeUses(LoadInst &LI) {
4235 for (Use &U : LI.uses())
4236 if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
4237 if (II->getIntrinsicID() == Intrinsic::fake_use)
4238 FakeUses.push_back(II);
4239 }
4240
4241 // Replace all fake uses of the aggregate with a series of fake uses, one
4242 // for each split component.
4243 void emitFakeUses() {
4244 for (Instruction *I : FakeUses) {
4245 IRB.SetInsertPoint(I);
4246 for (auto *V : Components)
4247 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
4248 I->eraseFromParent();
4249 }
4250 }
4251 };
4252
4253 bool visitLoadInst(LoadInst &LI) {
4254 assert(LI.getPointerOperand() == *U);
4255 if (!LI.isSimple() || LI.getType()->isSingleValueType())
4256 return false;
4257
4258 // We have an aggregate being loaded, split it apart.
4259 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
4260 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
4261 getAdjustedAlignment(&LI, 0), DL, IRB);
4262 Splitter.recordFakeUses(LI);
4263 Value *V = PoisonValue::get(LI.getType());
4264 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
4265 Splitter.emitFakeUses();
4266 Visited.erase(&LI);
4267 LI.replaceAllUsesWith(V);
4268 LI.eraseFromParent();
4269 return true;
4270 }
4271
4272 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
4273 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4274 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4275 const DataLayout &DL, IRBuilderTy &IRB)
4276 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4277 DL, IRB),
4278 AATags(AATags), AggStore(AggStore) {}
4279 AAMDNodes AATags;
4280 StoreInst *AggStore;
4281 /// Emit a leaf store of a single value. This is called at the leaves of the
4282 /// recursive emission to actually produce stores.
4283 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4284 assert(Ty->isSingleValueType());
4285 // Extract the single value and store it using the indices.
4286 //
4287 // The gep and extractvalue values are factored out of the CreateStore
4288 // call to make the output independent of the argument evaluation order.
4289 Value *ExtractValue =
4290 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4291 Value *InBoundsGEP =
4292 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4293 StoreInst *Store =
4294 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4295
4296 APInt Offset(
4297 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4298 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
4299 if (AATags) {
4300 Store->setAAMetadata(AATags.adjustForAccess(
4301 Offset.getZExtValue(), ExtractValue->getType(), DL));
4302 }
4303
4304 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
4305 // If we cannot (because there's an intervening non-const or unbounded
4306 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
4307 // this instruction.
4308 Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
4309 if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
4310 uint64_t SizeInBits =
4311 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4312 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
4313 SizeInBits, AggStore, Store,
4314 Store->getPointerOperand(), Store->getValueOperand(),
4315 DL);
4316 } else {
4317 assert(at::getDVRAssignmentMarkers(Store).empty() &&
4318 "AT: unexpected debug.assign linked to store through "
4319 "unbounded GEP");
4320 }
4321 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
4322 }
4323 };
4324
4325 bool visitStoreInst(StoreInst &SI) {
4326 if (!SI.isSimple() || SI.getPointerOperand() != *U)
4327 return false;
4328 Value *V = SI.getValueOperand();
4329 if (V->getType()->isSingleValueType())
4330 return false;
4331
4332 // We have an aggregate being stored, split it apart.
4333 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4334 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4335 getAdjustedAlignment(&SI, 0), DL, IRB);
4336 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4337 Visited.erase(&SI);
4338 // The stores replacing SI each have markers describing fragments of the
4339 // assignment so delete the assignment markers linked to SI.
4340 at::deleteAssignmentMarkers(&SI);
4341 SI.eraseFromParent();
4342 return true;
4343 }
4344
4345 bool visitBitCastInst(BitCastInst &BC) {
4346 enqueueUsers(BC);
4347 return false;
4348 }
4349
4350 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4351 enqueueUsers(ASC);
4352 return false;
4353 }
4354
4355 // Unfold gep (select cond, ptr1, ptr2), idx
4356 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
4357 // and gep ptr, (select cond, idx1, idx2)
4358 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
4359 // We also allow for i1 zext indices, which are equivalent to selects.
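// For illustration (hypothetical IR):
//   %idx = select i1 %c, i64 0, i64 1
//   %gep = getelementptr i32, ptr %p, i64 %idx
// is rewritten to:
//   %gep.true = getelementptr i32, ptr %p, i64 0
//   %gep.false = getelementptr i32, ptr %p, i64 1
//   %gep = select i1 %c, ptr %gep.true, ptr %gep.false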
4360 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4361 // Check whether the GEP has exactly one select operand and all indices
4362 // will become constant after the transform.
4363 Instruction *Sel = nullptr;
4364 for (Value *Op : GEPI.indices()) {
4365 if (auto *SI = dyn_cast<SelectInst>(Op)) {
4366 if (Sel)
4367 return false;
4368
4369 Sel = SI;
4370 if (!isa<ConstantInt>(SI->getTrueValue()) ||
4371 !isa<ConstantInt>(SI->getFalseValue()))
4372 return false;
4373 continue;
4374 }
4375 if (auto *ZI = dyn_cast<ZExtInst>(Op)) {
4376 if (Sel)
4377 return false;
4378 Sel = ZI;
4379 if (!ZI->getSrcTy()->isIntegerTy(1))
4380 return false;
4381 continue;
4382 }
4383
4384 if (!isa<ConstantInt>(Op))
4385 return false;
4386 }
4387
4388 if (!Sel)
4389 return false;
4390
4391 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4392 dbgs() << " original: " << *Sel << "\n";
4393 dbgs() << " " << GEPI << "\n";);
4394
4395 auto GetNewOps = [&](Value *SelOp) {
4396 SmallVector<Value *> NewOps;
4397 for (Value *Op : GEPI.operands())
4398 if (Op == Sel)
4399 NewOps.push_back(SelOp);
4400 else
4401 NewOps.push_back(Op);
4402 return NewOps;
4403 };
4404
4405 Value *Cond, *True, *False;
4406 Instruction *MDFrom = nullptr;
4407 if (auto *SI = dyn_cast<SelectInst>(Sel)) {
4408 Cond = SI->getCondition();
4409 True = SI->getTrueValue();
4410 False = SI->getFalseValue();
4412 MDFrom = SI;
4413 } else {
4414 Cond = Sel->getOperand(0);
4415 True = ConstantInt::get(Sel->getType(), 1);
4416 False = ConstantInt::get(Sel->getType(), 0);
4417 }
4418 SmallVector<Value *> TrueOps = GetNewOps(True);
4419 SmallVector<Value *> FalseOps = GetNewOps(False);
4420
4421 IRB.SetInsertPoint(&GEPI);
4422 GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
4423
4424 Type *Ty = GEPI.getSourceElementType();
4425 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4426 True->getName() + ".sroa.gep", NW);
4427
4428 Value *NFalse =
4429 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4430 False->getName() + ".sroa.gep", NW);
4431
4432 Value *NSel = MDFrom
4433 ? IRB.CreateSelect(Cond, NTrue, NFalse,
4434 Sel->getName() + ".sroa.sel", MDFrom)
4435 : IRB.CreateSelectWithUnknownProfile(
4436 Cond, NTrue, NFalse, DEBUG_TYPE,
4437 Sel->getName() + ".sroa.sel");
4438 Visited.erase(&GEPI);
4439 GEPI.replaceAllUsesWith(NSel);
4440 GEPI.eraseFromParent();
4441 Instruction *NSelI = cast<Instruction>(NSel);
4442 Visited.insert(NSelI);
4443 enqueueUsers(*NSelI);
4444
4445 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4446 dbgs() << " " << *NFalse << "\n";
4447 dbgs() << " " << *NSel << "\n";);
4448
4449 return true;
4450 }
4451
4452 // Unfold gep (phi ptr1, ptr2), idx
4453 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4454 // and gep ptr, (phi idx1, idx2)
4455 // => phi ((gep ptr, idx1), (gep ptr, idx2))
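// A minimal illustrative sketch (value names invented):
//   %p   = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
//   %gep = getelementptr i32, ptr %p, i64 1
// is rewritten to
//   %gep.a = getelementptr i32, ptr %a, i64 1  ; placed in the entry block
//   %gep.b = getelementptr i32, ptr %b, i64 1  ; placed in the entry block
//   %p.new = phi ptr [ %gep.a, %bb1 ], [ %gep.b, %bb2 ]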
4456 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4457 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4458 // operand (looking through the phi if it is the phi we want to unfold) is
4459 // an instruction besides a static alloca.
4460 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4461 auto IsInvalidPointerOperand = [](Value *V) {
4462 if (!isa<Instruction>(V))
4463 return false;
4464 if (auto *AI = dyn_cast<AllocaInst>(V))
4465 return !AI->isStaticAlloca();
4466 return true;
4467 };
4468 if (Phi) {
4469 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4470 return false;
4471 } else {
4472 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4473 return false;
4474 }
4475 // Check whether the GEP has exactly one phi operand (including the pointer
4476 // operand) and all indices will become constant after the transform.
4477 for (Value *Op : GEPI.indices()) {
4478 if (auto *SI = dyn_cast<PHINode>(Op)) {
4479 if (Phi)
4480 return false;
4481
4482 Phi = SI;
4483 if (!all_of(Phi->incoming_values(),
4484 [](Value *V) { return isa<ConstantInt>(V); }))
4485 return false;
4486 continue;
4487 }
4488
4489 if (!isa<ConstantInt>(Op))
4490 return false;
4491 }
4492
4493 if (!Phi)
4494 return false;
4495
4496 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4497 dbgs() << " original: " << *Phi << "\n";
4498 dbgs() << " " << GEPI << "\n";);
4499
4500 auto GetNewOps = [&](Value *PhiOp) {
4501 SmallVector<Value *> NewOps;
4502 for (Value *Op : GEPI.operands())
4503 if (Op == Phi)
4504 NewOps.push_back(PhiOp);
4505 else
4506 NewOps.push_back(Op);
4507 return NewOps;
4508 };
4509
4510 IRB.SetInsertPoint(Phi);
4511 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4512 Phi->getName() + ".sroa.phi");
4513
4514 Type *SourceTy = GEPI.getSourceElementType();
4515 // We only handle arguments, constants, and static allocas here, so we can
4516 // insert GEPs at the end of the entry block.
4517 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4518 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4519 Value *Op = Phi->getIncomingValue(I);
4520 BasicBlock *BB = Phi->getIncomingBlock(I);
4521 Value *NewGEP;
4522 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4523 NewGEP = NewPhi->getIncomingValue(NI);
4524 } else {
4525 SmallVector<Value *> NewOps = GetNewOps(Op);
4526 NewGEP =
4527 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4528 Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
4529 }
4530 NewPhi->addIncoming(NewGEP, BB);
4531 }
4532
4533 Visited.erase(&GEPI);
4534 GEPI.replaceAllUsesWith(NewPhi);
4535 GEPI.eraseFromParent();
4536 Visited.insert(NewPhi);
4537 enqueueUsers(*NewPhi);
4538
4539 LLVM_DEBUG(dbgs() << " to: ";
4540 for (Value *In
4541 : NewPhi->incoming_values()) dbgs()
4542 << "\n " << *In;
4543 dbgs() << "\n " << *NewPhi << '\n');
4544
4545 return true;
4546 }
4547
4548 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4549 if (unfoldGEPSelect(GEPI))
4550 return true;
4551
4552 if (unfoldGEPPhi(GEPI))
4553 return true;
4554
4555 enqueueUsers(GEPI);
4556 return false;
4557 }
4558
4559 bool visitPHINode(PHINode &PN) {
4560 enqueueUsers(PN);
4561 return false;
4562 }
4563
4564 bool visitSelectInst(SelectInst &SI) {
4565 enqueueUsers(SI);
4566 return false;
4567 }
4568};
4569
4570} // end anonymous namespace
4571
4572/// Strip aggregate type wrapping.
4573///
4574/// This removes no-op aggregate types wrapping an underlying type. It will
4575/// strip as many layers of types as it can without changing either the type
4576/// size or the allocated size.
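///
/// For example (illustrative): { [1 x { float }] } is stripped down to float,
/// while { i32, i32 } is returned unchanged because dropping the struct
/// wrapper would change the allocated size.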
4577 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
4578 if (Ty->isSingleValueType())
4579 return Ty;
4580
4581 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4582 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4583
4584 Type *InnerTy;
4585 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4586 InnerTy = ArrTy->getElementType();
4587 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4588 const StructLayout *SL = DL.getStructLayout(STy);
4589 unsigned Index = SL->getElementContainingOffset(0);
4590 InnerTy = STy->getElementType(Index);
4591 } else {
4592 return Ty;
4593 }
4594
4595 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4596 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4597 return Ty;
4598
4599 return stripAggregateTypeWrapping(DL, InnerTy);
4600}
4601
4602/// Try to find a partition of the aggregate type passed in for a given
4603/// offset and size.
4604///
4605/// This recurses through the aggregate type and tries to compute a subtype
4606/// based on the offset and size. When the offset and size span a sub-section
4607/// of an array, it will even compute a new array type for that sub-section,
4608/// and the same for structs.
4609///
4610/// Note that this routine is very strict and tries to find a partition of the
4611/// type which produces the *exact* right offset and size. It is not forgiving
4612 /// when the size or offset causes either end of the type-based partition to be off.
4613/// Also, this is a best-effort routine. It is reasonable to give up and not
4614/// return a type if necessary.
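///
/// For example (illustrative): for { i32, [8 x i16], i64 } with Offset = 4 and
/// Size = 8 this returns the sub-array type [4 x i16], while Offset = 2 and
/// Size = 4 spans the tail of the i32 and part of the array, so nullptr is
/// returned.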
4615 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4616 uint64_t Size) {
4617 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4618 return stripAggregateTypeWrapping(DL, Ty);
4619 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4620 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4621 return nullptr;
4622
4623 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4624 Type *ElementTy;
4625 uint64_t TyNumElements;
4626 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4627 ElementTy = AT->getElementType();
4628 TyNumElements = AT->getNumElements();
4629 } else {
4630 // FIXME: This isn't right for vectors with non-byte-sized or
4631 // non-power-of-two sized elements.
4632 auto *VT = cast<FixedVectorType>(Ty);
4633 ElementTy = VT->getElementType();
4634 TyNumElements = VT->getNumElements();
4635 }
4636 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4637 uint64_t NumSkippedElements = Offset / ElementSize;
4638 if (NumSkippedElements >= TyNumElements)
4639 return nullptr;
4640 Offset -= NumSkippedElements * ElementSize;
4641
4642 // First check if we need to recurse.
4643 if (Offset > 0 || Size < ElementSize) {
4644 // Bail if the partition ends in a different array element.
4645 if ((Offset + Size) > ElementSize)
4646 return nullptr;
4647 // Recurse through the element type trying to peel off offset bytes.
4648 return getTypePartition(DL, ElementTy, Offset, Size);
4649 }
4650 assert(Offset == 0);
4651
4652 if (Size == ElementSize)
4653 return stripAggregateTypeWrapping(DL, ElementTy);
4654 assert(Size > ElementSize);
4655 uint64_t NumElements = Size / ElementSize;
4656 if (NumElements * ElementSize != Size)
4657 return nullptr;
4658 return ArrayType::get(ElementTy, NumElements);
4659 }
4660
4661 StructType *STy = dyn_cast<StructType>(Ty);
4662 if (!STy)
4663 return nullptr;
4664
4665 const StructLayout *SL = DL.getStructLayout(STy);
4666
4667 if (SL->getSizeInBits().isScalable())
4668 return nullptr;
4669
4670 if (Offset >= SL->getSizeInBytes())
4671 return nullptr;
4672 uint64_t EndOffset = Offset + Size;
4673 if (EndOffset > SL->getSizeInBytes())
4674 return nullptr;
4675
4676 unsigned Index = SL->getElementContainingOffset(Offset);
4677 Offset -= SL->getElementOffset(Index);
4678
4679 Type *ElementTy = STy->getElementType(Index);
4680 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4681 if (Offset >= ElementSize)
4682 return nullptr; // The offset points into alignment padding.
4683
4684 // See if any partition must be contained by the element.
4685 if (Offset > 0 || Size < ElementSize) {
4686 if ((Offset + Size) > ElementSize)
4687 return nullptr;
4688 return getTypePartition(DL, ElementTy, Offset, Size);
4689 }
4690 assert(Offset == 0);
4691
4692 if (Size == ElementSize)
4693 return stripAggregateTypeWrapping(DL, ElementTy);
4694
4695 StructType::element_iterator EI = STy->element_begin() + Index,
4696 EE = STy->element_end();
4697 if (EndOffset < SL->getSizeInBytes()) {
4698 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4699 if (Index == EndIndex)
4700 return nullptr; // Within a single element and its padding.
4701
4702 // Don't try to form "natural" types if the elements don't line up with the
4703 // expected size.
4704 // FIXME: We could potentially recurse down through the last element in the
4705 // sub-struct to find a natural end point.
4706 if (SL->getElementOffset(EndIndex) != EndOffset)
4707 return nullptr;
4708
4709 assert(Index < EndIndex);
4710 EE = STy->element_begin() + EndIndex;
4711 }
4712
4713 // Try to build up a sub-structure.
4714 StructType *SubTy =
4715 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4716 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4717 if (Size != SubSL->getSizeInBytes())
4718 return nullptr; // The sub-struct doesn't have quite the size needed.
4719
4720 return SubTy;
4721}
4722
4723/// Pre-split loads and stores to simplify rewriting.
4724///
4725/// We want to break up the splittable load+store pairs as much as
4726/// possible. This is important to do as a preprocessing step, as once we
4727/// start rewriting the accesses to partitions of the alloca we lose the
4728/// necessary information to correctly split apart paired loads and stores
4729/// which both point into this alloca. The case to consider is something like
4730/// the following:
4731///
4732/// %a = alloca [12 x i8]
4733/// %gep1 = getelementptr i8, ptr %a, i32 0
4734/// %gep2 = getelementptr i8, ptr %a, i32 4
4735/// %gep3 = getelementptr i8, ptr %a, i32 8
4736/// store float 0.0, ptr %gep1
4737/// store float 1.0, ptr %gep2
4738/// %v = load i64, ptr %gep1
4739/// store i64 %v, ptr %gep2
4740/// %f1 = load float, ptr %gep2
4741/// %f2 = load float, ptr %gep3
4742///
4743/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4744/// promote everything so we recover the 2 SSA values that should have been
4745/// there all along.
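/// Illustratively, pre-splitting rewrites the i64 copy above into two i32
/// load/store pairs (at byte offsets 0 and 4 from %gep1 and %gep2), after
/// which each 4-byte partition can be promoted to an SSA value.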
4746///
4747/// \returns true if any changes are made.
4748bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4749 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4750
4751 // Track the loads and stores which are candidates for pre-splitting here, in
4752 // the order they first appear during the partition scan. These give stable
4753 // iteration order and a basis for tracking which loads and stores we
4754 // actually split.
4755 SmallVector<LoadInst *, 4> Loads;
4756 SmallVector<StoreInst *, 4> Stores;
4757
4758 // We need to accumulate the splits required of each load or store where we
4759 // can find them via a direct lookup. This is important to cross-check loads
4760 // and stores against each other. We also track the slice so that we can kill
4761 // all the slices that end up split.
4762 struct SplitOffsets {
4763 Slice *S;
4764 std::vector<uint64_t> Splits;
4765 };
4766 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4767
4768 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4769 // This is important as we also cannot pre-split stores of those loads!
4770 // FIXME: This is all pretty gross. It means that we can be more aggressive
4771 // in pre-splitting when the load feeding the store happens to come from
4772 // a separate alloca. Put another way, the effectiveness of SROA would be
4773 // decreased by a frontend which just concatenated all of its local allocas
4774 // into one big flat alloca. But defeating such patterns is exactly the job
4775 // SROA is tasked with! Sadly, to not have this discrepancy we would have
4776 // change store pre-splitting to actually force pre-splitting of the load
4777 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4778 // maybe it would make it more principled?
4779 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4780
4781 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4782 for (auto &P : AS.partitions()) {
4783 for (Slice &S : P) {
4784 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4785 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4786 // If this is a load we have to track that it can't participate in any
4787 // pre-splitting. If this is a store of a load we have to track that
4788 // that load also can't participate in any pre-splitting.
4789 if (auto *LI = dyn_cast<LoadInst>(I))
4790 UnsplittableLoads.insert(LI);
4791 else if (auto *SI = dyn_cast<StoreInst>(I))
4792 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4793 UnsplittableLoads.insert(LI);
4794 continue;
4795 }
4796 assert(P.endOffset() > S.beginOffset() &&
4797 "Empty or backwards partition!");
4798
4799 // Determine if this is a pre-splittable slice.
4800 if (auto *LI = dyn_cast<LoadInst>(I)) {
4801 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4802
4803 // The load must be used exclusively to store into other pointers for
4804 // us to be able to arbitrarily pre-split it. The stores must also be
4805 // simple to avoid changing semantics.
4806 auto IsLoadSimplyStored = [](LoadInst *LI) {
4807 for (User *LU : LI->users()) {
4808 auto *SI = dyn_cast<StoreInst>(LU);
4809 if (!SI || !SI->isSimple())
4810 return false;
4811 }
4812 return true;
4813 };
4814 if (!IsLoadSimplyStored(LI)) {
4815 UnsplittableLoads.insert(LI);
4816 continue;
4817 }
4818
4819 Loads.push_back(LI);
4820 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4821 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4822 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4823 continue;
4824 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4825 if (!StoredLoad || !StoredLoad->isSimple())
4826 continue;
4827 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4828
4829 Stores.push_back(SI);
4830 } else {
4831 // Other uses cannot be pre-split.
4832 continue;
4833 }
4834
4835 // Record the initial split.
4836 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4837 auto &Offsets = SplitOffsetsMap[I];
4838 assert(Offsets.Splits.empty() &&
4839 "Should not have splits the first time we see an instruction!");
4840 Offsets.S = &S;
4841 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4842 }
4843
4844 // Now scan the already split slices, and add a split for any of them which
4845 // we're going to pre-split.
4846 for (Slice *S : P.splitSliceTails()) {
4847 auto SplitOffsetsMapI =
4848 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4849 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4850 continue;
4851 auto &Offsets = SplitOffsetsMapI->second;
4852
4853 assert(Offsets.S == S && "Found a mismatched slice!");
4854 assert(!Offsets.Splits.empty() &&
4855 "Cannot have an empty set of splits on the second partition!");
4856 assert(Offsets.Splits.back() ==
4857 P.beginOffset() - Offsets.S->beginOffset() &&
4858 "Previous split does not end where this one begins!");
4859
4860 // Record each split. The last partition's end isn't needed as the size
4861 // of the slice dictates that.
4862 if (S->endOffset() > P.endOffset())
4863 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4864 }
4865 }
4866
4867 // We may have split loads where some of their stores are split stores. For
4868 // such loads and stores, we can only pre-split them if their splits exactly
4869 // match relative to their starting offset. We have to verify this prior to
4870 // any rewriting.
4871 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4872 // Lookup the load we are storing in our map of split
4873 // offsets.
4874 auto *LI = cast<LoadInst>(SI->getValueOperand());
4875 // If it was completely unsplittable, then we're done,
4876 // and this store can't be pre-split.
4877 if (UnsplittableLoads.count(LI))
4878 return true;
4879
4880 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4881 if (LoadOffsetsI == SplitOffsetsMap.end())
4882 return false; // Unrelated loads are definitely safe.
4883 auto &LoadOffsets = LoadOffsetsI->second;
4884
4885 // Now lookup the store's offsets.
4886 auto &StoreOffsets = SplitOffsetsMap[SI];
4887
4888 // If the relative offsets of each split in the load and
4889 // store match exactly, then we can split them and we
4890 // don't need to remove them here.
4891 if (LoadOffsets.Splits == StoreOffsets.Splits)
4892 return false;
4893
4894 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4895 << " " << *LI << "\n"
4896 << " " << *SI << "\n");
4897
4898 // We've found a store and load that we need to split
4899 // with mismatched relative splits. Just give up on them
4900 // and remove both instructions from our list of
4901 // candidates.
4902 UnsplittableLoads.insert(LI);
4903 return true;
4904 });
4905 // Now we have to go *back* through all the stores, because a later store may
4906 // have caused an earlier store's load to become unsplittable and if it is
4907 // unsplittable for the later store, then we can't rely on it being split in
4908 // the earlier store either.
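// For example (illustrative): if stores A and B both store the same load L
// and B's split offsets fail to match L's, the check above marks L
// unsplittable and drops B; A must now be dropped here as well, even though
// its own offsets matched, because L will no longer be pre-split.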
4909 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4910 auto *LI = cast<LoadInst>(SI->getValueOperand());
4911 return UnsplittableLoads.count(LI);
4912 });
4913 // Once we've established all the loads that can't be split for some reason,
4914 // filter any that made it into our list out.
4915 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4916 return UnsplittableLoads.count(LI);
4917 });
4918
4919 // If no loads or stores are left, there is no pre-splitting to be done for
4920 // this alloca.
4921 if (Loads.empty() && Stores.empty())
4922 return false;
4923
4924 // From here on, we can't fail and will be building new accesses, so rig up
4925 // an IR builder.
4926 IRBuilderTy IRB(&AI);
4927
4928 // Collect the new slices which we will merge into the alloca slices.
4929 SmallVector<Slice, 4> NewSlices;
4930
4931 // Track any allocas we end up splitting loads and stores for so we iterate
4932 // on them.
4933 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4934
4935 // At this point, we have collected all of the loads and stores we can
4936 // pre-split, and the specific splits needed for them. We actually do the
4937 // splitting in a specific order to handle the case where one of the loads
4938 // is the value operand of one of the stores.
4939 //
4940 // First, we rewrite all of the split loads, and just accumulate each split
4941 // load in a parallel structure. We also build the slices for them and append
4942 // them to the alloca slices.
4943 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4944 std::vector<LoadInst *> SplitLoads;
4945 const DataLayout &DL = AI.getDataLayout();
4946 for (LoadInst *LI : Loads) {
4947 SplitLoads.clear();
4948
4949 auto &Offsets = SplitOffsetsMap[LI];
4950 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4951 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4952 "Load must have type size equal to store size");
4953 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4954 "Load must be >= slice size");
4955
4956 uint64_t BaseOffset = Offsets.S->beginOffset();
4957 assert(BaseOffset + SliceSize > BaseOffset &&
4958 "Cannot represent alloca access size using 64-bit integers!");
4959
4960 Value *BasePtr = LI->getPointerOperand();
4961 IRB.SetInsertPoint(LI);
4962
4963 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4964
4965 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4966 int Idx = 0, Size = Offsets.Splits.size();
4967 for (;;) {
4968 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4969 auto AS = LI->getPointerAddressSpace();
4970 auto *PartPtrTy = LI->getPointerOperandType();
4971 LoadInst *PLoad = IRB.CreateAlignedLoad(
4972 PartTy,
4973 getAdjustedPtr(IRB, DL, BasePtr,
4974 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4975 PartPtrTy, BasePtr->getName() + "."),
4976 getAdjustedAlignment(LI, PartOffset),
4977 /*IsVolatile*/ false, LI->getName());
4978 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4979 LLVMContext::MD_access_group});
4980
4981 // Append this load onto the list of split loads so we can find it later
4982 // to rewrite the stores.
4983 SplitLoads.push_back(PLoad);
4984
4985 // Now build a new slice for the alloca.
4986 NewSlices.push_back(
4987 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4988 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4989 /*IsSplittable*/ false, nullptr));
4990 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4991 << ", " << NewSlices.back().endOffset()
4992 << "): " << *PLoad << "\n");
4993
4994 // See if we've handled all the splits.
4995 if (Idx >= Size)
4996 break;
4997
4998 // Setup the next partition.
4999 PartOffset = Offsets.Splits[Idx];
5000 ++Idx;
5001 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
5002 }
5003
5004 // Now that we have the split loads, do the slow walk over all uses of the
5005 // load and rewrite them as split stores, or save the split loads to use
5006 // below if the store is going to be split there anyways.
5007 bool DeferredStores = false;
5008 for (User *LU : LI->users()) {
5009 StoreInst *SI = cast<StoreInst>(LU);
5010 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
5011 DeferredStores = true;
5012 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
5013 << "\n");
5014 continue;
5015 }
5016
5017 Value *StoreBasePtr = SI->getPointerOperand();
5018 IRB.SetInsertPoint(SI);
5019 AAMDNodes AATags = SI->getAAMetadata();
5020
5021 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
5022
5023 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
5024 LoadInst *PLoad = SplitLoads[Idx];
5025 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
5026 auto *PartPtrTy = SI->getPointerOperandType();
5027
5028 auto AS = SI->getPointerAddressSpace();
5029 StoreInst *PStore = IRB.CreateAlignedStore(
5030 PLoad,
5031 getAdjustedPtr(IRB, DL, StoreBasePtr,
5032 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5033 PartPtrTy, StoreBasePtr->getName() + "."),
5034 getAdjustedAlignment(SI, PartOffset),
5035 /*IsVolatile*/ false);
5036 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5037 LLVMContext::MD_access_group,
5038 LLVMContext::MD_DIAssignID});
5039
5040 if (AATags)
5041 PStore->setAAMetadata(
5042 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
5043 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
5044 }
5045
5046 // We want to immediately iterate on any allocas impacted by splitting
5047 // this store, and we have to track any promotable alloca (indicated by
5048 // a direct store) as needing to be resplit because it is no longer
5049 // promotable.
5050 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
5051 ResplitPromotableAllocas.insert(OtherAI);
5052 Worklist.insert(OtherAI);
5053 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5054 StoreBasePtr->stripInBoundsOffsets())) {
5055 Worklist.insert(OtherAI);
5056 }
5057
5058 // Mark the original store as dead.
5059 DeadInsts.push_back(SI);
5060 }
5061
5062 // Save the split loads if there are deferred stores among the users.
5063 if (DeferredStores)
5064 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
5065
5066 // Mark the original load as dead and kill the original slice.
5067 DeadInsts.push_back(LI);
5068 Offsets.S->kill();
5069 }
5070
5071 // Second, we rewrite all of the split stores. At this point, we know that
5072 // all loads from this alloca have been split already. For stores of such
5073 // loads, we can simply look up the pre-existing split loads. For stores of
5074 // other loads, we split those loads first and then write split stores of
5075 // them.
5076 for (StoreInst *SI : Stores) {
5077 auto *LI = cast<LoadInst>(SI->getValueOperand());
5078 IntegerType *Ty = cast<IntegerType>(LI->getType());
5079 assert(Ty->getBitWidth() % 8 == 0);
5080 uint64_t StoreSize = Ty->getBitWidth() / 8;
5081 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5082
5083 auto &Offsets = SplitOffsetsMap[SI];
5084 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
5085 "Slice size should always match load size exactly!");
5086 uint64_t BaseOffset = Offsets.S->beginOffset();
5087 assert(BaseOffset + StoreSize > BaseOffset &&
5088 "Cannot represent alloca access size using 64-bit integers!");
5089
5090 Value *LoadBasePtr = LI->getPointerOperand();
5091 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
5092
5093 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
5094
5095 // Check whether we have an already split load.
5096 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5097 std::vector<LoadInst *> *SplitLoads = nullptr;
5098 if (SplitLoadsMapI != SplitLoadsMap.end()) {
5099 SplitLoads = &SplitLoadsMapI->second;
5100 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
5101 "Too few split loads for the number of splits in the store!");
5102 } else {
5103 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
5104 }
5105
5106 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5107 int Idx = 0, Size = Offsets.Splits.size();
5108 for (;;) {
5109 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5110 auto *LoadPartPtrTy = LI->getPointerOperandType();
5111 auto *StorePartPtrTy = SI->getPointerOperandType();
5112
5113 // Either lookup a split load or create one.
5114 LoadInst *PLoad;
5115 if (SplitLoads) {
5116 PLoad = (*SplitLoads)[Idx];
5117 } else {
5118 IRB.SetInsertPoint(LI);
5119 auto AS = LI->getPointerAddressSpace();
5120 PLoad = IRB.CreateAlignedLoad(
5121 PartTy,
5122 getAdjustedPtr(IRB, DL, LoadBasePtr,
5123 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5124 LoadPartPtrTy, LoadBasePtr->getName() + "."),
5125 getAdjustedAlignment(LI, PartOffset),
5126 /*IsVolatile*/ false, LI->getName());
5127 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5128 LLVMContext::MD_access_group});
5129 }
5130
5131 // And store this partition.
5132 IRB.SetInsertPoint(SI);
5133 auto AS = SI->getPointerAddressSpace();
5134 StoreInst *PStore = IRB.CreateAlignedStore(
5135 PLoad,
5136 getAdjustedPtr(IRB, DL, StoreBasePtr,
5137 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5138 StorePartPtrTy, StoreBasePtr->getName() + "."),
5139 getAdjustedAlignment(SI, PartOffset),
5140 /*IsVolatile*/ false);
5141 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5142 LLVMContext::MD_access_group});
5143
5144 // Now build a new slice for the alloca.
5145 // ProtectedFieldDisc==nullptr is a lie, but it doesn't matter because we
5146 // already determined that all accesses are consistent.
5147 NewSlices.push_back(
5148 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5149 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
5150 /*IsSplittable*/ false, nullptr));
5151 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
5152 << ", " << NewSlices.back().endOffset()
5153 << "): " << *PStore << "\n");
5154 if (!SplitLoads) {
5155 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
5156 }
5157
5158 // See if we've finished all the splits.
5159 if (Idx >= Size)
5160 break;
5161
5162 // Setup the next partition.
5163 PartOffset = Offsets.Splits[Idx];
5164 ++Idx;
5165 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5166 }
5167
5168 // We want to immediately iterate on any allocas impacted by splitting
5169 // this load, which is only relevant if it isn't a load of this alloca and
5170 // thus we didn't already split the loads above. We also have to keep track
5171 // of any promotable allocas we split loads on as they can no longer be
5172 // promoted.
5173 if (!SplitLoads) {
5174 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
5175 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5176 ResplitPromotableAllocas.insert(OtherAI);
5177 Worklist.insert(OtherAI);
5178 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5179 LoadBasePtr->stripInBoundsOffsets())) {
5180 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5181 Worklist.insert(OtherAI);
5182 }
5183 }
5184
5185 // Mark the original store as dead now that we've split it up and kill its
5186 // slice. Note that we leave the original load in place unless this store
5187 // was its only use. It may in turn be split up if it is an alloca load
5188 // for some other alloca, but it may be a normal load. This may introduce
5189 // redundant loads, but where those can be merged the rest of the optimizer
5190 // should handle the merging, and this uncovers SSA splits which is more
5191 // important. In practice, the original loads will almost always be fully
5192 // split and removed eventually, and the splits will be merged by any
5193 // trivial CSE, including instcombine.
5194 if (LI->hasOneUse()) {
5195 assert(*LI->user_begin() == SI && "Single use isn't this store!");
5196 DeadInsts.push_back(LI);
5197 }
5198 DeadInsts.push_back(SI);
5199 Offsets.S->kill();
5200 }
5201
5202 // Remove the killed slices that have been pre-split.
5203 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
5204
5205 // Insert our new slices. This will sort and merge them into the sorted
5206 // sequence.
5207 AS.insert(NewSlices);
5208
5209 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
5210#ifndef NDEBUG
5211 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5212 LLVM_DEBUG(AS.print(dbgs(), I, " "));
5213#endif
5214
5215 // Finally, don't try to promote any allocas that now require re-splitting.
5216 // They have already been added to the worklist above.
5217 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5218
5219 return true;
5220}
5221
5222/// Select a partition type for an alloca partition.
5223///
5224/// Try to compute a friendly type for this partition of the alloca. This
5225/// won't always succeed, in which case we fall back to a legal integer type
5226/// or an i8 array of an appropriate size.
5227///
5228/// \returns A tuple with the following elements:
5229/// - PartitionType: The computed type for this partition.
5230/// - IsIntegerWideningViable: True if integer widening promotion is used.
5231/// - VectorType: The vector type if vector promotion is used, otherwise
5232/// nullptr.
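///
/// For example (illustrative return values): {<4 x float>, false, <4 x float>}
/// selects vector promotion with a <4 x float> slot; {i64, true, nullptr}
/// selects an i64 slot rewritten via integer widening; and
/// {[16 x i8], false, nullptr} is the unpromotable i8-array fallback.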
5233static std::tuple<Type *, bool, VectorType *>
5234 selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI,
5235 LLVMContext &C) {
5236 // First check if the partition is viable for vector promotion.
5237 //
5238 // We prefer vector promotion over integer widening promotion when:
5239 // - The vector element type is a floating-point type.
5240 // - All the loads/stores to the alloca are vector loads/stores to the
5241 // entire alloca or load/store a single element of the vector.
5242 //
5243 // Otherwise when there is an integer vector with mixed type loads/stores we
5244 // prefer integer widening promotion because it's more likely the user is
5245 // doing bitwise arithmetic and we generate better code.
5246 VectorType *VecTy =
5248 // If the vector element type is a floating-point type, we prefer vector
5249 // promotion. If the vector has one element, let the below code select
5250 // whether we promote with the vector or scalar.
5251 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5252 VecTy->getElementCount().getFixedValue() > 1)
5253 return {VecTy, false, VecTy};
5254
5255 // Check if there is a common type that all slices of the partition use that
5256 // spans the partition.
5257 auto [CommonUseTy, LargestIntTy] =
5258 findCommonType(P.begin(), P.end(), P.endOffset());
5259 if (CommonUseTy) {
5260 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
5261 if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
5262 // We prefer vector promotion here because if vector promotion is viable
5263 // and there is a common type used, then it implies the second listed
5264 // condition for preferring vector promotion is true.
5265 if (VecTy)
5266 return {VecTy, false, VecTy};
5267 return {CommonUseTy, isIntegerWideningViable(P, CommonUseTy, DL),
5268 nullptr};
5269 }
5270 }
5271
5272 // Can we find an appropriate subtype in the original allocated
5273 // type?
5274 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
5275 P.beginOffset(), P.size())) {
5276 // If the partition is an integer array that can be spanned by a legal
5277 // integer type, prefer to represent it as a legal integer type because
5278 // it's more likely to be promotable.
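// For example (illustrative): an [8 x i8] partition type on a target with
// legal 64-bit integers is replaced with i64 here.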
5279 if (TypePartitionTy->isArrayTy() &&
5280 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5281 DL.isLegalInteger(P.size() * 8))
5282 TypePartitionTy = Type::getIntNTy(C, P.size() * 8);
5283 // There was no common type used, so we prefer integer widening promotion.
5284 if (isIntegerWideningViable(P, TypePartitionTy, DL))
5285 return {TypePartitionTy, true, nullptr};
5286 if (VecTy)
5287 return {VecTy, false, VecTy};
5288 // If we couldn't promote with TypePartitionTy, try with the largest
5289 // integer type used.
5290 if (LargestIntTy &&
5291 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
5292 isIntegerWideningViable(P, LargestIntTy, DL))
5293 return {LargestIntTy, true, nullptr};
5294
5295 // Fallback to TypePartitionTy and we probably won't promote.
5296 return {TypePartitionTy, false, nullptr};
5297 }
5298
5299 // Select the largest integer type used if it spans the partition.
5300 if (LargestIntTy &&
5301 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size())
5302 return {LargestIntTy, false, nullptr};
5303
5304 // Select a legal integer type if it spans the partition.
5305 if (DL.isLegalInteger(P.size() * 8))
5306 return {Type::getIntNTy(C, P.size() * 8), false, nullptr};
5307
5308 // Fallback to an i8 array.
5309 return {ArrayType::get(Type::getInt8Ty(C), P.size()), false, nullptr};
5310}
5311
5312/// Rewrite an alloca partition's users.
5313///
5314/// This routine drives both of the rewriting goals of the SROA pass. It tries
5315/// to rewrite uses of an alloca partition to be conducive for SSA value
5316/// promotion. If the partition needs a new, more refined alloca, this will
5317/// build that new alloca, preserving as much type information as possible, and
5318/// rewrite the uses of the old alloca to point at the new one and have the
5319/// appropriate new offsets. It also evaluates how successful the rewrite was
5320/// at enabling promotion and if it was successful queues the alloca to be
5321/// promoted.
5322AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
5323 Partition &P) {
5324 const DataLayout &DL = AI.getDataLayout();
5325 // Select the type for the new alloca that spans the partition.
5326 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5327 selectPartitionType(P, DL, AI, *C);
5328
5329 // Check for the case where we're going to rewrite to a new alloca of the
5330 // exact same type as the original, and with the same access offsets. In that
5331 // case, re-use the existing alloca, but still run through the rewriter to
5332 // perform phi and select speculation.
5333 // P.beginOffset() can be non-zero even with the same type in a case with
5334 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
5335 AllocaInst *NewAI;
5336 if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5337 NewAI = &AI;
5338 // FIXME: We should be able to bail at this point with "nothing changed".
5339 // FIXME: We might want to defer PHI speculation until after here.
5340 // FIXME: return nullptr;
5341 } else {
5342 // Make sure the alignment is compatible with P.beginOffset().
5343 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
5344 // If we will get at least this much alignment from the type alone, leave
5345 // the alloca's alignment unconstrained.
5346 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
5347 NewAI = new AllocaInst(
5348 PartitionTy, AI.getAddressSpace(), nullptr,
5349 IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
5350 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5351 AI.getIterator());
5352 // Copy the old AI debug location over to the new one.
5353 NewAI->setDebugLoc(AI.getDebugLoc());
5354 ++NumNewAllocas;
5355 }
5356
5357 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5358 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5359
5360 // Track the high watermark on the worklist as it is only relevant for
5361 // promoted allocas. We will reset it to this point if the alloca is not in
5362 // fact scheduled for promotion.
5363 unsigned PPWOldSize = PostPromotionWorklist.size();
5364 unsigned NumUses = 0;
5365 SmallSetVector<PHINode *, 8> PHIUsers;
5366 SmallSetVector<SelectInst *, 8> SelectUsers;
5367
5368 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
5369 P.endOffset(), IsIntegerWideningViable, VecTy,
5370 PHIUsers, SelectUsers);
5371 bool Promotable = true;
5372 // Check whether we can perform a tree-structured merge.
5373 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5374 NumUses += DeletedValues->size() + 1;
5375 for (Value *V : *DeletedValues)
5376 DeadInsts.push_back(V);
5377 } else {
5378 for (Slice *S : P.splitSliceTails()) {
5379 Promotable &= Rewriter.visit(S);
5380 ++NumUses;
5381 }
5382 for (Slice &S : P) {
5383 Promotable &= Rewriter.visit(&S);
5384 ++NumUses;
5385 }
5386 }
5387
5388 NumAllocaPartitionUses += NumUses;
5389 MaxUsesPerAllocaPartition.updateMax(NumUses);
5390
5391 // Now that we've processed all the slices in the new partition, check if any
5392 // PHIs or Selects would block promotion.
5393 for (PHINode *PHI : PHIUsers)
5394 if (!isSafePHIToSpeculate(*PHI)) {
5395 Promotable = false;
5396 PHIUsers.clear();
5397 SelectUsers.clear();
5398 break;
5399 }
5400
5402 NewSelectsToRewrite;
5403 NewSelectsToRewrite.reserve(SelectUsers.size());
5404 for (SelectInst *Sel : SelectUsers) {
5405 std::optional<RewriteableMemOps> Ops =
5406 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5407 if (!Ops) {
5408 Promotable = false;
5409 PHIUsers.clear();
5410 SelectUsers.clear();
5411 NewSelectsToRewrite.clear();
5412 break;
5413 }
5414 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5415 }
5416
5417 if (Promotable) {
5418 for (Use *U : AS.getDeadUsesIfPromotable()) {
5419 auto *OldInst = dyn_cast<Instruction>(U->get());
5420 Value::dropDroppableUse(*U);
5421 if (OldInst)
5422 if (isInstructionTriviallyDead(OldInst))
5423 DeadInsts.push_back(OldInst);
5424 }
5425 if (PHIUsers.empty() && SelectUsers.empty()) {
5426 // Promote the alloca.
5427 PromotableAllocas.insert(NewAI);
5428 } else {
5429 // If we have either PHIs or Selects to speculate, add them to those
5430 // worklists and re-queue the new alloca so that we promote it on the
5431 // next iteration.
5432 SpeculatablePHIs.insert_range(PHIUsers);
5433 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5434 NewSelectsToRewrite.size());
5435 for (auto &&KV : llvm::make_range(
5436 std::make_move_iterator(NewSelectsToRewrite.begin()),
5437 std::make_move_iterator(NewSelectsToRewrite.end())))
5438 SelectsToRewrite.insert(std::move(KV));
5439 Worklist.insert(NewAI);
5440 }
5441 } else {
5442 // Drop any post-promotion work items if promotion didn't happen.
5443 while (PostPromotionWorklist.size() > PPWOldSize)
5444 PostPromotionWorklist.pop_back();
5445
5446 // We couldn't promote and we didn't create a new partition, nothing
5447 // happened.
5448 if (NewAI == &AI)
5449 return nullptr;
5450
5451 // If we can't promote the alloca, iterate on it to check for new
5452 // refinements exposed by splitting the current alloca. Don't iterate on an
5453 // alloca which didn't actually change and didn't get promoted.
5454 Worklist.insert(NewAI);
5455 }
5456
5457 return NewAI;
5458}
5459
5460// There isn't a shared interface to get the "address" parts out of a
5461// dbg.declare and dbg.assign, so provide some wrappers.
5464 return DVR->isKillAddress();
5465 return DVR->isKillLocation();
5466}
5467
5470 return DVR->getAddressExpression();
5471 return DVR->getExpression();
5472}
5473
5474/// Create or replace an existing fragment in a DIExpression with \p Frag.
5475/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
5476/// operation, add \p BitExtractOffset to the offset part.
5477///
5478/// Returns the new expression, or nullptr if this fails (see details below).
5479///
5480/// This function is similar to DIExpression::createFragmentExpression except
5481/// for 3 important distinctions:
5482/// 1. The new fragment isn't relative to an existing fragment.
5483/// 2. It assumes the computed location is a memory location. This means we
5484/// don't need to perform checks that creating the fragment preserves the
5485/// expression semantics.
5486/// 3. Existing extract_bits are modified independently of fragment changes
5487/// using \p BitExtractOffset. A change to the fragment offset or size
5488/// may affect a bit extract. But a bit extract offset can change
5489/// independently of the fragment dimensions.
5490///
5491/// Returns the new expression, or nullptr if one couldn't be created.
5492/// Ideally this is only used to signal that a bit-extract has become
5493/// zero-sized (and thus the new debug record has no size and can be
5494/// dropped), however, it fails for other reasons too - see the FIXME below.
5495///
5496/// FIXME: To keep the change that introduces this function NFC it bails
5497 /// in some situations unnecessarily, e.g. when fragment and bit extract
5498/// sizes differ.
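///
/// For example (illustrative): with an expression containing
/// DW_OP_LLVM_extract_bits_zext(16, 8), Frag = {OffsetInBits: 0,
/// SizeInBits: 8} and BitExtractOffset = -16, the bit extract is rebased to
/// DW_OP_LLVM_extract_bits_zext(0, 8); because a bit extract is present, no
/// DW_OP_LLVM_fragment op is appended.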
5499 static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
5500 DIExpression::FragmentInfo Frag,
5501 int64_t BitExtractOffset) {
5502 SmallVector<uint64_t, 8> Ops;
5503 bool HasFragment = false;
5504 bool HasBitExtract = false;
5505
5506 for (auto &Op : Expr->expr_ops()) {
5507 if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5508 HasFragment = true;
5509 continue;
5510 }
5511 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5512 Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5513 HasBitExtract = true;
5514 int64_t ExtractOffsetInBits = Op.getArg(0);
5515 int64_t ExtractSizeInBits = Op.getArg(1);
5516
5517 // DIExpression::createFragmentExpression doesn't know how to handle
5518 // a fragment that is smaller than the extract. Copy the behaviour
5519 // (bail) to avoid non-NFC changes.
5520 // FIXME: Don't do this.
5521 if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
5522 return nullptr;
5523
5524 assert(BitExtractOffset <= 0);
5525 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5526
5527 // DIExpression::createFragmentExpression doesn't know what to do
5528 // if the new extract starts "outside" the existing one. Copy the
5529 // behaviour (bail) to avoid non-NFC changes.
5530 // FIXME: Don't do this.
5531 if (AdjustedOffset < 0)
5532 return nullptr;
5533
5534 Ops.push_back(Op.getOp());
5535 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5536 Ops.push_back(ExtractSizeInBits);
5537 continue;
5538 }
5539 Op.appendToVector(Ops);
5540 }
5541
5542 // Unsupported by createFragmentExpression, so don't support it here yet to
5543 // preserve NFC-ness.
5544 if (HasFragment && HasBitExtract)
5545 return nullptr;
5546
5547 if (!HasBitExtract) {
5548 Ops.push_back(dwarf::DW_OP_LLVM_fragment);
5549 Ops.push_back(Frag.OffsetInBits);
5550 Ops.push_back(Frag.SizeInBits);
5551 }
5552 return DIExpression::get(Expr->getContext(), Ops);
5553}
5554
5555/// Insert a new DbgRecord.
5556/// \p Orig Original to copy record type, debug loc and variable from, and
5557/// additionally value and value expression for dbg_assign records.
5558/// \p NewAddr Location's new base address.
5559/// \p NewAddrExpr New expression to apply to address.
5560/// \p BeforeInst Insert position.
5561/// \p NewFragment New fragment (absolute, non-relative).
5562/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5563static void
5564 insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
5565 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5566 std::optional<DIExpression::FragmentInfo> NewFragment,
5567 int64_t BitExtractAdjustment) {
5568 (void)DIB;
5569
5570 // A dbg_assign puts fragment info in the value expression only. The address
5571 // expression has already been built: NewAddrExpr. A dbg_declare puts the
5572 // new fragment info into NewAddrExpr (as it only has one expression).
5573 DIExpression *NewFragmentExpr =
5574 Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5575 if (NewFragment)
5576 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5577 BitExtractAdjustment);
5578 if (!NewFragmentExpr)
5579 return;
5580
5581 if (Orig->isDbgDeclare()) {
5583 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5584 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5585 BeforeInst->getIterator());
5586 return;
5587 }
5588
5589 if (Orig->isDbgValue()) {
5591 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5592 // Drop debug information if the expression doesn't start with a
5593 // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
5594 // describes the address of the alloca rather than the value inside it.
5595 if (!NewFragmentExpr->startsWithDeref())
5596 DVR->setKillAddress();
5597 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5598 BeforeInst->getIterator());
5599 return;
5600 }
5601
5602 // Apply a DIAssignID to the store if it doesn't already have it.
5603 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5604 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5605 DIAssignID::getDistinct(NewAddr->getContext()));
5606 }
5607
5609 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5610 NewAddrExpr, Orig->getDebugLoc());
5611 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5612 (void)NewAssign;
5613}
5614
5615 /// Walks the slices of an alloca and forms partitions based on them,
5616/// rewriting each of their uses.
5617bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5618 if (AS.begin() == AS.end())
5619 return false;
5620
5621 unsigned NumPartitions = 0;
5622 bool Changed = false;
5623 const DataLayout &DL = AI.getModule()->getDataLayout();
5624
5625 // First try to pre-split loads and stores.
5626 Changed |= presplitLoadsAndStores(AI, AS);
5627
5628 // Now that we have identified any pre-splitting opportunities,
5629 // mark loads and stores unsplittable except for the following case.
5630 // We leave a slice splittable if all other slices are disjoint or fully
5631 // included in the slice, such as whole-alloca loads and stores.
5632 // If we fail to split these during pre-splitting, we want to force them
5633 // to be rewritten into a partition.
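// For example (illustrative): in an 8-byte alloca, an access covering bytes
// [2,6) clears offsets 3, 4 and 5, so any other splittable load or store
// slice beginning or ending at those offsets is made unsplittable below,
// while a whole-alloca slice keeps its endpoints 0 and 8 and stays
// splittable.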
5634 bool IsSorted = true;
5635
5636 uint64_t AllocaSize =
5637 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5638 const uint64_t MaxBitVectorSize = 1024;
5639 if (AllocaSize <= MaxBitVectorSize) {
5640 // If a byte boundary is included in any load or store, a slice starting or
5641 // ending at the boundary is not splittable.
5642 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5643 for (Slice &S : AS)
5644 for (unsigned O = S.beginOffset() + 1;
5645 O < S.endOffset() && O < AllocaSize; O++)
5646 SplittableOffset.reset(O);
5647
5648 for (Slice &S : AS) {
5649 if (!S.isSplittable())
5650 continue;
5651
5652 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5653 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5654 continue;
5655
5656 if (isa<LoadInst>(S.getUse()->getUser()) ||
5657 isa<StoreInst>(S.getUse()->getUser())) {
5658 S.makeUnsplittable();
5659 IsSorted = false;
5660 }
5661 }
5662 } else {
5663 // We only allow whole-alloca splittable loads and stores
5664 // for a large alloca to avoid creating too large BitVector.
5665 for (Slice &S : AS) {
5666 if (!S.isSplittable())
5667 continue;
5668
5669 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5670 continue;
5671
5672 if (isa<LoadInst>(S.getUse()->getUser()) ||
5673 isa<StoreInst>(S.getUse()->getUser())) {
5674 S.makeUnsplittable();
5675 IsSorted = false;
5676 }
5677 }
5678 }
5679
5680 if (!IsSorted)
5681 llvm::stable_sort(AS);
5682
5683 /// Describes the allocas introduced by rewritePartition in order to migrate
5684 /// the debug info.
5685 struct Fragment {
5686 AllocaInst *Alloca;
5687 uint64_t Offset;
5688 uint64_t Size;
5689 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5690 : Alloca(AI), Offset(O), Size(S) {}
5691 };
5692 SmallVector<Fragment, 4> Fragments;
5693
5694 // Rewrite each partition.
5695 for (auto &P : AS.partitions()) {
5696 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5697 Changed = true;
5698 if (NewAI != &AI) {
5699 uint64_t SizeOfByte = 8;
5700 uint64_t AllocaSize =
5701 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5702 // Don't include any padding.
5703 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5704 Fragments.push_back(
5705 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5706 }
5707 }
5708 ++NumPartitions;
5709 }
5710
5711 NumAllocaPartitions += NumPartitions;
5712 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5713
5714 // Migrate debug information from the old alloca to the new alloca(s)
5715 // and the individual partitions.
5716 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5717 // Can't overlap with undef memory.
5718 if (isKillAddress(DbgVariable))
5719 return;
5720
5721 const Value *DbgPtr = DbgVariable->getAddress();
5722 DIExpression::FragmentInfo VarFrag =
5723 DbgVariable->getFragmentOrEntireVariable();
5724 // Get the address expression constant offset if one exists and the ops
5725 // that come after it.
5726 int64_t CurrentExprOffsetInBytes = 0;
5727 SmallVector<uint64_t> PostOffsetOps;
5728 if (!getAddressExpression(DbgVariable)
5729 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5730 return; // Couldn't interpret this DIExpression - drop the var.
5731
5732 // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
5733 int64_t ExtractOffsetInBits = 0;
5734 for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5735 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5736 Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5737 ExtractOffsetInBits = Op.getArg(0);
5738 break;
5739 }
5740 }
5741
5742 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5743 for (auto Fragment : Fragments) {
5744 int64_t OffsetFromLocationInBits;
5745 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5746 // Find the variable fragment that the new alloca slice covers.
5747 // Drop debug info for this variable fragment if we can't compute an
5748 // intersect between it and the alloca slice.
5750 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5751 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5752 NewDbgFragment, OffsetFromLocationInBits))
5753 continue; // Do not migrate this fragment to this slice.
5754
5755 // Zero sized fragment indicates there's no intersect between the variable
5756 // fragment and the alloca slice. Skip this slice for this variable
5757 // fragment.
5758 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5759 continue; // Do not migrate this fragment to this slice.
5760
5761 // No fragment indicates DbgVariable's variable or fragment exactly
5762 // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5763 if (!NewDbgFragment)
5764 NewDbgFragment = DbgVariable->getFragment();
5765
5766 // Reduce the new expression offset by the bit-extract offset since
5767 // we'll be keeping that.
5768 int64_t OffsetFromNewAllocaInBits =
5769 OffsetFromLocationInBits - ExtractOffsetInBits;
5770 // We need to adjust an existing bit extract if the offset expression
5771 // can't eat the slack (i.e., if the new offset would be negative).
5772 int64_t BitExtractOffset =
5773 std::min<int64_t>(0, OffsetFromNewAllocaInBits);
5774 // The magnitude of a negative value indicates how many bits into the
5775 // existing variable fragment the memory region begins. The new
5776 // variable fragment already excludes those bits - the new DbgPtr offset
5777 // only needs to be applied if it's positive.
5778 OffsetFromNewAllocaInBits =
5779 std::max(int64_t(0), OffsetFromNewAllocaInBits);
5780
5781 // Rebuild the expression:
5782 // {Offset(OffsetFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5783 // Add NewDbgFragment later, because dbg.assigns don't want it in the
5784 // address expression but the value expression instead.
5785 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5786 if (OffsetFromNewAllocaInBits > 0) {
5787 int64_t OffsetInBytes = (OffsetFromNewAllocaInBits + 7) / 8;
5788 NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
5789 }
5790
5791 // Remove any existing intrinsics on the new alloca describing
5792 // the variable fragment.
5793 auto RemoveOne = [DbgVariable](auto *OldDII) {
5794 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5795 return LHS->getVariable() == RHS->getVariable() &&
5796 LHS->getDebugLoc()->getInlinedAt() ==
5797 RHS->getDebugLoc()->getInlinedAt();
5798 };
5799 if (SameVariableFragment(OldDII, DbgVariable))
5800 OldDII->eraseFromParent();
5801 };
5802 for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
5803 for_each(findDVRValues(Fragment.Alloca), RemoveOne);
5804 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5805 NewDbgFragment, BitExtractOffset);
5806 }
5807 };
5808
5809 // Migrate debug information from the old alloca to the new alloca(s)
5810 // and the individual partitions.
5811 for_each(findDVRDeclares(&AI), MigrateOne);
5812 for_each(findDVRValues(&AI), MigrateOne);
5813 for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
5814
5815 return Changed;
5816}
5817
5818/// Clobber a use with poison, deleting the used value if it becomes dead.
5819void SROA::clobberUse(Use &U) {
5820 Value *OldV = U;
5821 // Replace the use with a poison value.
5822 U = PoisonValue::get(OldV->getType());
5823
5824 // Check for this making an instruction dead. We have to garbage collect
5825 // all the dead instructions to ensure the uses of any alloca end up being
5826 // minimal.
5827 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5828 if (isInstructionTriviallyDead(OldI)) {
5829 DeadInsts.push_back(OldI);
5830 }
5831}
5832
5833/// A basic LoadAndStorePromoter that does not remove store nodes.
5834 class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
5835 public:
5836 BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
5837 Type *ZeroType)
5838 : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5839 bool shouldDelete(Instruction *I) const override {
5840 return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5841 }
5842
5843 Value *getValueToUseForAlloca(Instruction *I) const override {
5844 return UndefValue::get(ZeroType);
5845 }
5846
5847private:
5848 Type *ZeroType;
5849};
5850
5851bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5852 // Look through each "partition", looking for slices with the same start/end
5853 // that do not overlap with any before them. The slices are sorted by
5854 // increasing beginOffset. We don't use AS.partitions(), as it will use a more
5855 // sophisticated algorithm that takes splittable slices into account.
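// For example (illustrative): an alloca whose address escapes only to a
// read-only call, but is otherwise accessed by same-typed loads and stores
// over the same range, can have each load rewritten to the reaching stored
// value via SSAUpdater while the stores are kept for the escaped reader.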
5856 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5857 bool AllSameAndValid = true;
5858 Type *PartitionType = nullptr;
5859 SmallVector<Instruction *, 4> Insts;
5860 uint64_t BeginOffset = 0;
5861 uint64_t EndOffset = 0;
5862
5863 auto Flush = [&]() {
5864 if (AllSameAndValid && !Insts.empty()) {
5865 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5866 << EndOffset << ")\n");
5867 SmallVector<PHINode *, 4> NewPHIs;
5868 SSAUpdater SSA(&NewPHIs);
5869 Insts.push_back(&AI);
5870 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5871 Promoter.run(Insts);
5872 }
5873 AllSameAndValid = true;
5874 PartitionType = nullptr;
5875 Insts.clear();
5876 };
5877
5878 for (Slice &S : AS) {
5879 auto *User = cast<Instruction>(S.getUse()->getUser());
5880 if (isAssumeLikeIntrinsic(User)) {
5881 LLVM_DEBUG({
5882 dbgs() << "Ignoring slice: ";
5883 AS.print(dbgs(), &S);
5884 });
5885 continue;
5886 }
5887 if (S.beginOffset() >= EndOffset) {
5888 Flush();
5889 BeginOffset = S.beginOffset();
5890 EndOffset = S.endOffset();
5891 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5892 if (AllSameAndValid) {
5893 LLVM_DEBUG({
5894 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5895 << EndOffset << ")";
5896 AS.print(dbgs(), &S);
5897 });
5898 AllSameAndValid = false;
5899 }
5900 EndOffset = std::max(EndOffset, S.endOffset());
5901 continue;
5902 }
5903
5904 if (auto *LI = dyn_cast<LoadInst>(User)) {
5905 Type *UserTy = LI->getType();
5906 // LoadAndStorePromoter requires all the types to be the same.
5907 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5908 AllSameAndValid = false;
5909 PartitionType = UserTy;
5910 Insts.push_back(User);
5911 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5912 Type *UserTy = SI->getValueOperand()->getType();
5913 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5914 AllSameAndValid = false;
5915 PartitionType = UserTy;
5916 Insts.push_back(User);
5917 } else {
5918 AllSameAndValid = false;
5919 }
5920 }
5921
5922 Flush();
5923 return true;
5924}
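// Illustrative sketch (assumed IR, not from the original source) of the
// read-only-escape case handled above: the alloca cannot be deleted because
// its address is passed to a call that may capture it but only reads it, yet
// loads from it can still be rewritten to the stored SSA value.
//
//   %a = alloca i32
//   store i32 %v, ptr %a
//   call void @use(ptr readonly %a)   ; %a escapes here, but is only read
//   %l = load i32, ptr %a             ; rewritten to use %v directly
//
// The alloca and the store are kept (shouldDelete() returns false for them);
// only the loads are folded away by the SSAUpdater-based promoter.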
5925
5926/// Analyze an alloca for SROA.
5927///
5928/// This analyzes the alloca to ensure we can reason about it, builds
5929/// the slices of the alloca, and then hands it off to be split and
5930/// rewritten as needed.
5931std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5932SROA::runOnAlloca(AllocaInst &AI) {
5933 bool Changed = false;
5934 bool CFGChanged = false;
5935
5936 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5937 ++NumAllocasAnalyzed;
5938
5939 // Special case dead allocas, as they're trivial.
5940 if (AI.use_empty()) {
5941 AI.eraseFromParent();
5942 Changed = true;
5943 return {Changed, CFGChanged};
5944 }
5945 const DataLayout &DL = AI.getDataLayout();
5946
5947 // Skip alloca forms that this analysis can't handle.
5948 auto *AT = AI.getAllocatedType();
5949 TypeSize Size = DL.getTypeAllocSize(AT);
5950 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5951 Size.getFixedValue() == 0)
5952 return {Changed, CFGChanged};
5953
5954 // First, split any FCA (first-class aggregate) loads and stores touching
5955 // this alloca to expose better splitting and promotion opportunities.
5956 IRBuilderTy IRB(&AI);
5957 AggLoadStoreRewriter AggRewriter(DL, IRB);
5958 Changed |= AggRewriter.rewrite(AI);
5959
5960 // Build the slices using a recursive instruction-visiting builder.
5961 AllocaSlices AS(DL, AI);
5962 LLVM_DEBUG(AS.print(dbgs()));
5963 if (AS.isEscaped())
5964 return {Changed, CFGChanged};
5965
5966 if (AS.isEscapedReadOnly()) {
5967 Changed |= propagateStoredValuesToLoads(AI, AS);
5968 return {Changed, CFGChanged};
5969 }
5970
5971 for (auto &P : AS.partitions()) {
5972 // For now, we can't split if a field is accessed both as a protected field
5973 // and as a regular (non-protected) field, because that would require
5974 // introducing sign and auth operations to convert between the protected and
5975 // non-protected uses, and this pass doesn't know how to do that. This case
5976 // is also unlikely to occur in normal code.
5977 std::optional<Value *> ProtectedFieldDisc;
5978 auto SliceHasMismatch = [&](Slice &S) {
5979 if (auto *II = dyn_cast<IntrinsicInst>(S.getUse()->getUser()))
5980 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
5981 II->getIntrinsicID() == Intrinsic::lifetime_end)
5982 return false;
5983 if (!ProtectedFieldDisc)
5984 ProtectedFieldDisc = S.ProtectedFieldDisc;
5985 return *ProtectedFieldDisc != S.ProtectedFieldDisc;
5986 };
5987 for (Slice &S : P)
5988 if (SliceHasMismatch(S))
5989 return {Changed, CFGChanged};
5990 for (Slice *S : P.splitSliceTails())
5991 if (SliceHasMismatch(*S))
5992 return {Changed, CFGChanged};
5993 }
5994
5995 // Delete all the dead users of this alloca before splitting and rewriting it.
5996 for (Instruction *DeadUser : AS.getDeadUsers()) {
5997 // Free up everything used by this instruction.
5998 for (Use &DeadOp : DeadUser->operands())
5999 clobberUse(DeadOp);
6000
6001 // Now replace the uses of this instruction.
6002 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
6003
6004 // And mark it for deletion.
6005 DeadInsts.push_back(DeadUser);
6006 Changed = true;
6007 }
6008 for (Use *DeadOp : AS.getDeadOperands()) {
6009 clobberUse(*DeadOp);
6010 Changed = true;
6011 }
6012 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
6013 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
6014
6015 DeadInsts.push_back(PFPUser);
6016 Changed = true;
6017 }
6018
6019 // No slices to split. Leave the dead alloca for a later pass to clean up.
6020 if (AS.begin() == AS.end())
6021 return {Changed, CFGChanged};
6022
6023 Changed |= splitAlloca(AI, AS);
6024
6025 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
6026 while (!SpeculatablePHIs.empty())
6027 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
6028
6029 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
6030 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
6031 while (!RemainingSelectsToRewrite.empty()) {
6032 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
6033 CFGChanged |=
6034 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
6035 }
6036
6037 return {Changed, CFGChanged};
6038}
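// Illustrative sketch (assumed IR, not from the original source) of what a
// successful run over one alloca achieves. Given
//
//   %p = alloca { i32, i32 }
//   store { i32, i32 } %agg, ptr %p
//   %f1 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
//   %x = load i32, ptr %f1
//
// the FCA store is first split per field by AggLoadStoreRewriter, the alloca
// is then sliced and rewritten, and mem2reg promotion (below) leaves roughly
//
//   %x = extractvalue { i32, i32 } %agg, 1
//
// with the alloca, the store, and the GEP all deleted.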
6039
6040/// Delete the dead instructions accumulated in this run.
6041///
6042/// Recursively deletes the dead instructions we've accumulated. This is done
6043/// at the very end to maximize locality of the recursive delete and to
6044/// minimize the problems of invalidated instruction pointers as such pointers
6045/// are used heavily in the intermediate stages of the algorithm.
6046///
6047/// We also record the alloca instructions deleted here so that they aren't
6048/// subsequently handed to mem2reg to promote.
6049bool SROA::deleteDeadInstructions(
6050 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6051 bool Changed = false;
6052 while (!DeadInsts.empty()) {
6053 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
6054 if (!I)
6055 continue;
6056 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
6057
6058 // If the instruction is an alloca, find the possible dbg.declare connected
6059 // to it, and remove it too. We must do this before calling RAUW or we will
6060 // not be able to find it.
6061 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6062 DeletedAllocas.insert(AI);
6063 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
6064 OldDII->eraseFromParent();
6065 }
6066
6068 I->replaceAllUsesWith(UndefValue::get(I->getType()));
6069
6070 for (Use &Operand : I->operands())
6071 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
6072 // Zero out the operand and see if it becomes trivially dead.
6073 Operand = nullptr;
6074 if (isInstructionTriviallyDead(U))
6075 DeadInsts.push_back(U);
6076 }
6077
6078 ++NumDeleted;
6079 I->eraseFromParent();
6080 Changed = true;
6081 }
6082 return Changed;
6083}
6084/// Promote the allocas, using the best available technique.
6085///
6086/// This attempts to promote whatever allocas have been identified as viable in
6087/// the PromotableAllocas list. If that list is empty, there is nothing to do.
6088/// This function returns whether any promotion occurred.
6089bool SROA::promoteAllocas() {
6090 if (PromotableAllocas.empty())
6091 return false;
6092
6093 if (SROASkipMem2Reg) {
6094 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
6095 } else {
6096 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
6097 NumPromoted += PromotableAllocas.size();
6098 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6099 }
6100
6101 PromotableAllocas.clear();
6102 return true;
6103}
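// Illustrative sketch (assumed IR, not from the original source): mem2reg
// rewrites loads of a promotable alloca into SSA values, inserting PHIs at
// join points. For example,
//
//   %a = alloca i32
//   ; %then:  store i32 1, ptr %a
//   ; %else:  store i32 2, ptr %a
//   ; %join:  %v = load i32, ptr %a
//
// becomes an alloca-free function where the load is replaced by
//
//   %v = phi i32 [ 1, %then ], [ 2, %else ]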
6104
6105std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
6106 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
6107
6108 const DataLayout &DL = F.getDataLayout();
6109 BasicBlock &EntryBB = F.getEntryBlock();
6110 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
6111 I != E; ++I) {
6112 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6113 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
6114 isAllocaPromotable(AI))
6115 PromotableAllocas.insert(AI);
6116 else
6117 Worklist.insert(AI);
6118 }
6119 }
6120
6121 bool Changed = false;
6122 bool CFGChanged = false;
6123 // A set of deleted alloca instruction pointers which should be removed from
6124 // the list of promotable allocas.
6125 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6126
6127 do {
6128 while (!Worklist.empty()) {
6129 auto [IterationChanged, IterationCFGChanged] =
6130 runOnAlloca(*Worklist.pop_back_val());
6131 Changed |= IterationChanged;
6132 CFGChanged |= IterationCFGChanged;
6133
6134 Changed |= deleteDeadInstructions(DeletedAllocas);
6135
6136 // Remove the deleted allocas from various lists so that we don't try to
6137 // continue processing them.
6138 if (!DeletedAllocas.empty()) {
6139 Worklist.set_subtract(DeletedAllocas);
6140 PostPromotionWorklist.set_subtract(DeletedAllocas);
6141 PromotableAllocas.set_subtract(DeletedAllocas);
6142 DeletedAllocas.clear();
6143 }
6144 }
6145
6146 Changed |= promoteAllocas();
6147
6148 Worklist = PostPromotionWorklist;
6149 PostPromotionWorklist.clear();
6150 } while (!Worklist.empty());
6151
6152 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6153 assert((!CFGChanged || !PreserveCFG) &&
6154 "Should not have modified the CFG when told to preserve it.");
6155
6156 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
6157 for (auto &BB : F) {
6158 RemoveRedundantDbgInstrs(&BB);
6159 }
6160 }
6161
6162 return {Changed, CFGChanged};
6163}
6164
6165PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
6166 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
6167 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
6168 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6169 auto [Changed, CFGChanged] =
6170 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6171 if (!Changed)
6172 return PreservedAnalyses::all();
6173 PreservedAnalyses PA;
6174 if (!CFGChanged)
6175 PA.preserveSet<CFGAnalyses>();
6176 PA.preserve<DominatorTreeAnalysis>();
6177 return PA;
6178}
6179
6180void SROAPass::printPipeline(
6181 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6182 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6183 OS, MapClassName2PassName);
6184 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
6185 : "<modify-cfg>");
6186}
6187
6188SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
6189
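// Minimal usage sketch (assumed driver code, not part of this file): schedule
// SROA on a function with the new pass manager. The same pass can be requested
// from an 'opt' pipeline string as "sroa<preserve-cfg>" or "sroa<modify-cfg>",
// matching printPipeline() above.
//
//   #include "llvm/Passes/PassBuilder.h"
//   #include "llvm/Transforms/Scalar/SROA.h"
//   using namespace llvm;
//
//   void runSROAOn(Function &F) { // hypothetical helper
//     PassBuilder PB;
//     LoopAnalysisManager LAM;
//     FunctionAnalysisManager FAM;
//     CGSCCAnalysisManager CGAM;
//     ModuleAnalysisManager MAM;
//     PB.registerModuleAnalyses(MAM);
//     PB.registerCGSCCAnalyses(CGAM);
//     PB.registerFunctionAnalyses(FAM);
//     PB.registerLoopAnalyses(LAM);
//     PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//     FunctionPassManager FPM;
//     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
//     FPM.run(F, FAM);
//   }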
6190namespace {
6191
6192/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
6193 class SROALegacyPass : public FunctionPass {
6194 SROAOptions PreserveCFG;
6195
6196public:
6197 static char ID;
6198
6202 }
6203
6204 bool runOnFunction(Function &F) override {
6205 if (skipFunction(F))
6206 return false;
6207
6208 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6209 AssumptionCache &AC =
6210 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6211 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6212 auto [Changed, _] =
6213 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6214 return Changed;
6215 }
6216
6217 void getAnalysisUsage(AnalysisUsage &AU) const override {
6218 AU.addRequired<AssumptionCacheTracker>();
6219 AU.addRequired<DominatorTreeWrapperPass>();
6220 AU.addPreserved<GlobalsAAWrapperPass>();
6221 AU.addPreserved<DominatorTreeWrapperPass>();
6222 }
6223
6224 StringRef getPassName() const override { return "SROA"; }
6225};
6226
6227} // end anonymous namespace
6228
6229char SROALegacyPass::ID = 0;
6230
6231FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
6232 return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
6233 : SROAOptions::ModifyCFG);
6234}
6235
6236INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
6237 "Scalar Replacement Of Aggregates", false, false)
6238INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6239INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6240INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
6241 false, false)