1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well-known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
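//
// As an illustrative sketch (hypothetical IR, names arbitrary), given a
// simple two-field aggregate:
//
//   %pair = alloca { i32, i32 }
//   store i32 %a, ptr %pair
//   %f1 = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 1
//   store i32 %b, ptr %f1
//   %v0 = load i32, ptr %pair
//   %v1 = load i32, ptr %f1
//
// SROA splits the alloca into one slice per field, promotes each slice to an
// SSA value, and the loads simply become %a and %b with no memory traffic.
//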
24
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DIBuilder.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/Metadata.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/PassManager.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/User.h"
74#include "llvm/IR/Value.h"
75#include "llvm/IR/ValueHandle.h"
77#include "llvm/Pass.h"
81#include "llvm/Support/Debug.h"
89#include <algorithm>
90#include <cassert>
91#include <cstddef>
92#include <cstdint>
93#include <cstring>
94#include <iterator>
95#include <queue>
96#include <string>
97#include <tuple>
98#include <utility>
99#include <variant>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "sroa"
105
106STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
107STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
108STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
109STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
110STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
111STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
112STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
113STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
114STATISTIC(NumLoadsPredicated,
115 "Number of loads rewritten into predicated loads to allow promotion");
116STATISTIC(
117 NumStoresPredicated,
118 "Number of stores rewritten into predicated stores to allow promotion");
119STATISTIC(NumDeleted, "Number of instructions deleted");
120STATISTIC(NumVectorized, "Number of vectorized aggregates");
121
122namespace llvm {
123/// Disable running mem2reg during SROA in order to test or debug SROA.
124static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
125 cl::Hidden);
127} // namespace llvm
128
129namespace {
130
131class AllocaSliceRewriter;
132class AllocaSlices;
133class Partition;
134
135class SelectHandSpeculativity {
136 unsigned char Storage = 0; // None are speculatable by default.
137 using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
138 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
139public:
140 SelectHandSpeculativity() = default;
141 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
142 bool isSpeculatable(bool isTrueVal) const;
143 bool areAllSpeculatable() const;
144 bool areAnySpeculatable() const;
145 bool areNoneSpeculatable() const;
146 // For interop as int half of PointerIntPair.
147 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
148 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
149};
150static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
151
152using PossiblySpeculatableLoad =
153 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
154using UnspeculatableStore = StoreInst *;
155using RewriteableMemOp =
156 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
157using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
158
159/// An optimization pass providing Scalar Replacement of Aggregates.
160///
161/// This pass takes allocations which can be completely analyzed (that is, they
162/// don't escape) and tries to turn them into scalar SSA values. There are
163/// a few steps to this process.
164///
165/// 1) It takes allocations of aggregates and analyzes the ways in which they
166/// are used to try to split them into smaller allocations, ideally of
167/// a single scalar data type. It will split up memcpy and memset accesses
168/// as necessary and try to isolate individual scalar accesses.
169/// 2) It will transform accesses into forms which are suitable for SSA value
170/// promotion. This can be replacing a memset with a scalar store of an
171/// integer value, or it can involve speculating operations on a PHI or
172/// select to be a PHI or select of the results.
173/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
174/// onto insert and extract operations on a vector value, and convert them to
175/// this form. By doing so, it will enable promotion of vector aggregates to
176/// SSA vector values.
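///
/// As an illustrative sketch of step 2 (hypothetical IR, names arbitrary), a
/// whole-alloca memset such as
///   call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 4, i1 false)
/// on a 4-byte alloca can be rewritten as the scalar store
///   store i32 0, ptr %a
/// which mem2reg can then promote.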
177class SROA {
178 LLVMContext *const C;
179 DomTreeUpdater *const DTU;
180 AssumptionCache *const AC;
181 const bool PreserveCFG;
182
183 /// Worklist of alloca instructions to simplify.
184 ///
185 /// Each alloca in the function is added to this. Each new alloca formed gets
186 /// added to it as well to recursively simplify unless that alloca can be
187 /// directly promoted. Finally, each time we rewrite a use of an alloca other
188 /// than the one being actively rewritten, we add it back onto the list if not
189 /// already present to ensure it is re-visited.
190 SmallSetVector<AllocaInst *, 16> Worklist;
191
192 /// A collection of instructions to delete.
193 /// We try to batch deletions to simplify code and make things a bit more
194 /// efficient. We also make sure there are no dangling pointers.
195 SmallVector<WeakVH, 8> DeadInsts;
196
197 /// Post-promotion worklist.
198 ///
199 /// Sometimes we discover an alloca which has a high probability of becoming
200 /// viable for SROA after a round of promotion takes place. In those cases,
201 /// the alloca is enqueued here for re-processing.
202 ///
203 /// Note that we have to be very careful to clear allocas out of this list in
204 /// the event they are deleted.
205 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
206
207 /// A collection of alloca instructions we can directly promote.
208 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
209 SmallPtrSet<AllocaInst *, 16>, 16>
210 PromotableAllocas;
211
212 /// A worklist of PHIs to speculate prior to promoting allocas.
213 ///
214 /// All of these PHIs have been checked for the safety of speculation and by
215 /// being speculated will allow promoting allocas currently in the promotable
216 /// queue.
217 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
218
219 /// A worklist of select instructions to rewrite prior to promoting
220 /// allocas.
221 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
222
223 /// Select instructions that use an alloca and are subsequently loaded can be
224 /// rewritten to load both input pointers and then select between the result,
225 /// allowing the load of the alloca to be promoted.
226 /// From this:
227 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
228 /// %V = load <type>, ptr %P2
229 /// to:
230 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
231 /// %V2 = load <type>, ptr %Other
232 /// %V = select i1 %cond, <type> %V1, <type> %V2
233 ///
234 /// We can do this to a select if its only uses are loads
235 /// and if either operand of the select can be loaded unconditionally,
236 /// or if we are allowed to perform CFG modifications.
237 /// If an intervening bitcast with a single use of the load is found,
238 /// the promotion is still allowed.
239 static std::optional<RewriteableMemOps>
240 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
241
242public:
243 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 SROAOptions PreserveCFG_)
245 : C(C), DTU(DTU), AC(AC),
246 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
247
248 /// Main run method used by both the SROAPass and by the legacy pass.
249 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
250
251private:
252 friend class AllocaSliceRewriter;
253
254 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
255 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
256 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
257 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
258 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
259 void clobberUse(Use &U);
260 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
261 bool promoteAllocas();
262};
263
264} // end anonymous namespace
265
266/// Calculate the fragment of a variable to use when slicing a store
267/// based on the slice dimensions, existing fragment, and base storage
268/// fragment.
269/// Results:
270/// UseFrag - Use Target as the new fragment.
271/// UseNoFrag - The new slice already covers the whole variable.
272/// Skip - The new alloca slice doesn't include this variable.
273/// FIXME: Can we use calculateFragmentIntersect instead?
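/// For example (illustrative numbers): with a 64-bit variable and no
/// pre-existing fragment, a slice covering bits [0, 64) yields UseNoFrag, a
/// slice covering bits [32, 64) yields UseFrag with Target = {OffsetInBits:
/// 32, SizeInBits: 32}, and a slice that lies outside an existing fragment
/// of the variable yields Skip.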
274namespace {
275enum FragCalcResult { UseFrag, UseNoFrag, Skip };
276}
277static FragCalcResult
278calculateFragment(DILocalVariable *Variable,
279 uint64_t NewStorageSliceOffsetInBits,
280 uint64_t NewStorageSliceSizeInBits,
281 std::optional<DIExpression::FragmentInfo> StorageFragment,
282 std::optional<DIExpression::FragmentInfo> CurrentFragment,
283 DIExpression::FragmentInfo &Target) {
284 // If the base storage describes part of the variable, apply the offset and
285 // the size constraint.
286 if (StorageFragment) {
287 Target.SizeInBits =
288 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
289 Target.OffsetInBits =
290 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
291 } else {
292 Target.SizeInBits = NewStorageSliceSizeInBits;
293 Target.OffsetInBits = NewStorageSliceOffsetInBits;
294 }
295
296 // If this slice extracts the entirety of an independent variable from a
297 // larger alloca, do not produce a fragment expression, as the variable is
298 // not fragmented.
299 if (!CurrentFragment) {
300 if (auto Size = Variable->getSizeInBits()) {
301 // Treat the current fragment as covering the whole variable.
302 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
303 if (Target == CurrentFragment)
304 return UseNoFrag;
305 }
306 }
307
308 // No additional work to do if there isn't a fragment already, or there is
309 // but it already exactly describes the new assignment.
310 if (!CurrentFragment || *CurrentFragment == Target)
311 return UseFrag;
312
313 // Reject the target fragment if it doesn't fit wholly within the current
314 // fragment. TODO: We could instead chop up the target to fit in the case of
315 // a partial overlap.
316 if (Target.startInBits() < CurrentFragment->startInBits() ||
317 Target.endInBits() > CurrentFragment->endInBits())
318 return Skip;
319
320 // Target fits within the current fragment, return it.
321 return UseFrag;
322}
323
324static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
325 return DebugVariable(DVR->getVariable(), std::nullopt,
326 DVR->getDebugLoc().getInlinedAt());
327}
328
329/// Find linked dbg.assign and generate a new one with the correct
330/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
331/// value component is copied from the old dbg.assign to the new.
332/// \param OldAlloca Alloca for the variable before splitting.
333/// \param IsSplit True if the store (not necessarily alloca)
334/// is being split.
335/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
336/// \param SliceSizeInBits New number of bits being written to.
337/// \param OldInst Instruction that is being split.
338/// \param Inst New instruction performing this part of the
339/// split store.
340/// \param Dest Store destination.
341/// \param Value Stored value.
342/// \param DL Datalayout.
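///
/// For example (illustrative): splitting a 64-bit store that has a linked
/// dbg_assign for a 64-bit variable into two 32-bit stores results in two
/// dbg_assign records whose expressions carry DW_OP_LLVM_fragment ranges
/// covering bits [0, 32) and [32, 64), each linked to its new store through
/// a fresh DIAssignID.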
343static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
344 uint64_t OldAllocaOffsetInBits,
345 uint64_t SliceSizeInBits, Instruction *OldInst,
346 Instruction *Inst, Value *Dest, Value *Value,
347 const DataLayout &DL) {
348 // If we want allocas to be migrated using this helper then we need to ensure
349 // that the BaseFragments map code still works. A simple solution would be
350 // to choose to always clone alloca dbg_assigns (rather than sometimes
351 // "stealing" them).
352 assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
353
354 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
355 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
356 if (DVRAssignMarkerRange.empty())
357 return;
358
359 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
360 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
361 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
362 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
363 << "\n");
364 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
365 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
366 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
367 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
368 if (Value)
369 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
370
371 /// Map of aggregate variables to their fragment associated with OldAlloca.
372 SmallDenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
373 BaseFragments;
374 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
375 BaseFragments[getAggregateVariable(DVR)] =
376 DVR->getExpression()->getFragmentInfo();
377
378 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
379 // one). It shouldn't already have one: assert this assumption.
380 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
381 DIAssignID *NewID = nullptr;
382 auto &Ctx = Inst->getContext();
383 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
384 assert(OldAlloca->isStaticAlloca());
385
386 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
387 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
388 << "\n");
389 auto *Expr = DbgAssign->getExpression();
390 bool SetKillLocation = false;
391
392 if (IsSplit) {
393 std::optional<DIExpression::FragmentInfo> BaseFragment;
394 {
395 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
396 if (R == BaseFragments.end())
397 return;
398 BaseFragment = R->second;
399 }
400 std::optional<DIExpression::FragmentInfo> CurrentFragment =
401 Expr->getFragmentInfo();
402 DIExpression::FragmentInfo NewFragment;
403 FragCalcResult Result = calculateFragment(
404 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
405 BaseFragment, CurrentFragment, NewFragment);
406
407 if (Result == Skip)
408 return;
409 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
410 if (CurrentFragment) {
411 // Rewrite NewFragment to be relative to the existing one (this is
412 // what createFragmentExpression wants). CalculateFragment has
413 // already resolved the size for us. FIXME: Should it return the
414 // relative fragment too?
415 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
416 }
417 // Add the new fragment info to the existing expression if possible.
418 if (auto E = DIExpression::createFragmentExpression(
419 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
420 Expr = *E;
421 } else {
422 // Otherwise, add the new fragment info to an empty expression and
423 // discard the value component of this dbg.assign as the value cannot
424 // be computed with the new fragment.
425 Expr = *DIExpression::createFragmentExpression(
426 DIExpression::get(Expr->getContext(), {}),
427 NewFragment.OffsetInBits, NewFragment.SizeInBits);
428 SetKillLocation = true;
429 }
430 }
431 }
432
433 // If we haven't created a DIAssignID yet, do that now and attach it to Inst.
434 if (!NewID) {
435 NewID = DIAssignID::getDistinct(Ctx);
436 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
437 }
438
439 DbgVariableRecord *NewAssign;
440 if (IsSplit) {
441 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
442 NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
443 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
444 Dest, DIExpression::get(Expr->getContext(), {}),
445 DbgAssign->getDebugLoc())));
446 } else {
447 // The store is not split, simply steal the existing dbg_assign.
448 NewAssign = DbgAssign;
449 NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
450 NewAssign->setAddress(Dest);
451 if (Value)
452 NewAssign->replaceVariableLocationOp(0u, Value);
453 assert(Expr == NewAssign->getExpression());
454 }
455
456 // If we've updated the value but the original dbg.assign has an arglist
457 // then kill it now - we can't use the requested new value.
458 // We can't replace the DIArgList with the new value as it'd leave
459 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
460 // an arglist). And we can't keep the DIArgList in case the linked store
461 // is being split - in which case the DIArgList + expression may no longer
462 // be computing the correct value.
463 // This should be a very rare situation as it requires the value being
464 // stored to differ from the dbg.assign (i.e., the value has been
465 // represented differently in the debug intrinsic for some reason).
466 SetKillLocation |=
467 Value && (DbgAssign->hasArgList() ||
468 !DbgAssign->getExpression()->isSingleLocationExpression());
469 if (SetKillLocation)
470 NewAssign->setKillLocation();
471
472 // We could use more precision here at the cost of some additional (code)
473 // complexity - if the original dbg.assign was adjacent to its store, we
474 // could position this new dbg.assign adjacent to its store rather than the
475 // old dbg.assign. That would result in interleaved dbg.assigns rather than
476 // what we get now:
477 // split store !1
478 // split store !2
479 // dbg.assign !1
480 // dbg.assign !2
481 // This (current behaviour) results in debug assignments being
482 // noted as slightly offset (in code) from the store. In practice this
483 // should have little effect on the debugging experience due to the fact
484 // that all the split stores should get the same line number.
485 if (NewAssign != DbgAssign) {
486 NewAssign->moveBefore(DbgAssign->getIterator());
487 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
488 }
489 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
490 };
491
492 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
493}
494
495namespace {
496
497/// A custom IRBuilder inserter which prefixes all names, but only in
498/// Assert builds.
499class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
500 std::string Prefix;
501
502 Twine getNameWithPrefix(const Twine &Name) const {
503 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
504 }
505
506public:
507 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
508
509 void InsertHelper(Instruction *I, const Twine &Name,
510 BasicBlock::iterator InsertPt) const override {
511 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
512 InsertPt);
513 }
514};
515
516/// Provide a type for IRBuilder that drops names in release builds.
517using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
518
519/// A used slice of an alloca.
520///
521/// This structure represents a slice of an alloca used by some instruction. It
522/// stores both the begin and end offsets of this use, a pointer to the use
523/// itself, and a flag indicating whether we can classify the use as splittable
524/// or not when forming partitions of the alloca.
525class Slice {
526 /// The beginning offset of the range.
527 uint64_t BeginOffset = 0;
528
529 /// The ending offset, not included in the range.
530 uint64_t EndOffset = 0;
531
532 /// Storage for both the use of this slice and whether it can be
533 /// split.
534 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
535
536public:
537 Slice() = default;
538
539 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
540 Value *ProtectedFieldDisc)
541 : BeginOffset(BeginOffset), EndOffset(EndOffset),
542 UseAndIsSplittable(U, IsSplittable),
543 ProtectedFieldDisc(ProtectedFieldDisc) {}
544
545 uint64_t beginOffset() const { return BeginOffset; }
546 uint64_t endOffset() const { return EndOffset; }
547
548 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
549 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
550
551 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
552
553 bool isDead() const { return getUse() == nullptr; }
554 void kill() { UseAndIsSplittable.setPointer(nullptr); }
555
556 // When this access is via an llvm.protected.field.ptr intrinsic, contains
557 // the second argument to the intrinsic, the discriminator.
558 Value *ProtectedFieldDisc;
559
560 /// Support for ordering ranges.
561 ///
562 /// This provides an ordering over ranges such that start offsets are
563 /// always increasing; within equal start offsets, unsplittable slices come
564 /// before splittable ones, and end offsets are decreasing. Thus the
565 /// spanning range comes first in a cluster with the same start position.
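/// For example (illustrative), the slices [0,16) unsplittable, [0,16)
/// splittable, [0,8) splittable, and [4,8) sort in exactly that order.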
566 bool operator<(const Slice &RHS) const {
567 if (beginOffset() < RHS.beginOffset())
568 return true;
569 if (beginOffset() > RHS.beginOffset())
570 return false;
571 if (isSplittable() != RHS.isSplittable())
572 return !isSplittable();
573 if (endOffset() > RHS.endOffset())
574 return true;
575 return false;
576 }
577
578 /// Support comparison with a single offset to allow binary searches.
579 [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
580 return LHS.beginOffset() < RHSOffset;
581 }
582 [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
583 return LHSOffset < RHS.beginOffset();
584 }
585
586 bool operator==(const Slice &RHS) const {
587 return isSplittable() == RHS.isSplittable() &&
588 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
589 }
590 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
591};
592
593/// Representation of the alloca slices.
594///
595/// This class represents the slices of an alloca which are formed by its
596/// various uses. If a pointer escapes, we can't fully build a representation
597/// for the slices used and we reflect that in this structure. The uses are
598/// stored, sorted by increasing beginning offset and with unsplittable slices
599/// starting at a particular offset before splittable slices.
600class AllocaSlices {
601public:
602 /// Construct the slices of a particular alloca.
603 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
604
605 /// Test whether a pointer to the allocation escapes our analysis.
606 ///
607 /// If this is true, the slices are never fully built and should be
608 /// ignored.
609 bool isEscaped() const { return PointerEscapingInstr; }
610 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
611
612 /// Support for iterating over the slices.
613 /// @{
614 using iterator = SmallVectorImpl<Slice>::iterator;
615 using range = iterator_range<iterator>;
616
617 iterator begin() { return Slices.begin(); }
618 iterator end() { return Slices.end(); }
619
620 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
621 using const_range = iterator_range<const_iterator>;
622
623 const_iterator begin() const { return Slices.begin(); }
624 const_iterator end() const { return Slices.end(); }
625 /// @}
626
627 /// Erase a range of slices.
628 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
629
630 /// Insert new slices for this alloca.
631 ///
632 /// This moves the slices into the alloca's slices collection, and re-sorts
633 /// everything so that the usual ordering properties of the alloca's slices
634 /// hold.
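/// For example (illustrative offsets), inserting a slice [4,12) into the
/// existing slices {[0,8), [16,24)} yields {[0,8), [4,12), [16,24)}.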
635 void insert(ArrayRef<Slice> NewSlices) {
636 int OldSize = Slices.size();
637 Slices.append(NewSlices.begin(), NewSlices.end());
638 auto SliceI = Slices.begin() + OldSize;
639 std::stable_sort(SliceI, Slices.end());
640 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
641 }
642
643 // Forward declare the iterator and range accessor for walking the
644 // partitions.
645 class partition_iterator;
646 iterator_range<partition_iterator> partitions();
647
648 /// Access the dead users for this alloca.
649 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
650
651 /// Access the users for this alloca that are llvm.protected.field.ptr
652 /// intrinsics.
653 ArrayRef<IntrinsicInst *> getPFPUsers() const { return PFPUsers; }
654
655 /// Access Uses that should be dropped if the alloca is promotable.
656 ArrayRef<Use *> getDeadUsesIfPromotable() const {
657 return DeadUseIfPromotable;
658 }
659
660 /// Access the dead operands referring to this alloca.
661 ///
662 /// These are operands which cannot actually be used to refer to the
663 /// alloca as they are outside its range and the user doesn't correct for
664 /// that. These mostly consist of PHI node inputs and the like which we just
665 /// need to replace with undef.
666 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
667
668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
669 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
670 void printSlice(raw_ostream &OS, const_iterator I,
671 StringRef Indent = " ") const;
672 void printUse(raw_ostream &OS, const_iterator I,
673 StringRef Indent = " ") const;
674 void print(raw_ostream &OS) const;
675 void dump(const_iterator I) const;
676 void dump() const;
677#endif
678
679private:
680 template <typename DerivedT, typename RetT = void> class BuilderBase;
681 class SliceBuilder;
682
683 friend class AllocaSlices::SliceBuilder;
684
685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
686 /// Handle to alloca instruction to simplify method interfaces.
687 AllocaInst &AI;
688#endif
689
690 /// The instruction responsible for this alloca not having a known set
691 /// of slices.
692 ///
693 /// When an instruction (potentially) escapes the pointer to the alloca, we
694 /// store a pointer to that here and abort trying to form slices of the
695 /// alloca. This will be null if the alloca slices are analyzed successfully.
696 Instruction *PointerEscapingInstr;
697 Instruction *PointerEscapingInstrReadOnly;
698
699 /// The slices of the alloca.
700 ///
701 /// We store a vector of the slices formed by uses of the alloca here. This
702 /// vector is sorted by increasing begin offset, and then the unsplittable
703 /// slices before the splittable ones. See the Slice inner class for more
704 /// details.
705 SmallVector<Slice, 8> Slices;
706
707 /// Instructions which will become dead if we rewrite the alloca.
708 ///
709 /// Note that these are not separated by slice. This is because we expect an
710 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
711 /// all these instructions can simply be removed and replaced with poison as
712 /// they come from outside of the allocated space.
713 SmallVector<Instruction *, 8> DeadUsers;
714
715 /// Users that are llvm.protected.field.ptr intrinsics. These will be RAUW'd
716 /// to their first argument if we rewrite the alloca.
717 SmallVector<IntrinsicInst *, 8> PFPUsers;
718
719 /// Uses which will become dead if we can promote the alloca.
720 SmallVector<Use *, 8> DeadUseIfPromotable;
721
722 /// Operands which will become dead if we rewrite the alloca.
723 ///
724 /// These are operands that in their particular use can be replaced with
725 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
726 /// to PHI nodes and the like. They aren't entirely dead (there might be
727 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
728 /// want to swap this particular input for poison to simplify the use lists of
729 /// the alloca.
730 SmallVector<Use *, 8> DeadOperands;
731};
732
733/// A partition of the slices.
734///
735/// An ephemeral representation for a range of slices which can be viewed as
736/// a partition of the alloca. This range represents a span of the alloca's
737/// memory which cannot be split, and provides access to all of the slices
738/// overlapping some part of the partition.
739///
740/// Objects of this type are produced by traversing the alloca's slices, but
741/// are only ephemeral and not persistent.
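///
/// For example (illustrative offsets): given an unsplittable slice [0,8), a
/// splittable slice [0,16), and an unsplittable slice [16,24), the traversal
/// produces three partitions: [0,8) containing the first two slices, an
/// "empty" partition [8,16) covered only by the split tail of [0,16), and
/// [16,24).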
742class Partition {
743private:
744 friend class AllocaSlices;
745 friend class AllocaSlices::partition_iterator;
746
747 using iterator = AllocaSlices::iterator;
748
749 /// The beginning and ending offsets of the alloca for this
750 /// partition.
751 uint64_t BeginOffset = 0, EndOffset = 0;
752
753 /// The start and end iterators of this partition.
754 iterator SI, SJ;
755
756 /// A collection of split slice tails overlapping the partition.
757 SmallVector<Slice *, 4> SplitTails;
758
759 /// Raw constructor builds an empty partition starting and ending at
760 /// the given iterator.
761 Partition(iterator SI) : SI(SI), SJ(SI) {}
762
763public:
764 /// The start offset of this partition.
765 ///
766 /// All of the contained slices start at or after this offset.
767 uint64_t beginOffset() const { return BeginOffset; }
768
769 /// The end offset of this partition.
770 ///
771 /// All of the contained slices end at or before this offset.
772 uint64_t endOffset() const { return EndOffset; }
773
774 /// The size of the partition.
775 ///
776 /// Note that this can never be zero.
777 uint64_t size() const {
778 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
779 return EndOffset - BeginOffset;
780 }
781
782 /// Test whether this partition contains no slices, and merely spans
783 /// a region occupied by split slices.
784 bool empty() const { return SI == SJ; }
785
786 /// \name Iterate slices that start within the partition.
787 /// These may be splittable or unsplittable. They have a begin offset >= the
788 /// partition begin offset.
789 /// @{
790 // FIXME: We should probably define a "concat_iterator" helper and use that
791 // to stitch together pointee_iterators over the split tails and the
792 // contiguous iterators of the partition. That would give a much nicer
793 // interface here. We could then additionally expose filtered iterators for
794 // split, unsplit, and unsplittable slices based on the usage patterns.
795 iterator begin() const { return SI; }
796 iterator end() const { return SJ; }
797 /// @}
798
799 /// Get the sequence of split slice tails.
800 ///
801 /// These tails are of slices which start before this partition but are
802 /// split and overlap into the partition. We accumulate these while forming
803 /// partitions.
804 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
805};
806
807} // end anonymous namespace
808
809/// An iterator over partitions of the alloca's slices.
810///
811/// This iterator implements the core algorithm for partitioning the alloca's
812/// slices. It is a forward iterator as we don't support backtracking for
813/// efficiency reasons, and re-use a single storage area to maintain the
814/// current set of split slices.
815///
816/// It is templated on the slice iterator type to use so that it can operate
817/// with either const or non-const slice iterators.
818class AllocaSlices::partition_iterator
819 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
820 Partition> {
821 friend class AllocaSlices;
822
823 /// Most of the state for walking the partitions is held in a class
824 /// with a nice interface for examining them.
825 Partition P;
826
827 /// We need to keep the end of the slices to know when to stop.
828 AllocaSlices::iterator SE;
829
830 /// We also need to keep track of the maximum split end offset seen.
831 /// FIXME: Do we really?
832 uint64_t MaxSplitSliceEndOffset = 0;
833
834 /// Sets the partition to be empty at the given iterator, and sets the
835 /// end iterator.
836 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
837 : P(SI), SE(SE) {
838 // If not already at the end, advance our state to form the initial
839 // partition.
840 if (SI != SE)
841 advance();
842 }
843
844 /// Advance the iterator to the next partition.
845 ///
846 /// Requires that the iterator not be at the end of the slices.
847 void advance() {
848 assert((P.SI != SE || !P.SplitTails.empty()) &&
849 "Cannot advance past the end of the slices!");
850
851 // Clear out any split uses which have ended.
852 if (!P.SplitTails.empty()) {
853 if (P.EndOffset >= MaxSplitSliceEndOffset) {
854 // If we've finished all splits, this is easy.
855 P.SplitTails.clear();
856 MaxSplitSliceEndOffset = 0;
857 } else {
858 // Remove the uses which have ended in the prior partition. This
859 // cannot change the max split slice end because we just checked that
860 // the prior partition ended prior to that max.
861 llvm::erase_if(P.SplitTails,
862 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
863 assert(llvm::any_of(P.SplitTails,
864 [&](Slice *S) {
865 return S->endOffset() == MaxSplitSliceEndOffset;
866 }) &&
867 "Could not find the current max split slice offset!");
868 assert(llvm::all_of(P.SplitTails,
869 [&](Slice *S) {
870 return S->endOffset() <= MaxSplitSliceEndOffset;
871 }) &&
872 "Max split slice end offset is not actually the max!");
873 }
874 }
875
876 // If P.SI is already at the end, then we've cleared the split tail and
877 // now have an end iterator.
878 if (P.SI == SE) {
879 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
880 return;
881 }
882
883 // If we had a non-empty partition previously, set up the state for
884 // subsequent partitions.
885 if (P.SI != P.SJ) {
886 // Accumulate all the splittable slices which started in the old
887 // partition into the split list.
888 for (Slice &S : P)
889 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
890 P.SplitTails.push_back(&S);
891 MaxSplitSliceEndOffset =
892 std::max(S.endOffset(), MaxSplitSliceEndOffset);
893 }
894
895 // Start from the end of the previous partition.
896 P.SI = P.SJ;
897
898 // If P.SI is now at the end, we at most have a tail of split slices.
899 if (P.SI == SE) {
900 P.BeginOffset = P.EndOffset;
901 P.EndOffset = MaxSplitSliceEndOffset;
902 return;
903 }
904
905 // If we have split slices and the next slice is after a gap and is
906 // not splittable, immediately form an empty partition for the split
907 // slices up until the next slice begins.
908 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
909 !P.SI->isSplittable()) {
910 P.BeginOffset = P.EndOffset;
911 P.EndOffset = P.SI->beginOffset();
912 return;
913 }
914 }
915
916 // OK, we need to consume new slices. Set the end offset based on the
917 // current slice, and step SJ past it. The beginning offset of the
918 // partition is the beginning offset of the next slice unless we have
919 // pre-existing split slices that are continuing, in which case we begin
920 // at the prior end offset.
921 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
922 P.EndOffset = P.SI->endOffset();
923 ++P.SJ;
924
925 // There are two strategies to form a partition based on whether the
926 // partition starts with an unsplittable slice or a splittable slice.
927 if (!P.SI->isSplittable()) {
928 // When we're forming an unsplittable region, it must always start at
929 // the first slice and will extend through its end.
930 assert(P.BeginOffset == P.SI->beginOffset());
931
932 // Form a partition including all of the overlapping slices with this
933 // unsplittable slice.
934 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
935 if (!P.SJ->isSplittable())
936 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
937 ++P.SJ;
938 }
939
940 // We have a partition across a set of overlapping unsplittable
941 // slices.
942 return;
943 }
944
945 // If we're starting with a splittable slice, then we need to form
946 // a synthetic partition spanning it and any other overlapping splittable
947 // slices.
948 assert(P.SI->isSplittable() && "Forming a splittable partition!");
949
950 // Collect all of the overlapping splittable slices.
951 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
952 P.SJ->isSplittable()) {
953 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
954 ++P.SJ;
955 }
956
957 // Back up P.EndOffset if we ended the span early when encountering an
958 // unsplittable slice. This synthesizes the early end offset of
959 // a partition spanning only splittable slices.
960 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
961 assert(!P.SJ->isSplittable());
962 P.EndOffset = P.SJ->beginOffset();
963 }
964 }
965
966public:
967 bool operator==(const partition_iterator &RHS) const {
968 assert(SE == RHS.SE &&
969 "End iterators don't match between compared partition iterators!");
970
971 // The observed positions of partitions are marked by the P.SI iterator and
972 // the emptiness of the split slices. The latter is only relevant when
973 // P.SI == SE, as the end iterator will additionally have an empty split
974 // slices list, but the prior may have the same P.SI and a tail of split
975 // slices.
976 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
977 assert(P.SJ == RHS.P.SJ &&
978 "Same set of slices formed two different sized partitions!");
979 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
980 "Same slice position with differently sized non-empty split "
981 "slice tails!");
982 return true;
983 }
984 return false;
985 }
986
987 partition_iterator &operator++() {
988 advance();
989 return *this;
990 }
991
992 Partition &operator*() { return P; }
993};
994
995/// A forward range over the partitions of the alloca's slices.
996///
997/// This accesses an iterator range over the partitions of the alloca's
998/// slices. It computes these partitions on the fly based on the overlapping
999/// offsets of the slices and the ability to split them. It will visit "empty"
1000/// partitions to cover regions of the alloca only accessed via split
1001/// slices.
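/// A typical traversal (illustrative sketch, loosely mirroring splitAlloca):
///   for (Partition &P : AS.partitions())
///     if (AllocaInst *NewAI = rewritePartition(AI, AS, P))
///       ... // enqueue NewAI for further simplification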
1002iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
1003 return make_range(partition_iterator(begin(), end()),
1004 partition_iterator(end(), end()));
1005}
1006
1007static Value *foldSelectInst(SelectInst &SI) {
1008 // If the condition being selected on is a constant or the same value is
1009 // being selected between, fold the select. Yes this does (rarely) happen
1010 // early on.
1011 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
1012 return SI.getOperand(1 + CI->isZero());
1013 if (SI.getOperand(1) == SI.getOperand(2))
1014 return SI.getOperand(1);
1015
1016 return nullptr;
1017}
1018
1019/// A helper that folds a PHI node or a select.
1020static Value *foldPHINodeOrSelectInst(Instruction &I) {
1021 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1022 // If PN merges together the same value, return that value.
1023 return PN->hasConstantValue();
1024 }
1025 return foldSelectInst(cast<SelectInst>(I));
1026}
1027
1028/// Builder for the alloca slices.
1029///
1030/// This class builds a set of alloca slices by recursively visiting the uses
1031/// of an alloca and making a slice for each load and store at each offset.
1032class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1033 friend class PtrUseVisitor<SliceBuilder>;
1034 friend class InstVisitor<SliceBuilder>;
1035
1036 using Base = PtrUseVisitor<SliceBuilder>;
1037
1038 const uint64_t AllocSize;
1039 AllocaSlices &AS;
1040
1041 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1042 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1043
1044 /// Set to de-duplicate dead instructions found in the use walk.
1045 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1046
1047 // When this access is via an llvm.protected.field.ptr intrinsic, contains
1048 // the second argument to the intrinsic, the discriminator.
1049 Value *ProtectedFieldDisc = nullptr;
1050
1051public:
1052 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1053 : PtrUseVisitor<SliceBuilder>(DL),
1054 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1055 AS(AS) {}
1056
1057private:
1058 void markAsDead(Instruction &I) {
1059 if (VisitedDeadInsts.insert(&I).second)
1060 AS.DeadUsers.push_back(&I);
1061 }
1062
1063 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1064 bool IsSplittable = false) {
1065 // Completely skip uses which have a zero size or start either before or
1066 // past the end of the allocation.
1067 if (Size == 0 || Offset.uge(AllocSize)) {
1068 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1069 << Offset
1070 << " which has zero size or starts outside of the "
1071 << AllocSize << " byte alloca:\n"
1072 << " alloca: " << AS.AI << "\n"
1073 << " use: " << I << "\n");
1074 return markAsDead(I);
1075 }
1076
1077 uint64_t BeginOffset = Offset.getZExtValue();
1078 uint64_t EndOffset = BeginOffset + Size;
1079
1080 // Clamp the end offset to the end of the allocation. Note that this is
1081 // formulated to handle even the case where "BeginOffset + Size" overflows.
1082 // This may appear superficially to be something we could ignore entirely,
1083 // but that is not so! There may be widened loads or PHI-node uses where
1084 // some instructions are dead but not others. We can't completely ignore
1085 // them, and so have to record at least the information here.
1086 assert(AllocSize >= BeginOffset); // Established above.
1087 if (Size > AllocSize - BeginOffset) {
1088 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1089 << Offset << " to remain within the " << AllocSize
1090 << " byte alloca:\n"
1091 << " alloca: " << AS.AI << "\n"
1092 << " use: " << I << "\n");
1093 EndOffset = AllocSize;
1094 }
1095
1096 AS.Slices.push_back(
1097 Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
1098 }
1099
1100 void visitBitCastInst(BitCastInst &BC) {
1101 if (BC.use_empty())
1102 return markAsDead(BC);
1103
1104 return Base::visitBitCastInst(BC);
1105 }
1106
1107 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1108 if (ASC.use_empty())
1109 return markAsDead(ASC);
1110
1111 return Base::visitAddrSpaceCastInst(ASC);
1112 }
1113
1114 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1115 if (GEPI.use_empty())
1116 return markAsDead(GEPI);
1117
1118 return Base::visitGetElementPtrInst(GEPI);
1119 }
1120
1121 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1122 uint64_t Size, bool IsVolatile) {
1123 // We allow splitting of non-volatile loads and stores where the type is an
1124 // integer type. These may be used to implement 'memcpy' or other "transfer
1125 // of bits" patterns.
1126 bool IsSplittable =
1127 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1128
1129 insertUse(I, Offset, Size, IsSplittable);
1130 }
1131
1132 void visitLoadInst(LoadInst &LI) {
1133 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1134 "All simple FCA loads should have been pre-split");
1135
1136 // If there is a load with an unknown offset, we can still perform store
1137 // to load forwarding for other known-offset loads.
1138 if (!IsOffsetKnown)
1139 return PI.setEscapedReadOnly(&LI);
1140
1141 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1142 if (Size.isScalable()) {
1143 unsigned VScale = LI.getFunction()->getVScaleValue();
1144 if (!VScale)
1145 return PI.setAborted(&LI);
1146
1147 Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
1148 }
1149
1150 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1151 LI.isVolatile());
1152 }
1153
1154 void visitStoreInst(StoreInst &SI) {
1155 Value *ValOp = SI.getValueOperand();
1156 if (ValOp == *U)
1157 return PI.setEscapedAndAborted(&SI);
1158 if (!IsOffsetKnown)
1159 return PI.setAborted(&SI);
1160
1161 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1162 if (StoreSize.isScalable()) {
1163 unsigned VScale = SI.getFunction()->getVScaleValue();
1164 if (!VScale)
1165 return PI.setAborted(&SI);
1166
1167 StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
1168 }
1169
1170 uint64_t Size = StoreSize.getFixedValue();
1171
1172 // If this memory access can be shown to *statically* extend outside the
1173 // bounds of the allocation, its behavior is undefined, so simply
1174 // ignore it. Note that this is more strict than the generic clamping
1175 // behavior of insertUse. We also try to handle cases which might run the
1176 // risk of overflow.
1177 // FIXME: We should instead consider the pointer to have escaped if this
1178 // function is being instrumented for addressing bugs or race conditions.
1179 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1180 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1181 << Offset << " which extends past the end of the "
1182 << AllocSize << " byte alloca:\n"
1183 << " alloca: " << AS.AI << "\n"
1184 << " use: " << SI << "\n");
1185 return markAsDead(SI);
1186 }
1187
1188 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1189 "All simple FCA stores should have been pre-split");
1190 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1191 }
1192
1193 void visitMemSetInst(MemSetInst &II) {
1194 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1195 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1196 if ((Length && Length->getValue() == 0) ||
1197 (IsOffsetKnown && Offset.uge(AllocSize)))
1198 // Zero-length mem transfer intrinsics can be ignored entirely.
1199 return markAsDead(II);
1200
1201 if (!IsOffsetKnown)
1202 return PI.setAborted(&II);
1203
1204 insertUse(II, Offset,
1205 Length ? Length->getLimitedValue()
1206 : AllocSize - Offset.getLimitedValue(),
1207 (bool)Length);
1208 }
1209
1210 void visitMemTransferInst(MemTransferInst &II) {
1211 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1212 if (Length && Length->getValue() == 0)
1213 // Zero-length mem transfer intrinsics can be ignored entirely.
1214 return markAsDead(II);
1215
1216 // Because we can visit these intrinsics twice, also check to see if the
1217 // first time marked this instruction as dead. If so, skip it.
1218 if (VisitedDeadInsts.count(&II))
1219 return;
1220
1221 if (!IsOffsetKnown)
1222 return PI.setAborted(&II);
1223
1224 // This side of the transfer is completely out-of-bounds, and so we can
1225 // nuke the entire transfer. However, we also need to nuke the other side
1226 // if already added to our partitions.
1227 // FIXME: Yet another place we really should bypass this when
1228 // instrumenting for ASan.
1229 if (Offset.uge(AllocSize)) {
1230 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1231 MemTransferSliceMap.find(&II);
1232 if (MTPI != MemTransferSliceMap.end())
1233 AS.Slices[MTPI->second].kill();
1234 return markAsDead(II);
1235 }
1236
1237 uint64_t RawOffset = Offset.getLimitedValue();
1238 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1239
1240 // Check for the special case where the same exact value is used for both
1241 // source and dest.
1242 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1243 // For non-volatile transfers this is a no-op.
1244 if (!II.isVolatile())
1245 return markAsDead(II);
1246
1247 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1248 }
1249
1250 // If we have seen both source and destination for a mem transfer, then
1251 // they both point to the same alloca.
1252 bool Inserted;
1253 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1254 std::tie(MTPI, Inserted) =
1255 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1256 unsigned PrevIdx = MTPI->second;
1257 if (!Inserted) {
1258 Slice &PrevP = AS.Slices[PrevIdx];
1259
1260 // Check if the begin offsets match and this is a non-volatile transfer.
1261 // In that case, we can completely elide the transfer.
1262 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1263 PrevP.kill();
1264 return markAsDead(II);
1265 }
1266
1267 // Otherwise we have an offset transfer within the same alloca. We can't
1268 // split those.
1269 PrevP.makeUnsplittable();
1270 }
1271
1272 // Insert the use now that we've fixed up the splittable nature.
1273 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1274
1275 // Check that we ended up with a valid index in the map.
1276 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1277 "Map index doesn't point back to a slice with this user.");
1278 }
1279
1280 // Disable SROA for any intrinsics other than the cases handled below.
1281 // FIXME: What about debug intrinsics? This matches old behavior, but
1282 // doesn't make sense.
1283 void visitIntrinsicInst(IntrinsicInst &II) {
1284 if (II.isDroppable()) {
1285 AS.DeadUseIfPromotable.push_back(U);
1286 return;
1287 }
1288
1289 if (!IsOffsetKnown)
1290 return PI.setAborted(&II);
1291
1292 if (II.isLifetimeStartOrEnd()) {
1293 insertUse(II, Offset, AllocSize, true);
1294 return;
1295 }
1296
1297 if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
1298 // We only handle loads and stores as users of llvm.protected.field.ptr.
1299 // Other uses may add items to the worklist, which will cause
1300 // ProtectedFieldDisc to be tracked incorrectly.
1301 AS.PFPUsers.push_back(&II);
1302 ProtectedFieldDisc = II.getArgOperand(1);
1303 for (Use &U : II.uses()) {
1304 this->U = &U;
1305 if (auto *LI = dyn_cast<LoadInst>(U.getUser()))
1306 visitLoadInst(*LI);
1307 else if (auto *SI = dyn_cast<StoreInst>(U.getUser()))
1308 visitStoreInst(*SI);
1309 else
1310 PI.setAborted(&II);
1311 if (PI.isAborted())
1312 break;
1313 }
1314 ProtectedFieldDisc = nullptr;
1315 return;
1316 }
1317
1318 Base::visitIntrinsicInst(II);
1319 }
1320
1321 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1322 // We consider any PHI or select that results in a direct load or store of
1323 // the same offset to be a viable use for slicing purposes. These uses
1324 // are considered unsplittable and the size is the maximum loaded or stored
1325 // size.
1326 SmallPtrSet<Instruction *, 4> Visited;
1327 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1328 Visited.insert(Root);
1329 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1330 const DataLayout &DL = Root->getDataLayout();
1331 // If there are no loads or stores, the access is dead. We mark that as
1332 // a size zero access.
1333 Size = 0;
1334 do {
1335 Instruction *I, *UsedI;
1336 std::tie(UsedI, I) = Uses.pop_back_val();
1337
1338 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1339 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1340 if (LoadSize.isScalable()) {
1341 PI.setAborted(LI);
1342 return nullptr;
1343 }
1344 Size = std::max(Size, LoadSize.getFixedValue());
1345 continue;
1346 }
1347 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1348 Value *Op = SI->getOperand(0);
1349 if (Op == UsedI)
1350 return SI;
1351 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1352 if (StoreSize.isScalable()) {
1353 PI.setAborted(SI);
1354 return nullptr;
1355 }
1356 Size = std::max(Size, StoreSize.getFixedValue());
1357 continue;
1358 }
1359
1360 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1361 if (!GEP->hasAllZeroIndices())
1362 return GEP;
1363 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1364 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1365 return I;
1366 }
1367
1368 for (User *U : I->users())
1369 if (Visited.insert(cast<Instruction>(U)).second)
1370 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1371 } while (!Uses.empty());
1372
1373 return nullptr;
1374 }
1375
1376 void visitPHINodeOrSelectInst(Instruction &I) {
1377 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1378 if (I.use_empty())
1379 return markAsDead(I);
1380
1381 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1382 // instructions in this BB, which may be required during rewriting. Bail out
1383 // on these cases.
1384 if (isa<PHINode>(I) && !I.getParent()->hasInsertionPt())
1385 return PI.setAborted(&I);
1386
1387 // TODO: We could use simplifyInstruction here to fold PHINodes and
1388 // SelectInsts. However, doing so requires to change the current
1389 // dead-operand-tracking mechanism. For instance, suppose neither loading
1390 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1391 // trap either. However, if we simply replace %U with undef using the
1392 // current dead-operand-tracking mechanism, "load (select undef, undef,
1393 // %other)" may trap because the select may return the first operand
1394 // "undef".
1395 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1396 if (Result == *U)
1397 // If the result of the constant fold will be the pointer, recurse
1398 // through the PHI/select as if we had RAUW'ed it.
1399 enqueueUsers(I);
1400 else
1401 // Otherwise the operand to the PHI/select is dead, and we can replace
1402 // it with poison.
1403 AS.DeadOperands.push_back(U);
1404
1405 return;
1406 }
1407
1408 if (!IsOffsetKnown)
1409 return PI.setAborted(&I);
1410
1411 // See if we already have computed info on this node.
1412 uint64_t &Size = PHIOrSelectSizes[&I];
1413 if (!Size) {
1414 // This is a new PHI/Select, check for an unsafe use of it.
1415 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1416 return PI.setAborted(UnsafeI);
1417 }
1418
1419 // For PHI and select operands outside the alloca, we can't nuke the entire
1420 // phi or select -- the other side might still be relevant, so we special
1421 // case them here and use a separate structure to track the operands
1422 // themselves which should be replaced with poison.
1423 // FIXME: This should instead be escaped in the event we're instrumenting
1424 // for address sanitization.
1425 if (Offset.uge(AllocSize)) {
1426 AS.DeadOperands.push_back(U);
1427 return;
1428 }
1429
1430 insertUse(I, Offset, Size);
1431 }
1432
1433 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1434
1435 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1436
1437 /// Disable SROA entirely if there are unhandled users of the alloca.
1438 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1439
1440 void visitCallBase(CallBase &CB) {
1441 // If the call operand is read-only and only does a read-only or address
1442 // capture, then we mark it as EscapedReadOnly.
1443 if (CB.isDataOperand(U) &&
1444 !capturesFullProvenance(CB.getCaptureInfo(U->getOperandNo())) &&
1445 CB.onlyReadsMemory(U->getOperandNo())) {
1446 PI.setEscapedReadOnly(&CB);
1447 return;
1448 }
1449
1450 Base::visitCallBase(CB);
1451 }
1452};
1453
1454AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1455 :
1456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1457 AI(AI),
1458#endif
1459 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1460 SliceBuilder PB(DL, AI, *this);
1461 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1462 if (PtrI.isEscaped() || PtrI.isAborted()) {
1463 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1464 // possibly by just storing the PtrInfo in the AllocaSlices.
1465 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1466 : PtrI.getAbortingInst();
1467 assert(PointerEscapingInstr && "Did not track a bad instruction");
1468 return;
1469 }
1470 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1471
1472 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1473
1474 // Sort the uses. This arranges for the offsets to be in ascending order,
1475 // and the sizes to be in descending order.
1476 llvm::stable_sort(Slices);
1477}
1478
1479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1480
1481void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1482 StringRef Indent) const {
1483 printSlice(OS, I, Indent);
1484 OS << "\n";
1485 printUse(OS, I, Indent);
1486}
1487
1488void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1489 StringRef Indent) const {
1490 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1491 << " slice #" << (I - begin())
1492 << (I->isSplittable() ? " (splittable)" : "");
1493}
1494
1495void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1496 StringRef Indent) const {
1497 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1498}
1499
1500void AllocaSlices::print(raw_ostream &OS) const {
1501 if (PointerEscapingInstr) {
1502 OS << "Can't analyze slices for alloca: " << AI << "\n"
1503 << " A pointer to this alloca escaped by:\n"
1504 << " " << *PointerEscapingInstr << "\n";
1505 return;
1506 }
1507
1508 if (PointerEscapingInstrReadOnly)
1509 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1510
1511 OS << "Slices of alloca: " << AI << "\n";
1512 for (const_iterator I = begin(), E = end(); I != E; ++I)
1513 print(OS, I);
1514}
1515
1516LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1517 print(dbgs(), I);
1518}
1519LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1520
1521#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522
1523/// Walk the range of a partitioning looking for a common type to cover this
1524/// sequence of slices.
1525static std::pair<Type *, IntegerType *>
1526findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1527 uint64_t EndOffset) {
1528 Type *Ty = nullptr;
1529 bool TyIsCommon = true;
1530 IntegerType *ITy = nullptr;
1531
1532 // Note that we need to look at *every* alloca slice's Use to ensure we
1533 // always get consistent results regardless of the order of slices.
1534 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1535 Use *U = I->getUse();
1536 if (isa<IntrinsicInst>(*U->getUser()))
1537 continue;
1538 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1539 continue;
1540
1541 Type *UserTy = nullptr;
1542 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1543 UserTy = LI->getType();
1544 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1545 UserTy = SI->getValueOperand()->getType();
1546 }
1547
1548 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1549 // If the type is larger than the partition, skip it. We only encounter
1550 // this for split integer operations where we want to use the type of the
1551 // entity causing the split. Also skip if the type is not a byte width
1552 // multiple.
1553 if (UserITy->getBitWidth() % 8 != 0 ||
1554 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1555 continue;
1556
1557 // Track the largest bitwidth integer type used in this way in case there
1558 // is no common type.
1559 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1560 ITy = UserITy;
1561 }
1562
1563 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1564 // depend on types skipped above.
1565 if (!UserTy || (Ty && Ty != UserTy))
1566 TyIsCommon = false; // Give up on anything but an iN type.
1567 else
1568 Ty = UserTy;
1569 }
1570
1571 return {TyIsCommon ? Ty : nullptr, ITy};
1572}
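// Illustrative example: if a partition is fully covered by both a `load float`
// and a `load i32`, the loads disagree, so no common type is returned, but ITy
// is still set to i32 and the caller can fall back to an integer type. If every
// covering load and store uses the same type, that type is returned as common.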
1573
1574/// PHI instructions that use an alloca and are subsequently loaded can be
1575/// rewritten to load both input pointers in the pred blocks and then PHI the
1576/// results, allowing the load of the alloca to be promoted.
1577/// From this:
1578/// %P2 = phi [i32* %Alloca, i32* %Other]
1579/// %V = load i32* %P2
1580/// to:
1581/// %V1 = load i32* %Alloca -> will be mem2reg'd
1582/// ...
1583/// %V2 = load i32* %Other
1584/// ...
1585/// %V = phi [i32 %V1, i32 %V2]
1586///
1587/// We can do this to a select if its only uses are loads and if the operands
1588/// to the select can be loaded unconditionally.
1589///
1590/// FIXME: This should be hoisted into a generic utility, likely in
1591/// Transforms/Util/Local.h
1592static bool isSafePHIToSpeculate(PHINode &PN) {
1593 const DataLayout &DL = PN.getDataLayout();
1594
1595 // For now, we can only do this promotion if the load is in the same block
1596 // as the PHI, and if there are no stores between the phi and load.
1597 // TODO: Allow recursive phi users.
1598 // TODO: Allow stores.
1599 BasicBlock *BB = PN.getParent();
1600 Align MaxAlign;
1601 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1602 Type *LoadType = nullptr;
1603 for (User *U : PN.users()) {
1604 LoadInst *LI = dyn_cast<LoadInst>(U);
1605 if (!LI || !LI->isSimple())
1606 return false;
1607
1608 // For now we only allow loads in the same block as the PHI. This is
1609 // a common case that happens when instcombine merges two loads through
1610 // a PHI.
1611 if (LI->getParent() != BB)
1612 return false;
1613
1614 if (LoadType) {
1615 if (LoadType != LI->getType())
1616 return false;
1617 } else {
1618 LoadType = LI->getType();
1619 }
1620
1621 // Ensure that there are no instructions between the PHI and the load that
1622 // could store.
1623 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1624 if (BBI->mayWriteToMemory())
1625 return false;
1626
1627 MaxAlign = std::max(MaxAlign, LI->getAlign());
1628 }
1629
1630 if (!LoadType)
1631 return false;
1632
1633 APInt LoadSize =
1634 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1635
1636 // We can only transform this if it is safe to push the loads into the
1637 // predecessor blocks. The only thing to watch out for is that we can't put
1638 // a possibly trapping load in the predecessor if it is a critical edge.
1639 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1640 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1641 Value *InVal = PN.getIncomingValue(Idx);
1642
1643 // If the value is produced by the terminator of the predecessor (an
1644 // invoke) or it has side-effects, there is no valid place to put a load
1645 // in the predecessor.
1646 if (TI == InVal || TI->mayHaveSideEffects())
1647 return false;
1648
1649 // If the predecessor has a single successor, then the edge isn't
1650 // critical.
1651 if (TI->getNumSuccessors() == 1)
1652 continue;
1653
1654 // If this pointer is always safe to load, or if we can prove that there
1655 // is already a load in the block, then we can move the load to the pred
1656 // block.
1657 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1658 continue;
1659
1660 return false;
1661 }
1662
1663 return true;
1664}
1665
1666static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1667 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1668
1669 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1670 Type *LoadTy = SomeLoad->getType();
1671 IRB.SetInsertPoint(&PN);
1672 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1673 PN.getName() + ".sroa.speculated");
1674
1675 // Get the AA tags and alignment to use from one of the loads. It does not
1676 // matter which one we get and if any differ.
1677 AAMDNodes AATags = SomeLoad->getAAMetadata();
1678 Align Alignment = SomeLoad->getAlign();
1679
1680 // Rewrite all loads of the PN to use the new PHI.
1681 while (!PN.use_empty()) {
1682 LoadInst *LI = cast<LoadInst>(PN.user_back());
1683 LI->replaceAllUsesWith(NewPN);
1684 LI->eraseFromParent();
1685 }
1686
1687 // Inject loads into all of the pred blocks.
1688 DenseMap<BasicBlock *, Value *> InjectedLoads;
1689 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1690 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1691 Value *InVal = PN.getIncomingValue(Idx);
1692
1693 // A PHI node is allowed to have multiple (duplicated) entries for the same
1694 // basic block, as long as the value is the same. So if we already injected
1695 // a load in the predecessor, then we should reuse the same load for all
1696 // duplicated entries.
1697 if (Value *V = InjectedLoads.lookup(Pred)) {
1698 NewPN->addIncoming(V, Pred);
1699 continue;
1700 }
1701
1702 Instruction *TI = Pred->getTerminator();
1703 IRB.SetInsertPoint(TI);
1704
1705 LoadInst *Load = IRB.CreateAlignedLoad(
1706 LoadTy, InVal, Alignment,
1707 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1708 ++NumLoadsSpeculated;
1709 if (AATags)
1710 Load->setAAMetadata(AATags);
1711 NewPN->addIncoming(Load, Pred);
1712 InjectedLoads[Pred] = Load;
1713 }
1714
1715 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1716 PN.eraseFromParent();
1717}
1718
1719SelectHandSpeculativity &
1720SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1721 if (isTrueVal)
1722 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1723 else
1724 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1725 return *this;
1726}
1727
1728bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1729 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1730 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1731}
1732
1733bool SelectHandSpeculativity::areAllSpeculatable() const {
1734 return isSpeculatable(/*isTrueVal=*/true) &&
1735 isSpeculatable(/*isTrueVal=*/false);
1736}
1737
1738bool SelectHandSpeculativity::areAnySpeculatable() const {
1739 return isSpeculatable(/*isTrueVal=*/true) ||
1740 isSpeculatable(/*isTrueVal=*/false);
1741}
1742bool SelectHandSpeculativity::areNoneSpeculatable() const {
1743 return !areAnySpeculatable();
1744}
1745
1746static SelectHandSpeculativity
1747isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1748 assert(LI.isSimple() && "Only for simple loads");
1749 SelectHandSpeculativity Spec;
1750
1751 const DataLayout &DL = SI.getDataLayout();
1752 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1753 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1754 &LI))
1755 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1756 else if (PreserveCFG)
1757 return Spec;
1758
1759 return Spec;
1760}
1761
1762std::optional<RewriteableMemOps>
1763SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1764 RewriteableMemOps Ops;
1765
1766 for (User *U : SI.users()) {
1767 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1768 U = *BC->user_begin();
1769
1770 if (auto *Store = dyn_cast<StoreInst>(U)) {
1771 // Note that atomic stores can be transformed; atomic semantics do not
1772 // have any meaning for a local alloca. Stores are not speculatable,
1773 // however, so if we can't turn it into a predicated store, we are done.
1774 if (Store->isVolatile() || PreserveCFG)
1775 return {}; // Give up on this `select`.
1776 Ops.emplace_back(Store);
1777 continue;
1778 }
1779
1780 auto *LI = dyn_cast<LoadInst>(U);
1781
1782 // Note that atomic loads can be transformed;
1783 // atomic semantics do not have any meaning for a local alloca.
1784 if (!LI || LI->isVolatile())
1785 return {}; // Give up on this `select`.
1786
1787 PossiblySpeculatableLoad Load(LI);
1788 if (!LI->isSimple()) {
1789 // If the `load` is not simple, we can't speculatively execute it,
1790 // but we could handle this via a CFG modification. But can we?
1791 if (PreserveCFG)
1792 return {}; // Give up on this `select`.
1793 Ops.emplace_back(Load);
1794 continue;
1795 }
1796
1797 SelectHandSpeculativity Spec =
1798 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1799 if (PreserveCFG && !Spec.areAllSpeculatable())
1800 return {}; // Give up on this `select`.
1801
1802 Load.setInt(Spec);
1803 Ops.emplace_back(Load);
1804 }
1805
1806 return Ops;
1807}
1808
1809static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1810 IRBuilderTy &IRB) {
1811 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1812
1813 Value *TV = SI.getTrueValue();
1814 Value *FV = SI.getFalseValue();
1815 // Replace the given load of the select with a select of two loads.
1816
1817 assert(LI.isSimple() && "We only speculate simple loads");
1818
1819 IRB.SetInsertPoint(&LI);
1820
1821 LoadInst *TL =
1822 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1823 LI.getName() + ".sroa.speculate.load.true");
1824 LoadInst *FL =
1825 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1826 LI.getName() + ".sroa.speculate.load.false");
1827 NumLoadsSpeculated += 2;
1828
1829 // Transfer alignment and AA info if present.
1830 TL->setAlignment(LI.getAlign());
1831 FL->setAlignment(LI.getAlign());
1832
1833 AAMDNodes Tags = LI.getAAMetadata();
1834 if (Tags) {
1835 TL->setAAMetadata(Tags);
1836 FL->setAAMetadata(Tags);
1837 }
1838
1839 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1840 LI.getName() + ".sroa.speculated",
1841 ProfcheckDisableMetadataFixes ? nullptr : &SI);
1842
1843 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1844 LI.replaceAllUsesWith(V);
1845}
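// Illustrative example of the rewrite above:
//   %p = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %p
// becomes
//   %v.sroa.speculate.load.true = load i32, ptr %a
//   %v.sroa.speculate.load.false = load i32, ptr %b
//   %v.sroa.speculated = select i1 %c, i32 %v.sroa.speculate.load.true,
//                                      i32 %v.sroa.speculate.load.false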
1846
1847template <typename T>
1848static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1849 SelectHandSpeculativity Spec,
1850 DomTreeUpdater &DTU) {
1851 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1852 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1853 BasicBlock *Head = I.getParent();
1854 Instruction *ThenTerm = nullptr;
1855 Instruction *ElseTerm = nullptr;
1856 if (Spec.areNoneSpeculatable())
1857 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1858 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1859 else {
1860 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1861 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1862 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1863 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1864 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1865 }
1866 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1867 Spec = {}; // Do not use `Spec` beyond this point.
1868 BasicBlock *Tail = I.getParent();
1869 Tail->setName(Head->getName() + ".cont");
1870 PHINode *PN;
1871 if (isa<LoadInst>(I))
1872 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1873 for (BasicBlock *SuccBB : successors(Head)) {
1874 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1875 int SuccIdx = IsThen ? 0 : 1;
1876 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1877 auto &CondMemOp = cast<T>(*I.clone());
1878 if (NewMemOpBB != Head) {
1879 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1880 if (isa<LoadInst>(I))
1881 ++NumLoadsPredicated;
1882 else
1883 ++NumStoresPredicated;
1884 } else {
1885 CondMemOp.dropUBImplyingAttrsAndMetadata();
1886 ++NumLoadsSpeculated;
1887 }
1888 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1889 Value *Ptr = SI.getOperand(1 + SuccIdx);
1890 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1891 if (isa<LoadInst>(I)) {
1892 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1893 PN->addIncoming(&CondMemOp, NewMemOpBB);
1894 } else
1895 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1896 }
1897 if (isa<LoadInst>(I)) {
1898 PN->takeName(&I);
1899 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1900 I.replaceAllUsesWith(PN);
1901 }
1902}
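// Illustrative example: a store through a select with no speculatable hand is
// rewritten into a diamond, cloning the store into each side and replacing the
// pointer operand with the corresponding hand of the select:
//   head:       br i1 %c, label %head.then, label %head.else
//   head.then:  store i32 %v, ptr %a
//               br label %head.cont
//   head.else:  store i32 %v, ptr %b
//               br label %head.cont
// A predicated load is handled the same way, with a PHI in %head.cont merging
// the two loaded values.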
1903
1904static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1905 SelectHandSpeculativity Spec,
1906 DomTreeUpdater &DTU) {
1907 if (auto *LI = dyn_cast<LoadInst>(&I))
1908 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1909 else if (auto *SI = dyn_cast<StoreInst>(&I))
1910 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1911 else
1912 llvm_unreachable_internal("Only for load and store.");
1913}
1914
1915static bool rewriteSelectInstMemOps(SelectInst &SI,
1916 const RewriteableMemOps &Ops,
1917 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1918 bool CFGChanged = false;
1919 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1920
1921 for (const RewriteableMemOp &Op : Ops) {
1922 SelectHandSpeculativity Spec;
1923 Instruction *I;
1924 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1925 I = *US;
1926 } else {
1927 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1928 I = PSL.getPointer();
1929 Spec = PSL.getInt();
1930 }
1931 if (Spec.areAllSpeculatable()) {
1932 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1933 } else {
1934 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1935 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1936 CFGChanged = true;
1937 }
1938 I->eraseFromParent();
1939 }
1940
1941 for (User *U : make_early_inc_range(SI.users()))
1942 cast<BitCastInst>(U)->eraseFromParent();
1943 SI.eraseFromParent();
1944 return CFGChanged;
1945}
1946
1947/// Compute an adjusted pointer from Ptr by Offset bytes where the
1948/// resulting pointer has PointerTy.
1949static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1950 APInt Offset, Type *PointerTy,
1951 const Twine &NamePrefix) {
1952 if (Offset != 0)
1953 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1954 NamePrefix + "sroa_idx");
1955 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1956 NamePrefix + "sroa_cast");
1957}
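// Illustrative example: with Offset == 8 and a PointerTy in the same address
// space, the helper above emits
//   %<prefix>sroa_idx = getelementptr inbounds i8, ptr %ptr, i64 8
// and the trailing pointer cast folds away; only a differing address space
// introduces an extra "<prefix>sroa_cast" addrspacecast.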
1958
1959/// Compute the adjusted alignment for a load or store from an offset.
1960static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1961 return commonAlignment(getLoadStoreAlignment(I), Offset);
1962}
1963
1964/// Test whether we can convert a value from the old to the new type.
1965///
1966/// This predicate should be used to guard calls to convertValue in order to
1967/// ensure that we only try to convert viable values. The strategy is that we
1968/// will peel off single element struct and array wrappings to get to an
1969/// underlying value, and convert that value.
1970static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1971 unsigned VScale = 0) {
1972 if (OldTy == NewTy)
1973 return true;
1974
1975 // For integer types, we can't handle any bit-width differences. This would
1976 // break both vector conversions with extension and introduce endianness
1977 // issues when in conjunction with loads and stores.
1978 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1979 assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1980 cast<IntegerType>(NewTy)->getBitWidth() &&
1981 "We can't have the same bitwidth for different int types");
1982 return false;
1983 }
1984
1985 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1986 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1987
1988 if ((isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) ||
1989 (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy))) {
1990 // Conversion is only possible when the size of scalable vectors is known.
1991 if (!VScale)
1992 return false;
1993
1994 // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
1995 // a single domain (either fixed or scalable). Any additional conversion
1996 // between fixed and scalable types is handled through integer types.
1997 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1998 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
1999
2000 if (isa<ScalableVectorType>(NewTy)) {
2002 return false;
2003
2004 NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
2005 } else {
2007 return false;
2008
2009 OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
2010 }
2011 }
2012
2013 if (NewSize != OldSize)
2014 return false;
2015 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
2016 return false;
2017
2018 // We can convert pointers to integers and vice-versa. Same for vectors
2019 // of pointers and integers.
2020 OldTy = OldTy->getScalarType();
2021 NewTy = NewTy->getScalarType();
2022 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
2023 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
2024 unsigned OldAS = OldTy->getPointerAddressSpace();
2025 unsigned NewAS = NewTy->getPointerAddressSpace();
2026 // Convert pointers if they are pointers from the same address space or
2027 // different integral (not non-integral) address spaces with the same
2028 // pointer size.
2029 return OldAS == NewAS ||
2030 (!DL.isNonIntegralAddressSpace(OldAS) &&
2031 !DL.isNonIntegralAddressSpace(NewAS) &&
2032 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2033 }
2034
2035 // We can convert integers to integral pointers, but not to non-integral
2036 // pointers.
2037 if (OldTy->isIntegerTy())
2038 return !DL.isNonIntegralPointerType(NewTy);
2039
2040 // We can convert integral pointers to integers, but non-integral pointers
2041 // need to remain pointers.
2042 if (!DL.isNonIntegralPointerType(OldTy))
2043 return NewTy->isIntegerTy();
2044
2045 return false;
2046 }
2047
2048 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
2049 return false;
2050
2051 return true;
2052}
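// Illustrative examples for the predicate above on a typical 64-bit DataLayout:
//   convertible: i64 <-> double, i64 <-> ptr (integral address space),
//                <2 x float> <-> i64, <8 x i8> <-> <2 x i32>
//   not convertible: i32 <-> i64 (different widths), any aggregate type,
//                    ptr in a non-integral address space <-> i64,
//                    target extension types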
2053
2054/// Generic routine to convert an SSA value to a value of a different
2055/// type.
2056///
2057/// This will try various different casting techniques, such as bitcasts,
2058/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
2059/// two types for viability with this routine.
2060static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2061 Type *NewTy) {
2062 Type *OldTy = V->getType();
2063
2064#ifndef NDEBUG
2065 BasicBlock *BB = IRB.GetInsertBlock();
2066 assert(BB && BB->getParent() && "VScale unknown!");
2067 unsigned VScale = BB->getParent()->getVScaleValue();
2068 assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
2069 "Value not convertable to type");
2070#endif
2071
2072 if (OldTy == NewTy)
2073 return V;
2074
2075 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
2076 "Integer types must be the exact same to convert.");
2077
2078 // A variant of bitcast that supports a mixture of fixed and scalable types
2079 // that are known to have the same size.
2080 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
2081 Type *InTy = In->getType();
2082 if (InTy == Ty)
2083 return In;
2084
2086 // For vscale_range(2) expand <4 x i32> to <vscale x 4 x i16> -->
2087 // <4 x i32> to <vscale x 2 x i32> to <vscale x 4 x i16>
2089 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2090 PoisonValue::get(VTy), In,
2091 IRB.getInt64(0)),
2092 Ty);
2093 }
2094
2096 // For vscale_range(2) expand <vscale x 4 x i16> to <4 x i32> -->
2097 // <vscale x 4 x i16> to <vscale x 2 x i32> to <4 x i32>
2099 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2100 IRB.getInt64(0));
2101 }
2102
2103 return IRB.CreateBitCast(In, Ty);
2104 };
2105
2106 // See if we need inttoptr for this type pair. May require additional bitcast.
2107 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2108 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
2109 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
2110 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
2111 // Directly handle i64 to i8*
2112 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
2113 NewTy);
2114 }
2115
2116 // See if we need ptrtoint for this type pair. May require additional bitcast.
2117 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2118 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
2119 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
2120 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
2121 // Expand i8* to i64 --> i8* to i64 to i64
2122 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2123 NewTy);
2124 }
2125
2126 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2127 unsigned OldAS = OldTy->getPointerAddressSpace();
2128 unsigned NewAS = NewTy->getPointerAddressSpace();
2129 // To convert pointers between different address spaces (already checked to
2130 // be convertible, i.e. they have the same pointer size), we cannot use
2131 // `bitcast` (which requires both pointers to be in the same address space)
2132 // or `addrspacecast` (which is not always a no-op cast). Instead, use a
2133 // pair of no-op `ptrtoint`/`inttoptr` casts through an integer with the
2134 // same bit size.
2135 if (OldAS != NewAS) {
2136 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2137 return IRB.CreateIntToPtr(
2138 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2139 DL.getIntPtrType(NewTy)),
2140 NewTy);
2141 }
2142 }
2143
2144 return CreateBitCastLike(V, NewTy);
2145}
2146
2147/// Test whether the given slice use can be promoted to a vector.
2148///
2149/// This function is called to test each entry in a partition which is slated
2150/// for a single slice.
2151static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2152 VectorType *Ty,
2153 uint64_t ElementSize,
2154 const DataLayout &DL,
2155 unsigned VScale) {
2156 // First validate the slice offsets.
2157 uint64_t BeginOffset =
2158 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2159 uint64_t BeginIndex = BeginOffset / ElementSize;
2160 if (BeginIndex * ElementSize != BeginOffset ||
2161 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2162 return false;
2163 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2164 uint64_t EndIndex = EndOffset / ElementSize;
2165 if (EndIndex * ElementSize != EndOffset ||
2166 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2167 return false;
2168
2169 assert(EndIndex > BeginIndex && "Empty vector!");
2170 uint64_t NumElements = EndIndex - BeginIndex;
2171 Type *SliceTy = (NumElements == 1)
2172 ? Ty->getElementType()
2173 : FixedVectorType::get(Ty->getElementType(), NumElements);
2174
2175 Type *SplitIntTy =
2176 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2177
2178 Use *U = S.getUse();
2179
2180 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2181 if (MI->isVolatile())
2182 return false;
2183 if (!S.isSplittable())
2184 return false; // Skip any unsplittable intrinsics.
2185 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2186 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2187 return false;
2188 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2189 if (LI->isVolatile())
2190 return false;
2191 Type *LTy = LI->getType();
2192 // Disable vector promotion when there are loads or stores of an FCA.
2193 if (LTy->isStructTy())
2194 return false;
2195 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2196 assert(LTy->isIntegerTy());
2197 LTy = SplitIntTy;
2198 }
2199 if (!canConvertValue(DL, SliceTy, LTy, VScale))
2200 return false;
2201 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2202 if (SI->isVolatile())
2203 return false;
2204 Type *STy = SI->getValueOperand()->getType();
2205 // Disable vector promotion when there are loads or stores of an FCA.
2206 if (STy->isStructTy())
2207 return false;
2208 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2209 assert(STy->isIntegerTy());
2210 STy = SplitIntTy;
2211 }
2212 if (!canConvertValue(DL, STy, SliceTy, VScale))
2213 return false;
2214 } else {
2215 return false;
2216 }
2217
2218 return true;
2219}
2220
2221/// Test whether any vector type in \p CandidateTys is viable for promotion.
2222///
2223/// This implements the necessary checking for \c isVectorPromotionViable over
2224/// all slices of the alloca for the given VectorType.
2225static VectorType *
2226checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
2227 SmallVectorImpl<VectorType *> &CandidateTys,
2228 bool HaveCommonEltTy, Type *CommonEltTy,
2229 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2230 VectorType *CommonVecPtrTy, unsigned VScale) {
2231 // If we didn't find a vector type, nothing to do here.
2232 if (CandidateTys.empty())
2233 return nullptr;
2234
2235 // Pointer-ness is sticky, if we had a vector-of-pointers candidate type,
2236 // then we should choose it, not some other alternative.
2237 // But, we can't perform a no-op pointer address space change via bitcast,
2238 // so if we didn't have a common pointer element type, bail.
2239 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2240 return nullptr;
2241
2242 // Try to pick the "best" element type out of the choices.
2243 if (!HaveCommonEltTy && HaveVecPtrTy) {
2244 // If there was a pointer element type, there's really only one choice.
2245 CandidateTys.clear();
2246 CandidateTys.push_back(CommonVecPtrTy);
2247 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2248 // Integer-ify vector types.
2249 for (VectorType *&VTy : CandidateTys) {
2250 if (!VTy->getElementType()->isIntegerTy())
2251 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2252 VTy->getContext(), VTy->getScalarSizeInBits())));
2253 }
2254
2255 // Rank the remaining candidate vector types. This is easy because we know
2256 // they're all integer vectors. We sort by ascending number of elements.
2257 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2258 (void)DL;
2259 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2260 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2261 "Cannot have vector types of different sizes!");
2262 assert(RHSTy->getElementType()->isIntegerTy() &&
2263 "All non-integer types eliminated!");
2264 assert(LHSTy->getElementType()->isIntegerTy() &&
2265 "All non-integer types eliminated!");
2266 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2267 cast<FixedVectorType>(LHSTy)->getNumElements();
2268 };
2269 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2270 (void)DL;
2271 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2272 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2273 "Cannot have vector types of different sizes!");
2274 assert(RHSTy->getElementType()->isIntegerTy() &&
2275 "All non-integer types eliminated!");
2276 assert(LHSTy->getElementType()->isIntegerTy() &&
2277 "All non-integer types eliminated!");
2278 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2279 cast<FixedVectorType>(LHSTy)->getNumElements();
2280 };
2281 llvm::sort(CandidateTys, RankVectorTypesComp);
2282 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2283 CandidateTys.end());
2284 } else {
2285// The only way to have the same element type in every vector type is to
2286// have the same vector type. Check that and remove all but one.
2287#ifndef NDEBUG
2288 for (VectorType *VTy : CandidateTys) {
2289 assert(VTy->getElementType() == CommonEltTy &&
2290 "Unaccounted for element type!");
2291 assert(VTy == CandidateTys[0] &&
2292 "Different vector types with the same element type!");
2293 }
2294#endif
2295 CandidateTys.resize(1);
2296 }
2297
2298 // FIXME: hack. Do we have a named constant for this?
2299 // SDAG SDNode can't have more than 65535 operands.
2300 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2301 return cast<FixedVectorType>(VTy)->getNumElements() >
2302 std::numeric_limits<unsigned short>::max();
2303 });
2304
2305 // Find a vector type viable for promotion by iterating over all slices.
2306 auto *VTy = llvm::find_if(CandidateTys, [&](VectorType *VTy) -> bool {
2307 uint64_t ElementSize =
2308 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2309
2310 // While the definition of LLVM vectors is bitpacked, we don't support sizes
2311 // that aren't byte sized.
2312 if (ElementSize % 8)
2313 return false;
2314 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2315 "vector size not a multiple of element size?");
2316 ElementSize /= 8;
2317
2318 for (const Slice &S : P)
2319 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
2320 return false;
2321
2322 for (const Slice *S : P.splitSliceTails())
2323 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
2324 return false;
2325
2326 return true;
2327 });
2328 return VTy != CandidateTys.end() ? *VTy : nullptr;
2329}
2330
2331static VectorType *createAndCheckVectorTypesForPromotion(
2332 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2333 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2334 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2335 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2336 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2337 [[maybe_unused]] VectorType *OriginalElt =
2338 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2339 // Consider additional vector types where the element type size is a
2340 // multiple of load/store element size.
2341 for (Type *Ty : OtherTys) {
2342 if (!VectorType::isValidElementType(Ty))
2343 continue;
2344 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2345 // Make a copy of CandidateTys and iterate through it, because we
2346 // might append to CandidateTys in the loop.
2347 for (VectorType *const VTy : CandidateTysCopy) {
2348 // The elements in the copy should remain invariant throughout the loop
2349 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2350 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2351 unsigned ElementSize =
2352 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2353 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2354 VectorSize % TypeSize == 0) {
2355 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2356 CheckCandidateType(NewVTy);
2357 }
2358 }
2359 }
2360
2361 return checkVectorTypesForPromotion(
2362 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2363 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2364}
2365
2366/// Test whether the given alloca partitioning and range of slices can be
2367/// promoted to a vector.
2368///
2369/// This is a quick test to check whether we can rewrite a particular alloca
2370/// partition (and its newly formed alloca) into a vector alloca with only
2371/// whole-vector loads and stores such that it could be promoted to a vector
2372/// SSA value. We only can ensure this for a limited set of operations, and we
2373/// don't want to do the rewrites unless we are confident that the result will
2374/// be promotable, so we have an early test here.
2375static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
2376 unsigned VScale) {
2377 // Collect the candidate types for vector-based promotion. Also track whether
2378 // we have different element types.
2379 SmallVector<VectorType *, 4> CandidateTys;
2380 SetVector<Type *> LoadStoreTys;
2381 SetVector<Type *> DeferredTys;
2382 Type *CommonEltTy = nullptr;
2383 VectorType *CommonVecPtrTy = nullptr;
2384 bool HaveVecPtrTy = false;
2385 bool HaveCommonEltTy = true;
2386 bool HaveCommonVecPtrTy = true;
2387 auto CheckCandidateType = [&](Type *Ty) {
2388 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
2389 // Discard all candidates if the vector types differ in total size in bits.
2390 if (!CandidateTys.empty()) {
2391 VectorType *V = CandidateTys[0];
2392 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2393 DL.getTypeSizeInBits(V).getFixedValue()) {
2394 CandidateTys.clear();
2395 return;
2396 }
2397 }
2398 CandidateTys.push_back(VTy);
2399 Type *EltTy = VTy->getElementType();
2400
2401 if (!CommonEltTy)
2402 CommonEltTy = EltTy;
2403 else if (CommonEltTy != EltTy)
2404 HaveCommonEltTy = false;
2405
2406 if (EltTy->isPointerTy()) {
2407 HaveVecPtrTy = true;
2408 if (!CommonVecPtrTy)
2409 CommonVecPtrTy = VTy;
2410 else if (CommonVecPtrTy != VTy)
2411 HaveCommonVecPtrTy = false;
2412 }
2413 }
2414 };
2415
2416 // Put load and store types into a set for de-duplication.
2417 for (const Slice &S : P) {
2418 Type *Ty;
2419 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2420 Ty = LI->getType();
2421 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2422 Ty = SI->getValueOperand()->getType();
2423 else
2424 continue;
2425
2426 auto CandTy = Ty->getScalarType();
2427 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2428 S.endOffset() != P.endOffset())) {
2429 DeferredTys.insert(Ty);
2430 continue;
2431 }
2432
2433 LoadStoreTys.insert(Ty);
2434 // Consider any loads or stores that are the exact size of the slice.
2435 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2436 CheckCandidateType(Ty);
2437 }
2438
2439 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2440 if (auto *VTy = createAndCheckVectorTypesForPromotion(
2441 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2442 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2443 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2444 return VTy;
2445
2446 CandidateTys.clear();
2447 return createAndCheckVectorTypesForPromotion(
2448 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2449 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2450 CommonVecPtrTy, VScale);
2451}
2452
2453/// Test whether a slice of an alloca is valid for integer widening.
2454///
2455/// This implements the necessary checking for the \c isIntegerWideningViable
2456/// test below on a single slice of the alloca.
2457static bool isIntegerWideningViableForSlice(const Slice &S,
2458 uint64_t AllocBeginOffset,
2459 Type *AllocaTy,
2460 const DataLayout &DL,
2461 bool &WholeAllocaOp) {
2462 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2463
2464 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2465 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2466
2467 Use *U = S.getUse();
2468
2469 // Lifetime intrinsics operate over the whole alloca whose sizes are usually
2470 // larger than other load/store slices (RelEnd > Size). But lifetime
2471 // intrinsics are always promotable and should not impact the promotability
2472 // of the other slices in the partition.
2473 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2474 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2475 return true;
2476 }
2477
2478 // We can't reasonably handle cases where the load or store extends past
2479 // the end of the alloca's type and into its padding.
2480 if (RelEnd > Size)
2481 return false;
2482
2483 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2484 if (LI->isVolatile())
2485 return false;
2486 // We can't handle loads that extend past the allocated memory.
2487 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
2488 if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size)
2489 return false;
2490 // So far, AllocaSliceRewriter does not support widening split slice tails
2491 // in rewriteIntegerLoad.
2492 if (S.beginOffset() < AllocBeginOffset)
2493 return false;
2494 // Note that we don't count vector loads or stores as whole-alloca
2495 // operations which enable integer widening because we would prefer to use
2496 // vector widening instead.
2497 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2498 WholeAllocaOp = true;
2499 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2500 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2501 return false;
2502 } else if (RelBegin != 0 || RelEnd != Size ||
2503 !canConvertValue(DL, AllocaTy, LI->getType())) {
2504 // Non-integer loads need to be convertible from the alloca type so that
2505 // they are promotable.
2506 return false;
2507 }
2508 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2509 Type *ValueTy = SI->getValueOperand()->getType();
2510 if (SI->isVolatile())
2511 return false;
2512 // We can't handle stores that extend past the allocated memory.
2513 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2514 if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size)
2515 return false;
2516 // So far, AllocaSliceRewriter does not support widening split slice tails
2517 // in rewriteIntegerStore.
2518 if (S.beginOffset() < AllocBeginOffset)
2519 return false;
2520 // Note that we don't count vector loads or stores as whole-alloca
2521 // operations which enable integer widening because we would prefer to use
2522 // vector widening instead.
2523 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2524 WholeAllocaOp = true;
2525 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2526 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2527 return false;
2528 } else if (RelBegin != 0 || RelEnd != Size ||
2529 !canConvertValue(DL, ValueTy, AllocaTy)) {
2530 // Non-integer stores need to be convertible to the alloca type so that
2531 // they are promotable.
2532 return false;
2533 }
2534 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2535 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2536 return false;
2537 if (!S.isSplittable())
2538 return false; // Skip any unsplittable intrinsics.
2539 } else {
2540 return false;
2541 }
2542
2543 return true;
2544}
2545
2546/// Test whether the given alloca partition's integer operations can be
2547/// widened to promotable ones.
2548///
2549/// This is a quick test to check whether we can rewrite the integer loads and
2550/// stores to a particular alloca into wider loads and stores and be able to
2551/// promote the resulting alloca.
2552static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2553 const DataLayout &DL) {
2554 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2555 // Don't create integer types larger than the maximum bitwidth.
2556 if (SizeInBits > IntegerType::MAX_INT_BITS)
2557 return false;
2558
2559 // Don't try to handle allocas with bit-padding.
2560 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2561 return false;
2562
2563 // We need to ensure that an integer type with the appropriate bitwidth can
2564 // be converted to the alloca type, whatever that is. We don't want to force
2565 // the alloca itself to have an integer type if there is a more suitable one.
2566 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2567 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2568 !canConvertValue(DL, IntTy, AllocaTy))
2569 return false;
2570
2571 // While examining uses, we ensure that the alloca has a covering load or
2572 // store. We don't want to widen the integer operations only to fail to
2573 // promote due to some other unsplittable entry (which we may make splittable
2574 // later). However, if there are only splittable uses, go ahead and assume
2575 // that we cover the alloca.
2576 // FIXME: We shouldn't consider split slices that happen to start in the
2577 // partition here...
2578 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2579
2580 for (const Slice &S : P)
2581 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2582 WholeAllocaOp))
2583 return false;
2584
2585 for (const Slice *S : P.splitSliceTails())
2586 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2587 WholeAllocaOp))
2588 return false;
2589
2590 return WholeAllocaOp;
2591}
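// Illustrative example: an i32-typed alloca covered by a whole-alloca
// `load i32` plus a `store i16` at offset 0 passes the test above: the i32
// load is the covering whole-alloca operation, and the i16 store is an
// integer access whose bit width matches its store size.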
2592
2593static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2594 IntegerType *Ty, uint64_t Offset,
2595 const Twine &Name) {
2596 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2597 IntegerType *IntTy = cast<IntegerType>(V->getType());
2598 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2599 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2600 "Element extends past full value");
2601 uint64_t ShAmt = 8 * Offset;
2602 if (DL.isBigEndian())
2603 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2604 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2605 if (ShAmt) {
2606 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2607 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2608 }
2609 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2610 "Cannot extract to a larger integer!");
2611 if (Ty != IntTy) {
2612 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2613 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2614 }
2615 return V;
2616}
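// Illustrative example: extracting an i16 at byte Offset 2 from an i64 value:
//   little-endian: ShAmt = 8 * 2 = 16           -> lshr by 16, trunc to i16
//   big-endian:    ShAmt = 8 * (8 - 2 - 2) = 32 -> lshr by 32, trunc to i16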
2617
2618static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2619 Value *V, uint64_t Offset, const Twine &Name) {
2620 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2621 IntegerType *Ty = cast<IntegerType>(V->getType());
2622 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2623 "Cannot insert a larger integer!");
2624 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2625 if (Ty != IntTy) {
2626 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2627 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2628 }
2629 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2630 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2631 "Element store outside of alloca store");
2632 uint64_t ShAmt = 8 * Offset;
2633 if (DL.isBigEndian())
2634 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2635 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2636 if (ShAmt) {
2637 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2638 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2639 }
2640
2641 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2642 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2643 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2644 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2645 V = IRB.CreateOr(Old, V, Name + ".insert");
2646 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2647 }
2648 return V;
2649}
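// Illustrative example: inserting an i8 value %v at byte Offset 1 into an i32
// value %old on a little-endian target (ShAmt = 8):
//   %v.ext    = zext i8 %v to i32
//   %v.shift  = shl i32 %v.ext, 8
//   %v.mask   = and i32 %old, -65281        ; 0xFFFF00FF clears byte 1
//   %v.insert = or i32 %v.mask, %v.shift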
2650
2651static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2652 unsigned EndIndex, const Twine &Name) {
2653 auto *VecTy = cast<FixedVectorType>(V->getType());
2654 unsigned NumElements = EndIndex - BeginIndex;
2655 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2656
2657 if (NumElements == VecTy->getNumElements())
2658 return V;
2659
2660 if (NumElements == 1) {
2661 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2662 Name + ".extract");
2663 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2664 return V;
2665 }
2666
2667 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2668 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2669 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2670 return V;
2671}
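// Illustrative example: extracting elements [2, 4) from an <8 x i32> value
// emits
//   %v.extract = shufflevector <8 x i32> %v, <8 x i32> poison,
//                              <2 x i32> <i32 2, i32 3>
// while a single-element extraction uses extractelement instead.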
2672
2673static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2674 unsigned BeginIndex, const Twine &Name) {
2675 VectorType *VecTy = cast<VectorType>(Old->getType());
2676 assert(VecTy && "Can only insert a vector into a vector");
2677
2678 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2679 if (!Ty) {
2680 // Single element to insert.
2681 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2682 Name + ".insert");
2683 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2684 return V;
2685 }
2686
2687 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2688 cast<FixedVectorType>(VecTy)->getNumElements() &&
2689 "Too many elements!");
2690 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2691 cast<FixedVectorType>(VecTy)->getNumElements()) {
2692 assert(V->getType() == VecTy && "Vector type mismatch");
2693 return V;
2694 }
2695 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2696
2697 // When inserting a smaller vector into the larger to store, we first
2698 // use a shuffle vector to widen it with undef elements, and then
2699 // a second shuffle vector to select between the loaded vector and the
2700 // incoming vector.
2701 SmallVector<int, 8> Mask;
2702 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2703 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2704 if (i >= BeginIndex && i < EndIndex)
2705 Mask.push_back(i - BeginIndex);
2706 else
2707 Mask.push_back(-1);
2708 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2709 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2710
2711 SmallVector<Constant *, 8> Mask2;
2712 Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2713 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2714 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2715
2716 // No profiling support for vector selects.
2717 V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
2718 DEBUG_TYPE, Name + "blend");
2719
2720 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2721 return V;
2722}
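// Illustrative example: inserting a <2 x i32> value %v at BeginIndex 2 into a
// <4 x i32> value %old:
//   %v.expand = shufflevector <2 x i32> %v, <2 x i32> poison,
//               <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
//   %blend    = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>,
//               <4 x i32> %v.expand, <4 x i32> %old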
2723
2724/// This function takes two vector values and combines them into a single vector
2725/// by concatenating their elements. The function handles:
2726///
2727/// 1. Element type mismatch: If either vector's element type differs from
2728/// NewAIEltType, the function bitcasts the vector to use NewAIEltType while
2729/// preserving the total bit width (adjusting the number of elements
2730/// accordingly).
2731///
2732/// 2. Size mismatch: After transforming the vectors to have the desired element
2733/// type, if the two vectors have different numbers of elements, the smaller
2734/// vector is extended with poison values to match the size of the larger
2735/// vector before concatenation.
2736///
2737/// 3. Concatenation: The vectors are merged using a shuffle operation that
2738/// places all elements of V0 first, followed by all elements of V1.
2739///
2740/// \param V0 The first vector to merge (must be a vector type)
2741/// \param V1 The second vector to merge (must be a vector type)
2742/// \param DL The data layout for size calculations
2743/// \param NewAIEltTy The desired element type for the result vector
2744/// \param Builder IRBuilder for creating new instructions
2745/// \return A new vector containing all elements from V0 followed by all
2746/// elements from V1
2748 Type *NewAIEltTy, IRBuilder<> &Builder) {
2749 // V0 and V1 are vectors
2750 // Create a new vector type with combined elements
2751 // Use ShuffleVector to concatenate the vectors
2752 auto *VecType0 = cast<FixedVectorType>(V0->getType());
2753 auto *VecType1 = cast<FixedVectorType>(V1->getType());
2754
2755 // If V0/V1 element types are different from NewAllocaElementType,
2756 // we need to introduce bitcasts before merging them
2757 auto BitcastIfNeeded = [&](Value *&V, FixedVectorType *&VecType,
2758 const char *DebugName) {
2759 Type *EltType = VecType->getElementType();
2760 if (EltType != NewAIEltTy) {
2761 // Calculate new number of elements to maintain same bit width
2762 unsigned TotalBits =
2763 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
2764 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
2765
2766 auto *NewVecType = FixedVectorType::get(NewAIEltTy, NewNumElts);
2767 V = Builder.CreateBitCast(V, NewVecType);
2768 VecType = NewVecType;
2769 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");
2770 }
2771 };
2772
2773 BitcastIfNeeded(V0, VecType0, "V0");
2774 BitcastIfNeeded(V1, VecType1, "V1");
2775
2776 unsigned NumElts0 = VecType0->getNumElements();
2777 unsigned NumElts1 = VecType1->getNumElements();
2778
2779 SmallVector<int, 16> ShuffleMask;
2780
2781 if (NumElts0 == NumElts1) {
2782 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2783 ShuffleMask.push_back(i);
2784 } else {
2785 // If two vectors have different sizes, we need to extend
2786 // the smaller vector to the size of the larger vector.
2787 unsigned SmallSize = std::min(NumElts0, NumElts1);
2788 unsigned LargeSize = std::max(NumElts0, NumElts1);
2789 bool IsV0Smaller = NumElts0 < NumElts1;
2790 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2791 SmallVector<int, 16> ExtendMask;
2792 for (unsigned i = 0; i < SmallSize; ++i)
2793 ExtendMask.push_back(i);
2794 for (unsigned i = SmallSize; i < LargeSize; ++i)
2795 ExtendMask.push_back(PoisonMaskElem);
2796 ExtendedVec = Builder.CreateShuffleVector(
2797 ExtendedVec, PoisonValue::get(ExtendedVec->getType()), ExtendMask);
2798 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
2799 for (unsigned i = 0; i < NumElts0; ++i)
2800 ShuffleMask.push_back(i);
2801 for (unsigned i = 0; i < NumElts1; ++i)
2802 ShuffleMask.push_back(LargeSize + i);
2803 }
2804
2805 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2806}
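// Illustrative example: merging a <2 x float> %v0 with a <4 x float> %v1 when
// the allocated element type is float. %v0 is first widened with poison lanes,
// then the two vectors are concatenated:
//   %ext = shufflevector <2 x float> %v0, <2 x float> poison,
//          <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %cat = shufflevector <4 x float> %ext, <4 x float> %v1,
//          <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>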
2807
2808namespace {
2809
2810/// Visitor to rewrite instructions using a particular slice of an alloca
2811/// to use a new alloca.
2812///
2813/// Also implements the rewriting to vector-based accesses when the partition
2814/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2815/// lives here.
2816class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2817 // Befriend the base class so it can delegate to private visit methods.
2818 friend class InstVisitor<AllocaSliceRewriter, bool>;
2819
2820 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2821
2822 const DataLayout &DL;
2823 AllocaSlices &AS;
2824 SROA &Pass;
2825 AllocaInst &OldAI, &NewAI;
2826 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2827 Type *NewAllocaTy;
2828
2829 // This is a convenience and flag variable that will be null unless the new
2830 // alloca's integer operations should be widened to this integer type due to
2831 // passing isIntegerWideningViable above. If it is non-null, the desired
2832 // integer type will be stored here for easy access during rewriting.
2833 IntegerType *IntTy;
2834
2835 // If we are rewriting an alloca partition which can be written as pure
2836 // vector operations, we stash extra information here. When VecTy is
2837 // non-null, we have some strict guarantees about the rewritten alloca:
2838 // - The new alloca is exactly the size of the vector type here.
2839 // - The accesses all either map to the entire vector or to a single
2840 // element.
2841 // - The set of accessing instructions is only one of those handled above
2842 // in isVectorPromotionViable. Generally these are the same access kinds
2843 // which are promotable via mem2reg.
2844 VectorType *VecTy;
2845 Type *ElementTy;
2846 uint64_t ElementSize;
2847
2848 // The original offset of the slice currently being rewritten relative to
2849 // the original alloca.
2850 uint64_t BeginOffset = 0;
2851 uint64_t EndOffset = 0;
2852
2853 // The new offsets of the slice currently being rewritten relative to the
2854 // original alloca.
2855 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2856
2857 uint64_t SliceSize = 0;
2858 bool IsSplittable = false;
2859 bool IsSplit = false;
2860 Use *OldUse = nullptr;
2861 Instruction *OldPtr = nullptr;
2862
2863 // Track post-rewrite users which are PHI nodes and Selects.
2864 SmallSetVector<PHINode *, 8> &PHIUsers;
2865 SmallSetVector<SelectInst *, 8> &SelectUsers;
2866
2867 // Utility IR builder, whose name prefix is set up for each visited use, and
2868 // the insertion point is set to point to the user.
2869 IRBuilderTy IRB;
2870
2871 // Return the new alloca, addrspacecasted if required to avoid changing the
2872 // addrspace of a volatile access.
2873 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2874 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2875 return &NewAI;
2876
2877 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2878 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2879 }
2880
2881public:
2882 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2883 AllocaInst &OldAI, AllocaInst &NewAI,
2884 uint64_t NewAllocaBeginOffset,
2885 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2886 VectorType *PromotableVecTy,
2887 SmallSetVector<PHINode *, 8> &PHIUsers,
2888 SmallSetVector<SelectInst *, 8> &SelectUsers)
2889 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2890 NewAllocaBeginOffset(NewAllocaBeginOffset),
2891 NewAllocaEndOffset(NewAllocaEndOffset),
2892 NewAllocaTy(NewAI.getAllocatedType()),
2893 IntTy(
2894 IsIntegerPromotable
2895 ? Type::getIntNTy(NewAI.getContext(),
2896 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2897 .getFixedValue())
2898 : nullptr),
2899 VecTy(PromotableVecTy),
2900 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2901 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2902 : 0),
2903 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2904 IRB(NewAI.getContext(), ConstantFolder()) {
2905 if (VecTy) {
2906 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2907 "Only multiple-of-8 sized vector elements are viable");
2908 ++NumVectorized;
2909 }
2910 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2911 }
2912
2913 bool visit(AllocaSlices::const_iterator I) {
2914 bool CanSROA = true;
2915 BeginOffset = I->beginOffset();
2916 EndOffset = I->endOffset();
2917 IsSplittable = I->isSplittable();
2918 IsSplit =
2919 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2920 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2921 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2922 LLVM_DEBUG(dbgs() << "\n");
2923
2924 // Compute the intersecting offset range.
2925 assert(BeginOffset < NewAllocaEndOffset);
2926 assert(EndOffset > NewAllocaBeginOffset);
2927 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2928 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2929
2930 SliceSize = NewEndOffset - NewBeginOffset;
2931 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2932 << ") NewBegin:(" << NewBeginOffset << ", "
2933 << NewEndOffset << ") NewAllocaBegin:("
2934 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2935 << ")\n");
2936 assert(IsSplit || NewBeginOffset == BeginOffset);
2937 OldUse = I->getUse();
2938 OldPtr = cast<Instruction>(OldUse->get());
2939
2940 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2941 IRB.SetInsertPoint(OldUserI);
2942 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2943 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2944 Twine(BeginOffset) + ".");
2945
2946 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2947 if (VecTy || IntTy)
2948 assert(CanSROA);
2949 return CanSROA;
2950 }
2951
2952 /// Attempts to rewrite a partition using tree-structured merge optimization.
2953 ///
2954 /// This function analyzes a partition to determine if it can be optimized
2955 /// using a tree-structured merge pattern, where multiple non-overlapping
2956 /// stores completely fill an alloca. And there is no load from the alloca in
2957 /// the middle of the stores. Such patterns can be optimized by eliminating
2958 /// the intermediate stores and directly constructing the final vector by
2959 /// using shufflevectors.
2960 ///
2961 /// Example transformation:
2962 /// Before: (stores do not have to be in order)
2963 /// %alloca = alloca <8 x float>
2964 /// store <2 x float> %val0, ptr %alloca ; offset 0-1
2965 /// store <2 x float> %val2, ptr %alloca+16 ; offset 4-5
2966 /// store <2 x float> %val1, ptr %alloca+8 ; offset 2-3
2967 /// store <2 x float> %val3, ptr %alloca+24 ; offset 6-7
2968 ///
2969 /// After:
2970 /// %alloca = alloca <8 x float>
2971 /// %shuffle0 = shufflevector %val0, %val1, <4 x i32> <i32 0, i32 1, i32 2,
2972 /// i32 3>
2973 /// %shuffle1 = shufflevector %val2, %val3, <4 x i32> <i32 0, i32 1, i32 2,
2974 /// i32 3>
2975 /// %shuffle2 = shufflevector %shuffle0, %shuffle1, <8 x i32> <i32 0, i32 1,
2976 /// i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2977 /// store %shuffle2, ptr %alloca
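///
/// The stored values are merged pairwise, so N stores produce a balanced
/// shuffle tree roughly log2(N) levels deep feeding a single final store.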
2978 ///
2979 /// The optimization looks for partitions that:
2980 /// 1. Have no overlapping split slice tails
2981 /// 2. Contain non-overlapping stores that cover the entire alloca
2982 /// 3. Have exactly one load, which reads the complete alloca structure and is
2983 /// not in the middle of the stores (TODO: maybe we can relax the constraint
2984 /// that the entire alloca structure must be read)
2985 ///
2986 /// \param P The partition to analyze and potentially rewrite
2987 /// \return An optional vector of values that were deleted during the rewrite
2988 /// process, or std::nullopt if the partition cannot be optimized
2989 /// using tree-structured merge
2990 std::optional<SmallVector<Value *, 4>>
2991 rewriteTreeStructuredMerge(Partition &P) {
2992 // No tail slices that overlap with the partition
2993 if (!P.splitSliceTails().empty())
2994 return std::nullopt;
2995
2996 SmallVector<Value *, 4> DeletedValues;
2997 LoadInst *TheLoad = nullptr;
2998
2999 // Structure to hold store information
3000 struct StoreInfo {
3001 StoreInst *Store;
3002 uint64_t BeginOffset;
3003 uint64_t EndOffset;
3004 Value *StoredValue;
3005 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
3006 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
3007 };
3008
3009 SmallVector<StoreInfo, 4> StoreInfos;
3010
3011 // If the new alloca is a fixed vector type, we use its element type as the
3012 // allocated element type, otherwise we use i8 as the allocated element
3013 Type *AllocatedEltTy =
3014 isa<FixedVectorType>(NewAI.getAllocatedType())
3015 ? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
3016 : Type::getInt8Ty(NewAI.getContext());
3017 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
3018
3019 // Helper to check that a type:
3020 // 1. Is a fixed vector type
3021 // 2. Has an element type that is not a pointer
3022 // 3. Has an element type whose size is a multiple of 8 bits
3023 // We only handle loads/stores that meet these conditions.
3024 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
3025 auto *FixedVecTy = dyn_cast<FixedVectorType>(Ty);
3026 return FixedVecTy &&
3027 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
3028 !FixedVecTy->getElementType()->isPointerTy();
3029 };
3030
3031 for (Slice &S : P) {
3032 auto *User = cast<Instruction>(S.getUse()->getUser());
3033 if (auto *LI = dyn_cast<LoadInst>(User)) {
3034 // Do not handle the case if
3035 // 1. There is more than one load
3036 // 2. The load is volatile
3037 // 3. The load does not read the entire alloca structure
3038 // 4. The load does not meet the conditions in the helper function
3039 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
3040 S.beginOffset() != NewAllocaBeginOffset ||
3041 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
3042 return std::nullopt;
3043 TheLoad = LI;
3044 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
3045 // Do not handle the case if
3046 // 1. The store does not meet the conditions in the helper function
3047 // 2. The store is volatile
3048 // 3. The total store size is not a multiple of the allocated element
3049 // type size
3050 if (!IsTypeValidForTreeStructuredMerge(
3051 SI->getValueOperand()->getType()) ||
3052 SI->isVolatile())
3053 return std::nullopt;
3054 auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
3055 unsigned NumElts = VecTy->getNumElements();
3056 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
3057 if (NumElts * EltSize % AllocatedEltTySize != 0)
3058 return std::nullopt;
3059 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
3060 SI->getValueOperand());
3061 } else {
3062 // If we have instructions other than load and store, we cannot do the
3063 // tree structured merge
3064 return std::nullopt;
3065 }
3066 }
3067 // If we do not have any load, we cannot do the tree structured merge
3068 if (!TheLoad)
3069 return std::nullopt;
3070
3071 // If we do not have multiple stores, we cannot do the tree structured merge
3072 if (StoreInfos.size() < 2)
3073 return std::nullopt;
3074
3075 // Stores should not overlap and should cover the whole alloca
3076 // Sort by begin offset
3077 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
3078 return A.BeginOffset < B.BeginOffset;
3079 });
3080
3081 // Check for overlaps and coverage
3082 uint64_t ExpectedStart = NewAllocaBeginOffset;
3083 for (auto &StoreInfo : StoreInfos) {
3084 uint64_t BeginOff = StoreInfo.BeginOffset;
3085 uint64_t EndOff = StoreInfo.EndOffset;
3086
3087 // Check for gap or overlap
3088 if (BeginOff != ExpectedStart)
3089 return std::nullopt;
3090
3091 ExpectedStart = EndOff;
3092 }
3093 // Check that stores cover the entire alloca
3094 if (ExpectedStart != NewAllocaEndOffset)
3095 return std::nullopt;
3096
3097 // Stores should be in the same basic block
3098 // The load should not be in the middle of the stores
3099 // Note:
3100 // If the load is in a different basic block from the stores, we can still
3101 // do the tree structured merge. This is because we do not have the
3102 // store->load forwarding here. The merged vector will be stored back to
3103 // NewAI and the new load will load from NewAI. The forwarding will be
3104 // handled later when we try to promote NewAI.
3105 BasicBlock *LoadBB = TheLoad->getParent();
3106 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3107
3108 for (auto &StoreInfo : StoreInfos) {
3109 if (StoreInfo.Store->getParent() != StoreBB)
3110 return std::nullopt;
3111 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3112 return std::nullopt;
3113 }
3114
3115 // If we reach here, the partition can be rewritten with a tree-structured
3116 // merge.
3117 LLVM_DEBUG({
3118 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
3119 << "\n Ordered stores:\n";
3120 for (auto [i, Info] : enumerate(StoreInfos))
3121 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
3122 << Info.EndOffset << ") \tStore: " << *Info.Store
3123 << "\tValue: " << *Info.StoredValue << "\n";
3124 });
3125
3126 // Instead of having these stores, we merge all the stored values into a
3127 // vector and store the merged value into the alloca
3128 std::queue<Value *> VecElements;
3129 IRBuilder<> Builder(StoreInfos.back().Store);
3130 for (const auto &Info : StoreInfos) {
3131 DeletedValues.push_back(Info.Store);
3132 VecElements.push(Info.StoredValue);
3133 }
3134
3135 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");
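// Pairwise-merge the queued values until a single one remains. As an
// illustrative example, four stored values v0..v3 are reduced as
// [v0, v1, v2, v3] -> [v01, v23] -> [v0123]; an odd element left at the end
// of a round is carried over to the next round unchanged.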
3136 while (VecElements.size() > 1) {
3137 const auto NumElts = VecElements.size();
3138 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
3139 Value *V0 = VecElements.front();
3140 VecElements.pop();
3141 Value *V1 = VecElements.front();
3142 VecElements.pop();
3143 Value *Merged = mergeTwoVectors(V0, V1, DL, AllocatedEltTy, Builder);
3144 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");
3145 VecElements.push(Merged);
3146 }
3147 if (NumElts % 2 == 1) {
3148 Value *V = VecElements.front();
3149 VecElements.pop();
3150 VecElements.push(V);
3151 }
3152 }
3153
3154 // Store the merged value into the alloca
3155 Value *MergedValue = VecElements.front();
3156 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3157
3158 IRBuilder<> LoadBuilder(TheLoad);
3159 TheLoad->replaceAllUsesWith(LoadBuilder.CreateAlignedLoad(
3160 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),
3161 TheLoad->getName() + ".sroa.new.load"));
3162 DeletedValues.push_back(TheLoad);
3163
3164 return DeletedValues;
3165 }
3166
3167private:
3168 // Make sure the other visit overloads are visible.
3169 using Base::visit;
3170
3171 // Every instruction which can end up as a user must have a rewrite rule.
3172 bool visitInstruction(Instruction &I) {
3173 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
3174 llvm_unreachable("No rewrite rule for this instruction!");
3175 }
3176
3177 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
3178 // Note that the offset computation can use BeginOffset or NewBeginOffset
3179 // interchangeably for unsplit slices.
3180 assert(IsSplit || BeginOffset == NewBeginOffset);
3181 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3182
3183 StringRef OldName = OldPtr->getName();
3184 // Skip through the last '.sroa.' component of the name.
3185 size_t LastSROAPrefix = OldName.rfind(".sroa.");
3186 if (LastSROAPrefix != StringRef::npos) {
3187 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
3188 // Look for an SROA slice index.
3189 size_t IndexEnd = OldName.find_first_not_of("0123456789");
3190 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
3191 // Strip the index and look for the offset.
3192 OldName = OldName.substr(IndexEnd + 1);
3193 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
3194 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
3195 // Strip the offset.
3196 OldName = OldName.substr(OffsetEnd + 1);
3197 }
3198 }
3199 // Strip any SROA suffixes as well.
3200 OldName = OldName.substr(0, OldName.find(".sroa_"));
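// Illustrative example (the name is hypothetical): "x.sroa.3.16.copyload.sroa_cast"
// reduces to "copyload" -- the ".sroa.<index>.<offset>." components and the
// ".sroa_*" suffix are stripped so that repeatedly rewritten names do not
// grow without bound.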
3201
3202 return getAdjustedPtr(IRB, DL, &NewAI,
3203 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
3204 PointerTy, Twine(OldName) + ".");
3205 }
3206
3207 /// Compute suitable alignment to access this slice of the *new*
3208 /// alloca.
3209 ///
3210 /// The result is the common alignment of the new alloca's alignment and
3211 /// this slice's begin offset within it.
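/// For example, a new alloca aligned to 16 bytes accessed at slice offset 4
/// yields Align(4), the common alignment of 16 and 4.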
3212 Align getSliceAlign() {
3213 return commonAlignment(NewAI.getAlign(),
3214 NewBeginOffset - NewAllocaBeginOffset);
3215 }
3216
3217 unsigned getIndex(uint64_t Offset) {
3218 assert(VecTy && "Can only call getIndex when rewriting a vector");
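// As an illustrative example, for a <8 x float> slice ElementSize is 4, so an
// offset 8 bytes past the new alloca's begin offset maps to element index 2.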
3219 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
3220 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
3221 uint32_t Index = RelOffset / ElementSize;
3222 assert(Index * ElementSize == RelOffset);
3223 return Index;
3224 }
3225
3226 void deleteIfTriviallyDead(Value *V) {
3227 Instruction *I = cast<Instruction>(V);
3228 if (isInstructionTriviallyDead(I))
3229 Pass.DeadInsts.push_back(I);
3230 }
3231
3232 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3233 unsigned BeginIndex = getIndex(NewBeginOffset);
3234 unsigned EndIndex = getIndex(NewEndOffset);
3235 assert(EndIndex > BeginIndex && "Empty vector!");
3236
3237 LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3238 NewAI.getAlign(), "load");
3239
3240 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3241 LLVMContext::MD_access_group});
3242 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
3243 }
3244
3245 Value *rewriteIntegerLoad(LoadInst &LI) {
3246 assert(IntTy && "We cannot insert an integer to the alloca");
3247 assert(!LI.isVolatile());
3248 Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3249 NewAI.getAlign(), "load");
3250 V = convertValue(DL, IRB, V, IntTy);
3251 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3252 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3253 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3254 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
3255 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
3256 }
3257 // It is possible that the extracted type is not the load type. This
3258 // happens if there is a load past the end of the alloca, and as
3259 // a consequence the slice is narrower but still a candidate for integer
3260 // lowering. To handle this case, we just zero extend the extracted
3261 // integer.
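// E.g. an i32 load whose slice covers only 2 bytes of the alloca is rewritten
// as an i16 extract here and then zero-extended back to i32 below.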
3262 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
3263 "Can only handle an extract for an overly wide load");
3264 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
3265 V = IRB.CreateZExt(V, LI.getType());
3266 return V;
3267 }
3268
3269 bool visitLoadInst(LoadInst &LI) {
3270 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3271 Value *OldOp = LI.getOperand(0);
3272 assert(OldOp == OldPtr);
3273
3274 AAMDNodes AATags = LI.getAAMetadata();
3275
3276 unsigned AS = LI.getPointerAddressSpace();
3277
3278 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
3279 : LI.getType();
3280 bool IsPtrAdjusted = false;
3281 Value *V;
3282 if (VecTy) {
3283 V = rewriteVectorizedLoadInst(LI);
3284 } else if (IntTy && LI.getType()->isIntegerTy()) {
3285 V = rewriteIntegerLoad(LI);
3286 } else if (NewBeginOffset == NewAllocaBeginOffset &&
3287 NewEndOffset == NewAllocaEndOffset &&
3288 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
3289 (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() &&
3290 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3291 !LI.isVolatile()))) {
3292 Value *NewPtr =
3293 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3294 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
3295 NewAI.getAlign(), LI.isVolatile(),
3296 LI.getName());
3297 if (LI.isVolatile())
3298 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3299 if (NewLI->isAtomic())
3300 NewLI->setAlignment(LI.getAlign());
3301
3302 // Copy any metadata that is valid for the new load. This may require
3303 // conversion to a different kind of metadata, e.g. !nonnull might change
3304 // to !range or vice versa.
3305 copyMetadataForLoad(*NewLI, LI);
3306
3307 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
3308 if (AATags)
3309 NewLI->setAAMetadata(AATags.adjustForAccess(
3310 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3311
3312 // Try to preserve nonnull metadata
3313 V = NewLI;
3314
3315 // If this is an integer load past the end of the slice (which means the
3316 // bytes outside the slice are undef or this load is dead) just forcibly
3317 // fix the integer size with correct handling of endianness.
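// E.g. an i64 load from an i32-typed new alloca is zero-extended to i64; on
// big-endian targets the 32 defined bits are then shifted into the high half
// so they keep their original byte addresses.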
3318 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3319 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3320 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3321 V = IRB.CreateZExt(V, TITy, "load.ext");
3322 if (DL.isBigEndian())
3323 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3324 "endian_shift");
3325 }
3326 } else {
3327 Type *LTy = IRB.getPtrTy(AS);
3328 LoadInst *NewLI =
3329 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3330 getSliceAlign(), LI.isVolatile(), LI.getName());
3331
3332 if (AATags)
3333 NewLI->setAAMetadata(AATags.adjustForAccess(
3334 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3335
3336 if (LI.isVolatile())
3337 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3338 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3339 LLVMContext::MD_access_group});
3340
3341 V = NewLI;
3342 IsPtrAdjusted = true;
3343 }
3344 V = convertValue(DL, IRB, V, TargetTy);
3345
3346 if (IsSplit) {
3347 assert(!LI.isVolatile());
3348 assert(LI.getType()->isIntegerTy() &&
3349 "Only integer type loads and stores are split");
3350 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3351 "Split load isn't smaller than original load");
3352 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
3353 "Non-byte-multiple bit width");
3354 // Move the insertion point just past the load so that we can refer to it.
3355 BasicBlock::iterator LIIt = std::next(LI.getIterator());
3356 // Ensure the insertion point comes before any debug-info immediately
3357 // after the load, so that variable values referring to the load are
3358 // dominated by it.
3359 LIIt.setHeadBit(true);
3360 IRB.SetInsertPoint(LI.getParent(), LIIt);
3361 // Create a placeholder value with the same type as LI to use as the
3362 // basis for the new value. This allows us to replace the uses of LI with
3363 // the computed value, and then replace the placeholder with LI, leaving
3364 // LI only used for this computation.
3365 Value *Placeholder =
3366 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
3367 false, Align(1));
3368 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3369 "insert");
3370 LI.replaceAllUsesWith(V);
3371 Placeholder->replaceAllUsesWith(&LI);
3372 Placeholder->deleteValue();
3373 } else {
3374 LI.replaceAllUsesWith(V);
3375 }
3376
3377 Pass.DeadInsts.push_back(&LI);
3378 deleteIfTriviallyDead(OldOp);
3379 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
3380 return !LI.isVolatile() && !IsPtrAdjusted;
3381 }
3382
3383 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3384 AAMDNodes AATags) {
3385 // Capture V for the purpose of debug-info accounting once it's converted
3386 // to a vector store.
3387 Value *OrigV = V;
3388 if (V->getType() != VecTy) {
3389 unsigned BeginIndex = getIndex(NewBeginOffset);
3390 unsigned EndIndex = getIndex(NewEndOffset);
3391 assert(EndIndex > BeginIndex && "Empty vector!");
3392 unsigned NumElements = EndIndex - BeginIndex;
3393 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3394 "Too many elements!");
3395 Type *SliceTy = (NumElements == 1)
3396 ? ElementTy
3397 : FixedVectorType::get(ElementTy, NumElements);
3398 if (V->getType() != SliceTy)
3399 V = convertValue(DL, IRB, V, SliceTy);
3400
3401 // Mix in the existing elements.
3402 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3403 NewAI.getAlign(), "load");
3404 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3405 }
3406 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3407 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3408 LLVMContext::MD_access_group});
3409 if (AATags)
3410 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3411 V->getType(), DL));
3412 Pass.DeadInsts.push_back(&SI);
3413
3414 // NOTE: Careful to use OrigV rather than V.
3415 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3416 Store, Store->getPointerOperand(), OrigV, DL);
3417 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3418 return true;
3419 }
3420
3421 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3422 assert(IntTy && "We cannot extract an integer from the alloca");
3423 assert(!SI.isVolatile());
3424 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3425 IntTy->getBitWidth()) {
3426 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3427 NewAI.getAlign(), "oldload");
3428 Old = convertValue(DL, IRB, Old, IntTy);
3429 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3430 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3431 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3432 }
3433 V = convertValue(DL, IRB, V, NewAllocaTy);
3434 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3435 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3436 LLVMContext::MD_access_group});
3437 if (AATags)
3438 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3439 V->getType(), DL));
3440
3441 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3442 Store, Store->getPointerOperand(),
3443 Store->getValueOperand(), DL);
3444
3445 Pass.DeadInsts.push_back(&SI);
3446 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3447 return true;
3448 }
3449
3450 bool visitStoreInst(StoreInst &SI) {
3451 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3452 Value *OldOp = SI.getOperand(1);
3453 assert(OldOp == OldPtr);
3454
3455 AAMDNodes AATags = SI.getAAMetadata();
3456 Value *V = SI.getValueOperand();
3457
3458 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3459 // alloca that should be re-examined after promoting this alloca.
3460 if (V->getType()->isPointerTy())
3461 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3462 Pass.PostPromotionWorklist.insert(AI);
3463
3464 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3465 if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) {
3466 assert(!SI.isVolatile());
3467 assert(V->getType()->isIntegerTy() &&
3468 "Only integer type loads and stores are split");
3469 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3470 "Non-byte-multiple bit width");
3471 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3472 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3473 "extract");
3474 }
3475
3476 if (VecTy)
3477 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3478 if (IntTy && V->getType()->isIntegerTy())
3479 return rewriteIntegerStore(V, SI, AATags);
3480
3481 StoreInst *NewSI;
3482 if (NewBeginOffset == NewAllocaBeginOffset &&
3483 NewEndOffset == NewAllocaEndOffset &&
3484 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3485 V = convertValue(DL, IRB, V, NewAllocaTy);
3486 Value *NewPtr =
3487 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3488
3489 NewSI =
3490 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3491 } else {
3492 unsigned AS = SI.getPointerAddressSpace();
3493 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3494 NewSI =
3495 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3496 }
3497 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3498 LLVMContext::MD_access_group});
3499 if (AATags)
3500 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3501 V->getType(), DL));
3502 if (SI.isVolatile())
3503 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3504 if (NewSI->isAtomic())
3505 NewSI->setAlignment(SI.getAlign());
3506
3507 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3508 NewSI, NewSI->getPointerOperand(),
3509 NewSI->getValueOperand(), DL);
3510
3511 Pass.DeadInsts.push_back(&SI);
3512 deleteIfTriviallyDead(OldOp);
3513
3514 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3515 return NewSI->getPointerOperand() == &NewAI &&
3516 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3517 !SI.isVolatile();
3518 }
3519
3520 /// Compute an integer value from splatting an i8 across the given
3521 /// number of bytes.
3522 ///
3523 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3524 /// call this routine.
3525 /// FIXME: Heed the advice above.
3526 ///
3527 /// \param V The i8 value to splat.
3528 /// \param Size The number of bytes in the output (assuming i8 is one byte)
3529 Value *getIntegerSplat(Value *V, unsigned Size) {
3530 assert(Size > 0 && "Expected a positive number of bytes.");
3531 IntegerType *VTy = cast<IntegerType>(V->getType());
3532 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3533 if (Size == 1)
3534 return V;
3535
3536 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
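// Multiplying the zero-extended byte by 0x0101...01 (computed as
// all-ones / 0xFF) replicates it into every byte. For example, splatting
// 0xAB across 4 bytes: 0xAB * 0x01010101 = 0xABABABAB.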
3537 V = IRB.CreateMul(
3538 IRB.CreateZExt(V, SplatIntTy, "zext"),
3539 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3540 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3541 SplatIntTy)),
3542 "isplat");
3543 return V;
3544 }
3545
3546 /// Compute a vector splat for a given element value.
3547 Value *getVectorSplat(Value *V, unsigned NumElements) {
3548 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3549 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3550 return V;
3551 }
3552
3553 bool visitMemSetInst(MemSetInst &II) {
3554 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3555 assert(II.getRawDest() == OldPtr);
3556
3557 AAMDNodes AATags = II.getAAMetadata();
3558
3559 // If the memset has a variable size, it cannot be split, just adjust the
3560 // pointer to the new alloca.
3561 if (!isa<ConstantInt>(II.getLength())) {
3562 assert(!IsSplit);
3563 assert(NewBeginOffset == BeginOffset);
3564 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3565 II.setDestAlignment(getSliceAlign());
3566 // In theory we should call migrateDebugInfo here. However, we do not
3567 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3568 // constant geps, or storing a variable number of bytes.
3569 assert(at::getDVRAssignmentMarkers(&II).empty() &&
3570 "AT: Unexpected link to non-const GEP");
3571 deleteIfTriviallyDead(OldPtr);
3572 return false;
3573 }
3574
3575 // Record this instruction for deletion.
3576 Pass.DeadInsts.push_back(&II);
3577
3578 Type *AllocaTy = NewAI.getAllocatedType();
3579 Type *ScalarTy = AllocaTy->getScalarType();
3580
3581 const bool CanContinue = [&]() {
3582 if (VecTy || IntTy)
3583 return true;
3584 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3585 return false;
3586 // Length must be in range for FixedVectorType.
3587 auto *C = cast<ConstantInt>(II.getLength());
3588 const uint64_t Len = C->getLimitedValue();
3589 if (Len > std::numeric_limits<unsigned>::max())
3590 return false;
3591 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3592 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3593 return canConvertValue(DL, SrcTy, AllocaTy) &&
3594 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3595 }();
3596
3597 // If this doesn't map cleanly onto the alloca type, and that type isn't
3598 // a single value type, just emit a memset.
3599 if (!CanContinue) {
3600 Type *SizeTy = II.getLength()->getType();
3601 unsigned Sz = NewEndOffset - NewBeginOffset;
3602 Constant *Size = ConstantInt::get(SizeTy, Sz);
3603 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3604 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3605 MaybeAlign(getSliceAlign()), II.isVolatile()));
3606 if (AATags)
3607 New->setAAMetadata(
3608 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3609
3610 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3611 New, New->getRawDest(), nullptr, DL);
3612
3613 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3614 return false;
3615 }
3616
3617 // If we can represent this as a simple value, we have to build the actual
3618 // value to store, which requires expanding the byte present in memset to
3619 // a sensible representation for the alloca type. This is essentially
3620 // splatting the byte to a sufficiently wide integer, splatting it across
3621 // any desired vector width, and bitcasting to the final type.
3622 Value *V;
3623
3624 if (VecTy) {
3625 // If this is a memset of a vectorized alloca, insert it.
3626 assert(ElementTy == ScalarTy);
3627
3628 unsigned BeginIndex = getIndex(NewBeginOffset);
3629 unsigned EndIndex = getIndex(NewEndOffset);
3630 assert(EndIndex > BeginIndex && "Empty vector!");
3631 unsigned NumElements = EndIndex - BeginIndex;
3632 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3633 "Too many elements!");
3634
3635 Value *Splat = getIntegerSplat(
3636 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3637 Splat = convertValue(DL, IRB, Splat, ElementTy);
3638 if (NumElements > 1)
3639 Splat = getVectorSplat(Splat, NumElements);
3640
3641 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3642 NewAI.getAlign(), "oldload");
3643 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3644 } else if (IntTy) {
3645 // If this is a memset on an alloca where we can widen stores, insert the
3646 // set integer.
3647 assert(!II.isVolatile());
3648
3649 uint64_t Size = NewEndOffset - NewBeginOffset;
3650 V = getIntegerSplat(II.getValue(), Size);
3651
3652 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3653 EndOffset != NewAllocaEndOffset)) {
3654 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3655 NewAI.getAlign(), "oldload");
3656 Old = convertValue(DL, IRB, Old, IntTy);
3657 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3658 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3659 } else {
3660 assert(V->getType() == IntTy &&
3661 "Wrong type for an alloca wide integer!");
3662 }
3663 V = convertValue(DL, IRB, V, AllocaTy);
3664 } else {
3665 // Established these invariants above.
3666 assert(NewBeginOffset == NewAllocaBeginOffset);
3667 assert(NewEndOffset == NewAllocaEndOffset);
3668
3669 V = getIntegerSplat(II.getValue(),
3670 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3671 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3672 V = getVectorSplat(
3673 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3674
3675 V = convertValue(DL, IRB, V, AllocaTy);
3676 }
3677
3678 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3679 StoreInst *New =
3680 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3681 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3682 LLVMContext::MD_access_group});
3683 if (AATags)
3684 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3685 V->getType(), DL));
3686
3687 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3688 New, New->getPointerOperand(), V, DL);
3689
3690 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3691 return !II.isVolatile();
3692 }
3693
3694 bool visitMemTransferInst(MemTransferInst &II) {
3695 // Rewriting of memory transfer instructions can be a bit tricky. We break
3696 // them into two categories: split intrinsics and unsplit intrinsics.
3697
3698 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3699
3700 AAMDNodes AATags = II.getAAMetadata();
3701
3702 bool IsDest = &II.getRawDestUse() == OldUse;
3703 assert((IsDest && II.getRawDest() == OldPtr) ||
3704 (!IsDest && II.getRawSource() == OldPtr));
3705
3706 Align SliceAlign = getSliceAlign();
3707 // For unsplit intrinsics, we simply modify the source and destination
3708 // pointers in place. This isn't just an optimization, it is a matter of
3709 // correctness. With unsplit intrinsics we may be dealing with transfers
3710 // within a single alloca before SROA ran, or with transfers that have
3711 // a variable length. We may also be dealing with memmove instead of
3712 // memcpy, and so simply updating the pointers is all that is necessary to
3713 // update both source and dest of a single call.
3714 if (!IsSplittable) {
3715 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3716 if (IsDest) {
3717 // Update the address component of linked dbg.assigns.
3718 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(&II)) {
3719 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3720 DbgAssign->getAddress() == II.getDest())
3721 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3722 }
3723 II.setDest(AdjustedPtr);
3724 II.setDestAlignment(SliceAlign);
3725 } else {
3726 II.setSource(AdjustedPtr);
3727 II.setSourceAlignment(SliceAlign);
3728 }
3729
3730 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3731 deleteIfTriviallyDead(OldPtr);
3732 return false;
3733 }
3734 // For split transfer intrinsics we have an incredibly useful assurance:
3735 // the source and destination do not reside within the same alloca, and at
3736 // least one of them does not escape. This means that we can replace
3737 // memmove with memcpy, and we don't need to worry about all manner of
3738 // downsides to splitting and transforming the operations.
3739
3740 // If this doesn't map cleanly onto the alloca type, and that type isn't
3741 // a single value type, just emit a memcpy.
3742 bool EmitMemCpy =
3743 !VecTy && !IntTy &&
3744 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3745 SliceSize !=
3746 DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
3747 !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
3748 !NewAI.getAllocatedType()->isSingleValueType());
3749
3750 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3751 // size hasn't been shrunk based on analysis of the viable range, this is
3752 // a no-op.
3753 if (EmitMemCpy && &OldAI == &NewAI) {
3754 // Ensure the start lines up.
3755 assert(NewBeginOffset == BeginOffset);
3756
3757 // Rewrite the size as needed.
3758 if (NewEndOffset != EndOffset)
3759 II.setLength(NewEndOffset - NewBeginOffset);
3760 return false;
3761 }
3762 // Record this instruction for deletion.
3763 Pass.DeadInsts.push_back(&II);
3764
3765 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3766 // alloca that should be re-examined after rewriting this instruction.
3767 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3768 if (AllocaInst *AI =
3769 dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3770 assert(AI != &OldAI && AI != &NewAI &&
3771 "Splittable transfers cannot reach the same alloca on both ends.");
3772 Pass.Worklist.insert(AI);
3773 }
3774
3775 Type *OtherPtrTy = OtherPtr->getType();
3776 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3777
3778 // Compute the relative offset for the other pointer within the transfer.
3779 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3780 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3781 Align OtherAlign =
3782 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3783 OtherAlign =
3784 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3785
3786 if (EmitMemCpy) {
3787 // Compute the other pointer, folding as much as possible to produce
3788 // a single, simple GEP in most cases.
3789 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3790 OtherPtr->getName() + ".");
3791
3792 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3793 Type *SizeTy = II.getLength()->getType();
3794 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3795
3796 Value *DestPtr, *SrcPtr;
3797 MaybeAlign DestAlign, SrcAlign;
3798 // Note: IsDest is true iff we're copying into the new alloca slice
3799 if (IsDest) {
3800 DestPtr = OurPtr;
3801 DestAlign = SliceAlign;
3802 SrcPtr = OtherPtr;
3803 SrcAlign = OtherAlign;
3804 } else {
3805 DestPtr = OtherPtr;
3806 DestAlign = OtherAlign;
3807 SrcPtr = OurPtr;
3808 SrcAlign = SliceAlign;
3809 }
3810 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3811 Size, II.isVolatile());
3812 if (AATags)
3813 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3814
3815 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3816 if (IsDest) {
3817 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3818 &II, New, DestPtr, nullptr, DL);
3819 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3820 DestPtr->stripAndAccumulateConstantOffsets(
3821 DL, Offset, /*AllowNonInbounds*/ true))) {
3822 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3823 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3824 }
3825 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3826 return false;
3827 }
3828
3829 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3830 NewEndOffset == NewAllocaEndOffset;
3831 uint64_t Size = NewEndOffset - NewBeginOffset;
3832 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3833 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3834 unsigned NumElements = EndIndex - BeginIndex;
3835 IntegerType *SubIntTy =
3836 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3837
3838 // Reset the other pointer type to match the register type we're going to
3839 // use, but using the address space of the original other pointer.
3840 Type *OtherTy;
3841 if (VecTy && !IsWholeAlloca) {
3842 if (NumElements == 1)
3843 OtherTy = VecTy->getElementType();
3844 else
3845 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3846 } else if (IntTy && !IsWholeAlloca) {
3847 OtherTy = SubIntTy;
3848 } else {
3849 OtherTy = NewAllocaTy;
3850 }
3851
3852 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3853 OtherPtr->getName() + ".");
3854 MaybeAlign SrcAlign = OtherAlign;
3855 MaybeAlign DstAlign = SliceAlign;
3856 if (!IsDest)
3857 std::swap(SrcAlign, DstAlign);
3858
3859 Value *SrcPtr;
3860 Value *DstPtr;
3861
3862 if (IsDest) {
3863 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3864 SrcPtr = AdjPtr;
3865 } else {
3866 DstPtr = AdjPtr;
3867 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3868 }
3869
3870 Value *Src;
3871 if (VecTy && !IsWholeAlloca && !IsDest) {
3872 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3873 NewAI.getAlign(), "load");
3874 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3875 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3876 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3877 NewAI.getAlign(), "load");
3878 Src = convertValue(DL, IRB, Src, IntTy);
3879 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3880 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3881 } else {
3882 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3883 II.isVolatile(), "copyload");
3884 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3885 LLVMContext::MD_access_group});
3886 if (AATags)
3887 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3888 Load->getType(), DL));
3889 Src = Load;
3890 }
3891
3892 if (VecTy && !IsWholeAlloca && IsDest) {
3893 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3894 NewAI.getAlign(), "oldload");
3895 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3896 } else if (IntTy && !IsWholeAlloca && IsDest) {
3897 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3898 NewAI.getAlign(), "oldload");
3899 Old = convertValue(DL, IRB, Old, IntTy);
3900 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3901 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3902 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3903 }
3904
3905 StoreInst *Store = cast<StoreInst>(
3906 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3907 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3908 LLVMContext::MD_access_group});
3909 if (AATags)
3910 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3911 Src->getType(), DL));
3912
3913 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3914 if (IsDest) {
3915
3916 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3917 Store, DstPtr, Src, DL);
3918 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3919 DstPtr->stripAndAccumulateConstantOffsets(
3920 DL, Offset, /*AllowNonInbounds*/ true))) {
3921 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3922 &II, Store, DstPtr, Src, DL);
3923 }
3924
3925 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3926 return !II.isVolatile();
3927 }
3928
3929 bool visitIntrinsicInst(IntrinsicInst &II) {
3930 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3931 "Unexpected intrinsic!");
3932 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3933
3934 // Record this instruction for deletion.
3935 Pass.DeadInsts.push_back(&II);
3936
3937 if (II.isDroppable()) {
3938 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3939 // TODO For now we forget assumed information, this can be improved.
3940 OldPtr->dropDroppableUsesIn(II);
3941 return true;
3942 }
3943
3944 assert(II.getArgOperand(0) == OldPtr);
3945 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3946 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3947 Value *New;
3948 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3949 New = IRB.CreateLifetimeStart(Ptr);
3950 else
3951 New = IRB.CreateLifetimeEnd(Ptr);
3952
3953 (void)New;
3954 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3955
3956 return true;
3957 }
3958
3959 void fixLoadStoreAlign(Instruction &Root) {
3960 // This algorithm implements the same visitor loop as
3961 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3962 // or store found.
3963 SmallPtrSet<Instruction *, 4> Visited;
3964 SmallVector<Instruction *, 4> Uses;
3965 Visited.insert(&Root);
3966 Uses.push_back(&Root);
3967 do {
3968 Instruction *I = Uses.pop_back_val();
3969
3970 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3971 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3972 continue;
3973 }
3974 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3975 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3976 continue;
3977 }
3978
3979 assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3980 isa<PHINode>(I) || isa<SelectInst>(I) ||
3981 isa<GetElementPtrInst>(I));
3982 for (User *U : I->users())
3983 if (Visited.insert(cast<Instruction>(U)).second)
3984 Uses.push_back(cast<Instruction>(U));
3985 } while (!Uses.empty());
3986 }
3987
3988 bool visitPHINode(PHINode &PN) {
3989 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3990 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3991 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3992
3993 // We would like to compute a new pointer in only one place, but have it be
3994 // as local as possible to the PHI. To do that, we re-use the location of
3995 // the old pointer, which necessarily must be in the right position to
3996 // dominate the PHI.
3997 IRBuilderBase::InsertPointGuard Guard(IRB);
3998 if (isa<PHINode>(OldPtr))
3999 IRB.SetInsertPoint(OldPtr->getParent(),
4000 OldPtr->getParent()->getFirstInsertionPt());
4001 else
4002 IRB.SetInsertPoint(OldPtr);
4003 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
4004
4005 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4006 // Replace the operands which were using the old pointer.
4007 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
4008
4009 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
4010 deleteIfTriviallyDead(OldPtr);
4011
4012 // Fix the alignment of any loads or stores using this PHI node.
4013 fixLoadStoreAlign(PN);
4014
4015 // PHIs can't be promoted on their own, but often can be speculated. We
4016 // check the speculation outside of the rewriter so that we see the
4017 // fully-rewritten alloca.
4018 PHIUsers.insert(&PN);
4019 return true;
4020 }
4021
4022 bool visitSelectInst(SelectInst &SI) {
4023 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4024 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
4025 "Pointer isn't an operand!");
4026 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
4027 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
4028
4029 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4030 // Replace the operands which were using the old pointer.
4031 if (SI.getOperand(1) == OldPtr)
4032 SI.setOperand(1, NewPtr);
4033 if (SI.getOperand(2) == OldPtr)
4034 SI.setOperand(2, NewPtr);
4035
4036 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
4037 deleteIfTriviallyDead(OldPtr);
4038
4039 // Fix the alignment of any loads or stores using this select.
4040 fixLoadStoreAlign(SI);
4041
4042 // Selects can't be promoted on their own, but often can be speculated. We
4043 // check the speculation outside of the rewriter so that we see the
4044 // fully-rewritten alloca.
4045 SelectUsers.insert(&SI);
4046 return true;
4047 }
4048};
4049
4050/// Visitor to rewrite aggregate loads and stores as scalar.
4051///
4052/// This pass aggressively rewrites all aggregate loads and stores on
4053/// a particular pointer (or any pointer derived from it which we can identify)
4054/// with scalar loads and stores.
4055class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
4056 // Befriend the base class so it can delegate to private visit methods.
4057 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4058
4059 /// Queue of pointer uses to analyze and potentially rewrite.
4060 SmallVector<Use *, 8> Queue;
4061
4062 /// Set to prevent us from cycling with phi nodes and loops.
4063 SmallPtrSet<User *, 8> Visited;
4064
4065 /// The current pointer use being rewritten. This is used to dig up the used
4066 /// value (as opposed to the user).
4067 Use *U = nullptr;
4068
4069 /// Used to calculate offsets, and hence alignment, of subobjects.
4070 const DataLayout &DL;
4071
4072 IRBuilderTy &IRB;
4073
4074public:
4075 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
4076 : DL(DL), IRB(IRB) {}
4077
4078 /// Rewrite loads and stores through a pointer and all pointers derived from
4079 /// it.
4080 bool rewrite(Instruction &I) {
4081 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
4082 enqueueUsers(I);
4083 bool Changed = false;
4084 while (!Queue.empty()) {
4085 U = Queue.pop_back_val();
4086 Changed |= visit(cast<Instruction>(U->getUser()));
4087 }
4088 return Changed;
4089 }
4090
4091private:
4092 /// Enqueue all the users of the given instruction for further processing.
4093 /// This uses a set to de-duplicate users.
4094 void enqueueUsers(Instruction &I) {
4095 for (Use &U : I.uses())
4096 if (Visited.insert(U.getUser()).second)
4097 Queue.push_back(&U);
4098 }
4099
4100 // Conservative default is to not rewrite anything.
4101 bool visitInstruction(Instruction &I) { return false; }
4102
4103 /// Generic recursive split emission class.
4104 template <typename Derived> class OpSplitter {
4105 protected:
4106 /// The builder used to form new instructions.
4107 IRBuilderTy &IRB;
4108
4109 /// The indices which to be used with insert- or extractvalue to select the
4110 /// appropriate value within the aggregate.
4111 SmallVector<unsigned, 4> Indices;
4112
4113 /// The indices to a GEP instruction which will move Ptr to the correct slot
4114 /// within the aggregate.
4115 SmallVector<Value *, 4> GEPIndices;
4116
4117 /// The base pointer of the original op, used as a base for GEPing the
4118 /// split operations.
4119 Value *Ptr;
4120
4121 /// The base pointee type being GEPed into.
4122 Type *BaseTy;
4123
4124 /// Known alignment of the base pointer.
4125 Align BaseAlign;
4126
4127 /// To calculate offset of each component so we can correctly deduce
4128 /// alignments.
4129 const DataLayout &DL;
4130
4131 /// Initialize the splitter with an insertion point, Ptr and start with a
4132 /// single zero GEP index.
4133 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4134 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
4135 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4136 BaseAlign(BaseAlign), DL(DL) {
4137 IRB.SetInsertPoint(InsertionPoint);
4138 }
4139
4140 public:
4141 /// Generic recursive split emission routine.
4142 ///
4143 /// This method recursively splits an aggregate op (load or store) into
4144 /// scalar or vector ops. It splits recursively until it hits a single value
4145 /// and emits that single value operation via the template argument.
4146 ///
4147 /// The logic of this routine relies on GEPs and insertvalue and
4148 /// extractvalue all operating with the same fundamental index list, merely
4149 /// formatted differently (GEPs need actual values).
4150 ///
4151 /// \param Ty The type being split recursively into smaller ops.
4152 /// \param Agg The aggregate value being built up or stored, depending on
4153 /// whether this is splitting a load or a store respectively.
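///
/// For example, splitting an op on a {i32, [2 x float]} aggregate visits the
/// leaves with index lists {0}, {1, 0} and {1, 1}, emitting one scalar load
/// or store per leaf.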
4154 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
4155 if (Ty->isSingleValueType()) {
4156 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4157 return static_cast<Derived *>(this)->emitFunc(
4158 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
4159 }
4160
4161 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
4162 unsigned OldSize = Indices.size();
4163 (void)OldSize;
4164 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
4165 ++Idx) {
4166 assert(Indices.size() == OldSize && "Did not return to the old size");
4167 Indices.push_back(Idx);
4168 GEPIndices.push_back(IRB.getInt32(Idx));
4169 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
4170 GEPIndices.pop_back();
4171 Indices.pop_back();
4172 }
4173 return;
4174 }
4175
4176 if (StructType *STy = dyn_cast<StructType>(Ty)) {
4177 unsigned OldSize = Indices.size();
4178 (void)OldSize;
4179 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
4180 ++Idx) {
4181 assert(Indices.size() == OldSize && "Did not return to the old size");
4182 Indices.push_back(Idx);
4183 GEPIndices.push_back(IRB.getInt32(Idx));
4184 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
4185 GEPIndices.pop_back();
4186 Indices.pop_back();
4187 }
4188 return;
4189 }
4190
4191 llvm_unreachable("Only arrays and structs are aggregate loadable types");
4192 }
4193 };
4194
4195 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
4196 AAMDNodes AATags;
4197 // A vector to hold the split components that we want to emit
4198 // separate fake uses for.
4199 SmallVector<Value *, 4> Components;
4200 // A vector to hold all the fake uses of the struct that we are splitting.
4201 // Usually there should only be one, but we are handling the general case.
4202 SmallVector<Instruction *, 4> FakeUses;
4203
4204 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4205 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
4206 IRBuilderTy &IRB)
4207 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
4208 IRB),
4209 AATags(AATags) {}
4210
4211 /// Emit a leaf load of a single value. This is called at the leaves of the
4212 /// recursive emission to actually load values.
4213 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4214 assert(Ty->isSingleValueType());
4215 // Load the single value and insert it using the indices.
4216 Value *GEP =
4217 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4218 LoadInst *Load =
4219 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
4220
4221 APInt Offset(
4222 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4223 if (AATags &&
4224 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
4225 Load->setAAMetadata(
4226 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
4227 // Record the load so we can generate a fake use for this aggregate
4228 // component.
4229 Components.push_back(Load);
4230
4231 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
4232 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
4233 }
4234
4235 // Stash the fake uses that use the value generated by this instruction.
4236 void recordFakeUses(LoadInst &LI) {
4237 for (Use &U : LI.uses())
4238 if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
4239 if (II->getIntrinsicID() == Intrinsic::fake_use)
4240 FakeUses.push_back(II);
4241 }
4242
4243 // Replace all fake uses of the aggregate with a series of fake uses, one
4244 // for each split component.
4245 void emitFakeUses() {
4246 for (Instruction *I : FakeUses) {
4247 IRB.SetInsertPoint(I);
4248 for (auto *V : Components)
4249 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
4250 I->eraseFromParent();
4251 }
4252 }
4253 };
4254
4255 bool visitLoadInst(LoadInst &LI) {
4256 assert(LI.getPointerOperand() == *U);
4257 if (!LI.isSimple() || LI.getType()->isSingleValueType())
4258 return false;
4259
4260 // We have an aggregate being loaded, split it apart.
4261 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
4262 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
4263 getAdjustedAlignment(&LI, 0), DL, IRB);
4264 Splitter.recordFakeUses(LI);
4265 Value *V = PoisonValue::get(LI.getType());
4266 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
4267 Splitter.emitFakeUses();
4268 Visited.erase(&LI);
4269 LI.replaceAllUsesWith(V);
4270 LI.eraseFromParent();
4271 return true;
4272 }
4273
4274 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
4275 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4276 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4277 const DataLayout &DL, IRBuilderTy &IRB)
4278 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4279 DL, IRB),
4280 AATags(AATags), AggStore(AggStore) {}
4281 AAMDNodes AATags;
4282 StoreInst *AggStore;
4283 /// Emit a leaf store of a single value. This is called at the leaves of the
4284 /// recursive emission to actually produce stores.
4285 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4286 assert(Ty->isSingleValueType());
4287 // Extract the single value and store it using the indices.
4288 //
4289 // The gep and extractvalue values are factored out of the CreateStore
4290 // call to make the output independent of the argument evaluation order.
4291 Value *ExtractValue =
4292 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4293 Value *InBoundsGEP =
4294 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4295 StoreInst *Store =
4296 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4297
4298 APInt Offset(
4299 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4300 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
4301 if (AATags) {
4302 Store->setAAMetadata(AATags.adjustForAccess(
4303 Offset.getZExtValue(), ExtractValue->getType(), DL));
4304 }
4305
4306 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
4307 // If we cannot (because there's an intervening non-const or unbounded
4308 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
4309 // this instruction.
4310 Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
4311 if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
4312 uint64_t SizeInBits =
4313 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4314 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
4315 SizeInBits, AggStore, Store,
4316 Store->getPointerOperand(), Store->getValueOperand(),
4317 DL);
4318 } else {
4319 assert(at::getDVRAssignmentMarkers(AggStore).empty() &&
4320 "AT: unexpected debug.assign linked to store through "
4321 "unbounded GEP");
4322 }
4323 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
4324 }
4325 };
4326
4327 bool visitStoreInst(StoreInst &SI) {
4328 if (!SI.isSimple() || SI.getPointerOperand() != *U)
4329 return false;
4330 Value *V = SI.getValueOperand();
4331 if (V->getType()->isSingleValueType())
4332 return false;
4333
4334 // We have an aggregate being stored, split it apart.
4335 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4336 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4337 getAdjustedAlignment(&SI, 0), DL, IRB);
4338 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4339 Visited.erase(&SI);
4340 // The stores replacing SI each have markers describing fragments of the
4341 // assignment so delete the assignment markers linked to SI.
4342 at::deleteAssignmentMarkers(&SI);
4343 SI.eraseFromParent();
4344 return true;
4345 }
4346
4347 bool visitBitCastInst(BitCastInst &BC) {
4348 enqueueUsers(BC);
4349 return false;
4350 }
4351
4352 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4353 enqueueUsers(ASC);
4354 return false;
4355 }
4356
4357 // Unfold gep (select cond, ptr1, ptr2), idx
4358 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
4359 // and gep ptr, (select cond, idx1, idx2)
4360 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
4361 // We also allow for i1 zext indices, which are equivalent to selects.
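// E.g. gep %p, (zext i1 %c) is handled as gep %p, (select %c, 1, 0) and is
// unfolded into select %c, (gep %p, 1), (gep %p, 0).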
4362 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4363 // Check whether the GEP has exactly one select operand and all indices
4364 // will become constant after the transform.
4365 Instruction *Sel = nullptr;
4366 for (Value *Op : GEPI.indices()) {
4367 if (auto *SI = dyn_cast<SelectInst>(Op)) {
4368 if (Sel)
4369 return false;
4370
4371 Sel = SI;
4372 if (!isa<ConstantInt>(SI->getTrueValue()) ||
4373 !isa<ConstantInt>(SI->getFalseValue()))
4374 return false;
4375 continue;
4376 }
4377 if (auto *ZI = dyn_cast<ZExtInst>(Op)) {
4378 if (Sel)
4379 return false;
4380 Sel = ZI;
4381 if (!ZI->getSrcTy()->isIntegerTy(1))
4382 return false;
4383 continue;
4384 }
4385
4386 if (!isa<ConstantInt>(Op))
4387 return false;
4388 }
4389
4390 if (!Sel)
4391 return false;
4392
4393 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4394 dbgs() << " original: " << *Sel << "\n";
4395 dbgs() << " " << GEPI << "\n";);
4396
4397 auto GetNewOps = [&](Value *SelOp) {
4398 SmallVector<Value *> NewOps;
4399 for (Value *Op : GEPI.operands())
4400 if (Op == Sel)
4401 NewOps.push_back(SelOp);
4402 else
4403 NewOps.push_back(Op);
4404 return NewOps;
4405 };
4406
4407 Value *Cond, *True, *False;
4408 Instruction *MDFrom = nullptr;
4409 if (auto *SI = dyn_cast<SelectInst>(Sel)) {
4410 Cond = SI->getCondition();
4411 True = SI->getTrueValue();
4412 False = SI->getFalseValue();
4414 MDFrom = SI;
4415 } else {
4416 Cond = Sel->getOperand(0);
4417 True = ConstantInt::get(Sel->getType(), 1);
4418 False = ConstantInt::get(Sel->getType(), 0);
4419 }
4420 SmallVector<Value *> TrueOps = GetNewOps(True);
4421 SmallVector<Value *> FalseOps = GetNewOps(False);
4422
4423 IRB.SetInsertPoint(&GEPI);
4424 GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
4425
4426 Type *Ty = GEPI.getSourceElementType();
4427 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4428 True->getName() + ".sroa.gep", NW);
4429
4430 Value *NFalse =
4431 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4432 False->getName() + ".sroa.gep", NW);
4433
4434 Value *NSel = MDFrom
4435 ? IRB.CreateSelect(Cond, NTrue, NFalse,
4436 Sel->getName() + ".sroa.sel", MDFrom)
4437 : IRB.CreateSelectWithUnknownProfile(
4438 Cond, NTrue, NFalse, DEBUG_TYPE,
4439 Sel->getName() + ".sroa.sel");
4440 Visited.erase(&GEPI);
4441 GEPI.replaceAllUsesWith(NSel);
4442 GEPI.eraseFromParent();
4443 Instruction *NSelI = cast<Instruction>(NSel);
4444 Visited.insert(NSelI);
4445 enqueueUsers(*NSelI);
4446
4447 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4448 dbgs() << " " << *NFalse << "\n";
4449 dbgs() << " " << *NSel << "\n";);
4450
4451 return true;
4452 }
4453
4454 // Unfold gep (phi ptr1, ptr2), idx
4455 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4456 // and gep ptr, (phi idx1, idx2)
4457 // => phi ((gep ptr, idx1), (gep ptr, idx2))
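// For illustration only (hypothetical IR; %a and %b are arguments or static
// allocas, as required below):
//   %phi = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
//   %gep = getelementptr i32, ptr %phi, i64 1
// becomes (with the new GEPs emitted at the end of the entry block)
//   %gep.a = getelementptr i32, ptr %a, i64 1
//   %gep.b = getelementptr i32, ptr %b, i64 1
//   %phi = phi ptr [ %gep.a, %bb1 ], [ %gep.b, %bb2 ]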
4458 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4459 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4460 // operand (looking through the phi if it is the phi we want to unfold) is
4461 // an instruction besides a static alloca.
4462 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4463 auto IsInvalidPointerOperand = [](Value *V) {
4464 if (!isa<Instruction>(V))
4465 return false;
4466 if (auto *AI = dyn_cast<AllocaInst>(V))
4467 return !AI->isStaticAlloca();
4468 return true;
4469 };
4470 if (Phi) {
4471 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4472 return false;
4473 } else {
4474 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4475 return false;
4476 }
4477 // Check whether the GEP has exactly one phi operand (including the pointer
4478 // operand) and all indices will become constant after the transform.
4479 for (Value *Op : GEPI.indices()) {
4480 if (auto *SI = dyn_cast<PHINode>(Op)) {
4481 if (Phi)
4482 return false;
4483
4484 Phi = SI;
4485 if (!all_of(Phi->incoming_values(),
4486 [](Value *V) { return isa<ConstantInt>(V); }))
4487 return false;
4488 continue;
4489 }
4490
4491 if (!isa<ConstantInt>(Op))
4492 return false;
4493 }
4494
4495 if (!Phi)
4496 return false;
4497
4498 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4499 dbgs() << " original: " << *Phi << "\n";
4500 dbgs() << " " << GEPI << "\n";);
4501
4502 auto GetNewOps = [&](Value *PhiOp) {
4503 SmallVector<Value *> NewOps;
4504 for (Value *Op : GEPI.operands())
4505 if (Op == Phi)
4506 NewOps.push_back(PhiOp);
4507 else
4508 NewOps.push_back(Op);
4509 return NewOps;
4510 };
4511
4512 IRB.SetInsertPoint(Phi);
4513 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4514 Phi->getName() + ".sroa.phi");
4515
4516 Type *SourceTy = GEPI.getSourceElementType();
4517 // We only handle arguments, constants, and static allocas here, so we can
4518 // insert GEPs at the end of the entry block.
4519 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4520 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4521 Value *Op = Phi->getIncomingValue(I);
4522 BasicBlock *BB = Phi->getIncomingBlock(I);
4523 Value *NewGEP;
4524 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4525 NewGEP = NewPhi->getIncomingValue(NI);
4526 } else {
4527 SmallVector<Value *> NewOps = GetNewOps(Op);
4528 NewGEP =
4529 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4530 Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
4531 }
4532 NewPhi->addIncoming(NewGEP, BB);
4533 }
4534
4535 Visited.erase(&GEPI);
4536 GEPI.replaceAllUsesWith(NewPhi);
4537 GEPI.eraseFromParent();
4538 Visited.insert(NewPhi);
4539 enqueueUsers(*NewPhi);
4540
4541 LLVM_DEBUG(dbgs() << " to: ";
4542 for (Value *In
4543 : NewPhi->incoming_values()) dbgs()
4544 << "\n " << *In;
4545 dbgs() << "\n " << *NewPhi << '\n');
4546
4547 return true;
4548 }
4549
4550 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4551 if (unfoldGEPSelect(GEPI))
4552 return true;
4553
4554 if (unfoldGEPPhi(GEPI))
4555 return true;
4556
4557 enqueueUsers(GEPI);
4558 return false;
4559 }
4560
4561 bool visitPHINode(PHINode &PN) {
4562 enqueueUsers(PN);
4563 return false;
4564 }
4565
4566 bool visitSelectInst(SelectInst &SI) {
4567 enqueueUsers(SI);
4568 return false;
4569 }
4570};
4571
4572} // end anonymous namespace
4573
4574/// Strip aggregate type wrapping.
4575///
4576/// This removes no-op aggregate types wrapping an underlying type. It will
4577/// strip as many layers of types as it can without changing either the type
4578/// size or the allocated size.
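///
/// For example, assuming ordinary layout rules, a single-element wrapper such
/// as { { float } } strips down to float, while { i32, i32 } is returned
/// unchanged because stripping a layer would change the allocated size.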
4579 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
4580 if (Ty->isSingleValueType())
4581 return Ty;
4582
4583 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4584 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4585
4586 Type *InnerTy;
4587 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4588 InnerTy = ArrTy->getElementType();
4589 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4590 const StructLayout *SL = DL.getStructLayout(STy);
4591 unsigned Index = SL->getElementContainingOffset(0);
4592 InnerTy = STy->getElementType(Index);
4593 } else {
4594 return Ty;
4595 }
4596
4597 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4598 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4599 return Ty;
4600
4601 return stripAggregateTypeWrapping(DL, InnerTy);
4602}
4603
4604/// Try to find a partition of the aggregate type passed in for a given
4605/// offset and size.
4606///
4607/// This recurses through the aggregate type and tries to compute a subtype
4608/// based on the offset and size. When the offset and size span a sub-section
4609/// of an array, it will even compute a new array type for that sub-section,
4610/// and the same for structs.
4611///
4612/// Note that this routine is very strict and tries to find a partition of the
4613/// type which produces the *exact* right offset and size. It is not forgiving
4614 /// when the size or offset causes either end of the type-based partition to be off.
4615/// Also, this is a best-effort routine. It is reasonable to give up and not
4616/// return a type if necessary.
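///
/// For example, assuming ordinary layout with no unusual padding, a query of
/// (Offset=4, Size=8) against [12 x i8] yields [8 x i8], and the same query
/// against { i32, i32, i32 } yields the sub-struct { i32, i32 }; a query that
/// straddles an element boundary without covering it exactly yields nullptr.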
4617 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4618 uint64_t Size) {
4619 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4620 return stripAggregateTypeWrapping(DL, Ty);
4621 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4622 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4623 return nullptr;
4624
4625 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4626 Type *ElementTy;
4627 uint64_t TyNumElements;
4628 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4629 ElementTy = AT->getElementType();
4630 TyNumElements = AT->getNumElements();
4631 } else {
4632 // FIXME: This isn't right for vectors with non-byte-sized or
4633 // non-power-of-two sized elements.
4634 auto *VT = cast<FixedVectorType>(Ty);
4635 ElementTy = VT->getElementType();
4636 TyNumElements = VT->getNumElements();
4637 }
4638 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4639 uint64_t NumSkippedElements = Offset / ElementSize;
4640 if (NumSkippedElements >= TyNumElements)
4641 return nullptr;
4642 Offset -= NumSkippedElements * ElementSize;
4643
4644 // First check if we need to recurse.
4645 if (Offset > 0 || Size < ElementSize) {
4646 // Bail if the partition ends in a different array element.
4647 if ((Offset + Size) > ElementSize)
4648 return nullptr;
4649 // Recurse through the element type trying to peel off offset bytes.
4650 return getTypePartition(DL, ElementTy, Offset, Size);
4651 }
4652 assert(Offset == 0);
4653
4654 if (Size == ElementSize)
4655 return stripAggregateTypeWrapping(DL, ElementTy);
4656 assert(Size > ElementSize);
4657 uint64_t NumElements = Size / ElementSize;
4658 if (NumElements * ElementSize != Size)
4659 return nullptr;
4660 return ArrayType::get(ElementTy, NumElements);
4661 }
4662
4663 StructType *STy = dyn_cast<StructType>(Ty);
4664 if (!STy)
4665 return nullptr;
4666
4667 const StructLayout *SL = DL.getStructLayout(STy);
4668
4669 if (SL->getSizeInBits().isScalable())
4670 return nullptr;
4671
4672 if (Offset >= SL->getSizeInBytes())
4673 return nullptr;
4674 uint64_t EndOffset = Offset + Size;
4675 if (EndOffset > SL->getSizeInBytes())
4676 return nullptr;
4677
4678 unsigned Index = SL->getElementContainingOffset(Offset);
4679 Offset -= SL->getElementOffset(Index);
4680
4681 Type *ElementTy = STy->getElementType(Index);
4682 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4683 if (Offset >= ElementSize)
4684 return nullptr; // The offset points into alignment padding.
4685
4686 // See if any partition must be contained by the element.
4687 if (Offset > 0 || Size < ElementSize) {
4688 if ((Offset + Size) > ElementSize)
4689 return nullptr;
4690 return getTypePartition(DL, ElementTy, Offset, Size);
4691 }
4692 assert(Offset == 0);
4693
4694 if (Size == ElementSize)
4695 return stripAggregateTypeWrapping(DL, ElementTy);
4696
4697 StructType::element_iterator EI = STy->element_begin() + Index,
4698 EE = STy->element_end();
4699 if (EndOffset < SL->getSizeInBytes()) {
4700 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4701 if (Index == EndIndex)
4702 return nullptr; // Within a single element and its padding.
4703
4704 // Don't try to form "natural" types if the elements don't line up with the
4705 // expected size.
4706 // FIXME: We could potentially recurse down through the last element in the
4707 // sub-struct to find a natural end point.
4708 if (SL->getElementOffset(EndIndex) != EndOffset)
4709 return nullptr;
4710
4711 assert(Index < EndIndex);
4712 EE = STy->element_begin() + EndIndex;
4713 }
4714
4715 // Try to build up a sub-structure.
4716 StructType *SubTy =
4717 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4718 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4719 if (Size != SubSL->getSizeInBytes())
4720 return nullptr; // The sub-struct doesn't have quite the size needed.
4721
4722 return SubTy;
4723}
4724
4725/// Pre-split loads and stores to simplify rewriting.
4726///
4727/// We want to break up the splittable load+store pairs as much as
4728/// possible. This is important to do as a preprocessing step, as once we
4729/// start rewriting the accesses to partitions of the alloca we lose the
4730/// necessary information to correctly split apart paired loads and stores
4731/// which both point into this alloca. The case to consider is something like
4732/// the following:
4733///
4734/// %a = alloca [12 x i8]
4735/// %gep1 = getelementptr i8, ptr %a, i32 0
4736/// %gep2 = getelementptr i8, ptr %a, i32 4
4737/// %gep3 = getelementptr i8, ptr %a, i32 8
4738/// store float 0.0, ptr %gep1
4739/// store float 1.0, ptr %gep2
4740/// %v = load i64, ptr %gep1
4741/// store i64 %v, ptr %gep2
4742/// %f1 = load float, ptr %gep2
4743/// %f2 = load float, ptr %gep3
4744///
4745/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4746/// promote everything so we recover the 2 SSA values that should have been
4747/// there all along.
4748///
4749/// \returns true if any changes are made.
4750bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4751 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4752
4753 // Track the loads and stores which are candidates for pre-splitting here, in
4754 // the order they first appear during the partition scan. These give stable
4755 // iteration order and a basis for tracking which loads and stores we
4756 // actually split.
4757 SmallVector<LoadInst *, 4> Loads;
4758 SmallVector<StoreInst *, 4> Stores;
4759
4760 // We need to accumulate the splits required of each load or store where we
4761 // can find them via a direct lookup. This is important to cross-check loads
4762 // and stores against each other. We also track the slice so that we can kill
4763 // all the slices that end up split.
4764 struct SplitOffsets {
4765 Slice *S;
4766 std::vector<uint64_t> Splits;
4767 };
4768 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4769
4770 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4771 // This is important as we also cannot pre-split stores of those loads!
4772 // FIXME: This is all pretty gross. It means that we can be more aggressive
4773 // in pre-splitting when the load feeding the store happens to come from
4774 // a separate alloca. Put another way, the effectiveness of SROA would be
4775 // decreased by a frontend which just concatenated all of its local allocas
4776 // into one big flat alloca. But defeating such patterns is exactly the job
4777 // SROA is tasked with! Sadly, to not have this discrepancy we would have
4778 // to change store pre-splitting to actually force pre-splitting of the load
4779 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4780 // maybe it would make it more principled?
4781 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4782
4783 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4784 for (auto &P : AS.partitions()) {
4785 for (Slice &S : P) {
4786 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4787 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4788 // If this is a load we have to track that it can't participate in any
4789 // pre-splitting. If this is a store of a load we have to track that
4790 // that load also can't participate in any pre-splitting.
4791 if (auto *LI = dyn_cast<LoadInst>(I))
4792 UnsplittableLoads.insert(LI);
4793 else if (auto *SI = dyn_cast<StoreInst>(I))
4794 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4795 UnsplittableLoads.insert(LI);
4796 continue;
4797 }
4798 assert(P.endOffset() > S.beginOffset() &&
4799 "Empty or backwards partition!");
4800
4801 // Determine if this is a pre-splittable slice.
4802 if (auto *LI = dyn_cast<LoadInst>(I)) {
4803 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4804
4805 // The load must be used exclusively to store into other pointers for
4806 // us to be able to arbitrarily pre-split it. The stores must also be
4807 // simple to avoid changing semantics.
4808 auto IsLoadSimplyStored = [](LoadInst *LI) {
4809 for (User *LU : LI->users()) {
4810 auto *SI = dyn_cast<StoreInst>(LU);
4811 if (!SI || !SI->isSimple())
4812 return false;
4813 }
4814 return true;
4815 };
4816 if (!IsLoadSimplyStored(LI)) {
4817 UnsplittableLoads.insert(LI);
4818 continue;
4819 }
4820
4821 Loads.push_back(LI);
4822 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4823 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4824 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4825 continue;
4826 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4827 if (!StoredLoad || !StoredLoad->isSimple())
4828 continue;
4829 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4830
4831 Stores.push_back(SI);
4832 } else {
4833 // Other uses cannot be pre-split.
4834 continue;
4835 }
4836
4837 // Record the initial split.
4838 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4839 auto &Offsets = SplitOffsetsMap[I];
4840 assert(Offsets.Splits.empty() &&
4841 "Should not have splits the first time we see an instruction!");
4842 Offsets.S = &S;
4843 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4844 }
4845
4846 // Now scan the already split slices, and add a split for any of them which
4847 // we're going to pre-split.
4848 for (Slice *S : P.splitSliceTails()) {
4849 auto SplitOffsetsMapI =
4850 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4851 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4852 continue;
4853 auto &Offsets = SplitOffsetsMapI->second;
4854
4855 assert(Offsets.S == S && "Found a mismatched slice!");
4856 assert(!Offsets.Splits.empty() &&
4857 "Cannot have an empty set of splits on the second partition!");
4858 assert(Offsets.Splits.back() ==
4859 P.beginOffset() - Offsets.S->beginOffset() &&
4860 "Previous split does not end where this one begins!");
4861
4862 // Record each split. The last partition's end isn't needed as the size
4863 // of the slice dictates that.
4864 if (S->endOffset() > P.endOffset())
4865 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4866 }
4867 }
4868
4869 // We may have split loads where some of their stores are split stores. For
4870 // such loads and stores, we can only pre-split them if their splits exactly
4871 // match relative to their starting offset. We have to verify this prior to
4872 // any rewriting.
4873 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4874 // Lookup the load we are storing in our map of split
4875 // offsets.
4876 auto *LI = cast<LoadInst>(SI->getValueOperand());
4877 // If it was completely unsplittable, then we're done,
4878 // and this store can't be pre-split.
4879 if (UnsplittableLoads.count(LI))
4880 return true;
4881
4882 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4883 if (LoadOffsetsI == SplitOffsetsMap.end())
4884 return false; // Unrelated loads are definitely safe.
4885 auto &LoadOffsets = LoadOffsetsI->second;
4886
4887 // Now lookup the store's offsets.
4888 auto &StoreOffsets = SplitOffsetsMap[SI];
4889
4890 // If the relative offsets of each split in the load and
4891 // store match exactly, then we can split them and we
4892 // don't need to remove them here.
4893 if (LoadOffsets.Splits == StoreOffsets.Splits)
4894 return false;
4895
4896 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4897 << " " << *LI << "\n"
4898 << " " << *SI << "\n");
4899
4900 // We've found a store and load that we need to split
4901 // with mismatched relative splits. Just give up on them
4902 // and remove both instructions from our list of
4903 // candidates.
4904 UnsplittableLoads.insert(LI);
4905 return true;
4906 });
4907 // Now we have to go *back* through all the stores, because a later store may
4908 // have caused an earlier store's load to become unsplittable and if it is
4909 // unsplittable for the later store, then we can't rely on it being split in
4910 // the earlier store either.
4911 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4912 auto *LI = cast<LoadInst>(SI->getValueOperand());
4913 return UnsplittableLoads.count(LI);
4914 });
4915 // Once we've established all the loads that can't be split for some reason,
4916 // filter any that made it into our list out.
4917 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4918 return UnsplittableLoads.count(LI);
4919 });
4920
4921 // If no loads or stores are left, there is no pre-splitting to be done for
4922 // this alloca.
4923 if (Loads.empty() && Stores.empty())
4924 return false;
4925
4926 // From here on, we can't fail and will be building new accesses, so rig up
4927 // an IR builder.
4928 IRBuilderTy IRB(&AI);
4929
4930 // Collect the new slices which we will merge into the alloca slices.
4931 SmallVector<Slice, 4> NewSlices;
4932
4933 // Track any allocas we end up splitting loads and stores for so we iterate
4934 // on them.
4935 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4936
4937 // At this point, we have collected all of the loads and stores we can
4938 // pre-split, and the specific splits needed for them. We actually do the
4939 // splitting in a specific order to handle the case where one of the loads is
4940 // the value operand of one of the stores.
4941 //
4942 // First, we rewrite all of the split loads, and just accumulate each split
4943 // load in a parallel structure. We also build the slices for them and append
4944 // them to the alloca slices.
4945 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4946 std::vector<LoadInst *> SplitLoads;
4947 const DataLayout &DL = AI.getDataLayout();
4948 for (LoadInst *LI : Loads) {
4949 SplitLoads.clear();
4950
4951 auto &Offsets = SplitOffsetsMap[LI];
4952 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4953 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4954 "Load must have type size equal to store size");
4955 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4956 "Load must be >= slice size");
4957
4958 uint64_t BaseOffset = Offsets.S->beginOffset();
4959 assert(BaseOffset + SliceSize > BaseOffset &&
4960 "Cannot represent alloca access size using 64-bit integers!");
4961
4961
4962 Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
4963 IRB.SetInsertPoint(LI);
4964
4965 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4966
4967 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4968 int Idx = 0, Size = Offsets.Splits.size();
4969 for (;;) {
4970 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4971 auto AS = LI->getPointerAddressSpace();
4972 auto *PartPtrTy = LI->getPointerOperandType();
4973 LoadInst *PLoad = IRB.CreateAlignedLoad(
4974 PartTy,
4975 getAdjustedPtr(IRB, DL, BasePtr,
4976 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4977 PartPtrTy, BasePtr->getName() + "."),
4978 getAdjustedAlignment(LI, PartOffset),
4979 /*IsVolatile*/ false, LI->getName());
4980 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4981 LLVMContext::MD_access_group});
4982
4983 // Append this load onto the list of split loads so we can find it later
4984 // to rewrite the stores.
4985 SplitLoads.push_back(PLoad);
4986
4987 // Now build a new slice for the alloca.
4988 NewSlices.push_back(
4989 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4990 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4991 /*IsSplittable*/ false, nullptr));
4992 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4993 << ", " << NewSlices.back().endOffset()
4994 << "): " << *PLoad << "\n");
4995
4996 // See if we've handled all the splits.
4997 if (Idx >= Size)
4998 break;
4999
5000 // Setup the next partition.
5001 PartOffset = Offsets.Splits[Idx];
5002 ++Idx;
5003 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
5004 }
5005
5006 // Now that we have the split loads, do the slow walk over all uses of the
5007 // load and rewrite them as split stores, or save the split loads to use
5008 // below if the store is going to be split there anyways.
5009 bool DeferredStores = false;
5010 for (User *LU : LI->users()) {
5011 StoreInst *SI = cast<StoreInst>(LU);
5012 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
5013 DeferredStores = true;
5014 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
5015 << "\n");
5016 continue;
5017 }
5018
5019 Value *StoreBasePtr = SI->getPointerOperand();
5020 IRB.SetInsertPoint(SI);
5021 AAMDNodes AATags = SI->getAAMetadata();
5022
5023 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
5024
5025 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
5026 LoadInst *PLoad = SplitLoads[Idx];
5027 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
5028 auto *PartPtrTy = SI->getPointerOperandType();
5029
5030 auto AS = SI->getPointerAddressSpace();
5031 StoreInst *PStore = IRB.CreateAlignedStore(
5032 PLoad,
5033 getAdjustedPtr(IRB, DL, StoreBasePtr,
5034 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5035 PartPtrTy, StoreBasePtr->getName() + "."),
5036 getAdjustedAlignment(SI, PartOffset),
5037 /*IsVolatile*/ false);
5038 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5039 LLVMContext::MD_access_group,
5040 LLVMContext::MD_DIAssignID});
5041
5042 if (AATags)
5043 PStore->setAAMetadata(
5044 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
5045 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
5046 }
5047
5048 // We want to immediately iterate on any allocas impacted by splitting
5049 // this store, and we have to track any promotable alloca (indicated by
5050 // a direct store) as needing to be resplit because it is no longer
5051 // promotable.
5052 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
5053 ResplitPromotableAllocas.insert(OtherAI);
5054 Worklist.insert(OtherAI);
5055 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5056 StoreBasePtr->stripInBoundsOffsets())) {
5057 Worklist.insert(OtherAI);
5058 }
5059
5060 // Mark the original store as dead.
5061 DeadInsts.push_back(SI);
5062 }
5063
5064 // Save the split loads if there are deferred stores among the users.
5065 if (DeferredStores)
5066 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
5067
5068 // Mark the original load as dead and kill the original slice.
5069 DeadInsts.push_back(LI);
5070 Offsets.S->kill();
5071 }
5072
5073 // Second, we rewrite all of the split stores. At this point, we know that
5074 // all loads from this alloca have been split already. For stores of such
5075 // loads, we can simply look up the pre-existing split loads. For stores of
5076 // other loads, we split those loads first and then write split stores of
5077 // them.
5078 for (StoreInst *SI : Stores) {
5079 auto *LI = cast<LoadInst>(SI->getValueOperand());
5080 IntegerType *Ty = cast<IntegerType>(LI->getType());
5081 assert(Ty->getBitWidth() % 8 == 0);
5082 uint64_t StoreSize = Ty->getBitWidth() / 8;
5083 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5084
5085 auto &Offsets = SplitOffsetsMap[SI];
5086 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
5087 "Slice size should always match load size exactly!");
5088 uint64_t BaseOffset = Offsets.S->beginOffset();
5089 assert(BaseOffset + StoreSize > BaseOffset &&
5090 "Cannot represent alloca access size using 64-bit integers!");
5091
5092 Value *LoadBasePtr = LI->getPointerOperand();
5093 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
5094
5095 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
5096
5097 // Check whether we have an already split load.
5098 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5099 std::vector<LoadInst *> *SplitLoads = nullptr;
5100 if (SplitLoadsMapI != SplitLoadsMap.end()) {
5101 SplitLoads = &SplitLoadsMapI->second;
5102 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
5103 "Too few split loads for the number of splits in the store!");
5104 } else {
5105 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
5106 }
5107
5108 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5109 int Idx = 0, Size = Offsets.Splits.size();
5110 for (;;) {
5111 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5112 auto *LoadPartPtrTy = LI->getPointerOperandType();
5113 auto *StorePartPtrTy = SI->getPointerOperandType();
5114
5115 // Either lookup a split load or create one.
5116 LoadInst *PLoad;
5117 if (SplitLoads) {
5118 PLoad = (*SplitLoads)[Idx];
5119 } else {
5120 IRB.SetInsertPoint(LI);
5121 auto AS = LI->getPointerAddressSpace();
5122 PLoad = IRB.CreateAlignedLoad(
5123 PartTy,
5124 getAdjustedPtr(IRB, DL, LoadBasePtr,
5125 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5126 LoadPartPtrTy, LoadBasePtr->getName() + "."),
5127 getAdjustedAlignment(LI, PartOffset),
5128 /*IsVolatile*/ false, LI->getName());
5129 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5130 LLVMContext::MD_access_group});
5131 }
5132
5133 // And store this partition.
5134 IRB.SetInsertPoint(SI);
5135 auto AS = SI->getPointerAddressSpace();
5136 StoreInst *PStore = IRB.CreateAlignedStore(
5137 PLoad,
5138 getAdjustedPtr(IRB, DL, StoreBasePtr,
5139 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5140 StorePartPtrTy, StoreBasePtr->getName() + "."),
5141 getAdjustedAlignment(SI, PartOffset),
5142 /*IsVolatile*/ false);
5143 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5144 LLVMContext::MD_access_group});
5145
5146 // Now build a new slice for the alloca.
5147 // ProtectedFieldDisc==nullptr is a lie, but it doesn't matter because we
5148 // already determined that all accesses are consistent.
5149 NewSlices.push_back(
5150 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5151 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
5152 /*IsSplittable*/ false, nullptr));
5153 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
5154 << ", " << NewSlices.back().endOffset()
5155 << "): " << *PStore << "\n");
5156 if (!SplitLoads) {
5157 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
5158 }
5159
5160 // See if we've finished all the splits.
5161 if (Idx >= Size)
5162 break;
5163
5164 // Setup the next partition.
5165 PartOffset = Offsets.Splits[Idx];
5166 ++Idx;
5167 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5168 }
5169
5170 // We want to immediately iterate on any allocas impacted by splitting
5171 // this load, which is only relevant if it isn't a load of this alloca and
5172 // thus we didn't already split the loads above. We also have to keep track
5173 // of any promotable allocas we split loads on as they can no longer be
5174 // promoted.
5175 if (!SplitLoads) {
5176 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
5177 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5178 ResplitPromotableAllocas.insert(OtherAI);
5179 Worklist.insert(OtherAI);
5180 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5181 LoadBasePtr->stripInBoundsOffsets())) {
5182 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5183 Worklist.insert(OtherAI);
5184 }
5185 }
5186
5187 // Mark the original store as dead now that we've split it up and kill its
5188 // slice. Note that we leave the original load in place unless this store
5189 // was its only use. It may in turn be split up if it is an alloca load
5190 // for some other alloca, but it may be a normal load. This may introduce
5191 // redundant loads, but where those can be merged the rest of the optimizer
5192 // should handle the merging, and this uncovers SSA splits which is more
5193 // important. In practice, the original loads will almost always be fully
5194 // split and removed eventually, and the splits will be merged by any
5195 // trivial CSE, including instcombine.
5196 if (LI->hasOneUse()) {
5197 assert(*LI->user_begin() == SI && "Single use isn't this store!");
5198 DeadInsts.push_back(LI);
5199 }
5200 DeadInsts.push_back(SI);
5201 Offsets.S->kill();
5202 }
5203
5204 // Remove the killed slices that have been pre-split.
5205 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
5206
5207 // Insert our new slices. This will sort and merge them into the sorted
5208 // sequence.
5209 AS.insert(NewSlices);
5210
5211 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
5212#ifndef NDEBUG
5213 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5214 LLVM_DEBUG(AS.print(dbgs(), I, " "));
5215#endif
5216
5217 // Finally, don't try to promote any allocas that now require re-splitting.
5218 // They have already been added to the worklist above.
5219 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5220
5221 return true;
5222}
5223
5224/// Select a partition type for an alloca partition.
5225///
5226/// Try to compute a friendly type for this partition of the alloca. This
5227/// won't always succeed, in which case we fall back to a legal integer type
5228/// or an i8 array of an appropriate size.
5229///
5230/// \returns A tuple with the following elements:
5231/// - PartitionType: The computed type for this partition.
5232/// - IsIntegerWideningViable: True if integer widening promotion is used.
5233/// - VectorType: The vector type if vector promotion is used, otherwise
5234/// nullptr.
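///
/// For example, a 4-byte partition whose slices all load and store float will
/// likely produce (float, false, nullptr); a partition with no usable common
/// or partition type falls back to a legal integer type or, failing that, an
/// [N x i8] array, with both the widening flag and the vector type unset.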
5235static std::tuple<Type *, bool, VectorType *>
5236 selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI,
5237 LLVMContext &C) {
5238 // First check if the partition is viable for vector promotion.
5239 //
5240 // We prefer vector promotion over integer widening promotion when:
5241 // - The vector element type is a floating-point type.
5242 // - All the loads/stores to the alloca are vector loads/stores to the
5243 // entire alloca or load/store a single element of the vector.
5244 //
5245 // Otherwise when there is an integer vector with mixed type loads/stores we
5246 // prefer integer widening promotion because it's more likely the user is
5247 // doing bitwise arithmetic and we generate better code.
5248 VectorType *VecTy =
5249 isVectorPromotionViable(P, DL);
5250 // If the vector element type is a floating-point type, we prefer vector
5251 // promotion. If the vector has one element, let the below code select
5252 // whether we promote with the vector or scalar.
5253 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5254 VecTy->getElementCount().getFixedValue() > 1)
5255 return {VecTy, false, VecTy};
5256
5257 // Check if there is a common type that all slices of the partition use that
5258 // spans the partition.
5259 auto [CommonUseTy, LargestIntTy] =
5260 findCommonType(P.begin(), P.end(), P.endOffset());
5261 if (CommonUseTy) {
5262 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
5263 if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
5264 // We prefer vector promotion here because if vector promotion is viable
5265 // and there is a common type used, then it implies the second listed
5266 // condition for preferring vector promotion is true.
5267 if (VecTy)
5268 return {VecTy, false, VecTy};
5269 return {CommonUseTy, isIntegerWideningViable(P, CommonUseTy, DL),
5270 nullptr};
5271 }
5272 }
5273
5274 // Can we find an appropriate subtype in the original allocated
5275 // type?
5276 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
5277 P.beginOffset(), P.size())) {
5278 // If the partition is an integer array that can be spanned by a legal
5279 // integer type, prefer to represent it as a legal integer type because
5280 // it's more likely to be promotable.
5281 if (TypePartitionTy->isArrayTy() &&
5282 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5283 DL.isLegalInteger(P.size() * 8))
5284 TypePartitionTy = Type::getIntNTy(C, P.size() * 8);
5285 // There was no common type used, so we prefer integer widening promotion.
5286 if (isIntegerWideningViable(P, TypePartitionTy, DL))
5287 return {TypePartitionTy, true, nullptr};
5288 if (VecTy)
5289 return {VecTy, false, VecTy};
5290 // If we couldn't promote with TypePartitionTy, try with the largest
5291 // integer type used.
5292 if (LargestIntTy &&
5293 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
5294 isIntegerWideningViable(P, LargestIntTy, DL))
5295 return {LargestIntTy, true, nullptr};
5296
5297 // Fallback to TypePartitionTy and we probably won't promote.
5298 return {TypePartitionTy, false, nullptr};
5299 }
5300
5301 // Select the largest integer type used if it spans the partition.
5302 if (LargestIntTy &&
5303 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size())
5304 return {LargestIntTy, false, nullptr};
5305
5306 // Select a legal integer type if it spans the partition.
5307 if (DL.isLegalInteger(P.size() * 8))
5308 return {Type::getIntNTy(C, P.size() * 8), false, nullptr};
5309
5310 // Fallback to an i8 array.
5311 return {ArrayType::get(Type::getInt8Ty(C), P.size()), false, nullptr};
5312}
5313
5314/// Rewrite an alloca partition's users.
5315///
5316/// This routine drives both of the rewriting goals of the SROA pass. It tries
5317/// to rewrite uses of an alloca partition to be conducive for SSA value
5318/// promotion. If the partition needs a new, more refined alloca, this will
5319/// build that new alloca, preserving as much type information as possible, and
5320/// rewrite the uses of the old alloca to point at the new one and have the
5321/// appropriate new offsets. It also evaluates how successful the rewrite was
5322/// at enabling promotion and if it was successful queues the alloca to be
5323/// promoted.
5324AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
5325 Partition &P) {
5326 const DataLayout &DL = AI.getDataLayout();
5327 // Select the type for the new alloca that spans the partition.
5328 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5329 selectPartitionType(P, DL, AI, *C);
5330
5331 // Check for the case where we're going to rewrite to a new alloca of the
5332 // exact same type as the original, and with the same access offsets. In that
5333 // case, re-use the existing alloca, but still run through the rewriter to
5334 // perform phi and select speculation.
5335 // P.beginOffset() can be non-zero even with the same type in a case with
5336 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
5337 AllocaInst *NewAI;
5338 if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5339 NewAI = &AI;
5340 // FIXME: We should be able to bail at this point with "nothing changed".
5341 // FIXME: We might want to defer PHI speculation until after here.
5342 // FIXME: return nullptr;
5343 } else {
5344 // Make sure the alignment is compatible with P.beginOffset().
5345 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
5346 // If we will get at least this much alignment from the type alone, leave
5347 // the alloca's alignment unconstrained.
5348 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
5349 NewAI = new AllocaInst(
5350 PartitionTy, AI.getAddressSpace(), nullptr,
5351 IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
5352 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5353 AI.getIterator());
5354 // Copy the old AI debug location over to the new one.
5355 NewAI->setDebugLoc(AI.getDebugLoc());
5356 ++NumNewAllocas;
5357 }
5358
5359 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5360 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5361
5362 // Track the high watermark on the worklist as it is only relevant for
5363 // promoted allocas. We will reset it to this point if the alloca is not in
5364 // fact scheduled for promotion.
5365 unsigned PPWOldSize = PostPromotionWorklist.size();
5366 unsigned NumUses = 0;
5367 SmallSetVector<PHINode *, 8> PHIUsers;
5368 SmallSetVector<SelectInst *, 8> SelectUsers;
5369
5370 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
5371 P.endOffset(), IsIntegerWideningViable, VecTy,
5372 PHIUsers, SelectUsers);
5373 bool Promotable = true;
5374 // Check whether we can have tree-structured merge.
5375 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5376 NumUses += DeletedValues->size() + 1;
5377 for (Value *V : *DeletedValues)
5378 DeadInsts.push_back(V);
5379 } else {
5380 for (Slice *S : P.splitSliceTails()) {
5381 Promotable &= Rewriter.visit(S);
5382 ++NumUses;
5383 }
5384 for (Slice &S : P) {
5385 Promotable &= Rewriter.visit(&S);
5386 ++NumUses;
5387 }
5388 }
5389
5390 NumAllocaPartitionUses += NumUses;
5391 MaxUsesPerAllocaPartition.updateMax(NumUses);
5392
5393 // Now that we've processed all the slices in the new partition, check if any
5394 // PHIs or Selects would block promotion.
5395 for (PHINode *PHI : PHIUsers)
5396 if (!isSafePHIToSpeculate(*PHI)) {
5397 Promotable = false;
5398 PHIUsers.clear();
5399 SelectUsers.clear();
5400 break;
5401 }
5402
5403 SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
5404 NewSelectsToRewrite;
5405 NewSelectsToRewrite.reserve(SelectUsers.size());
5406 for (SelectInst *Sel : SelectUsers) {
5407 std::optional<RewriteableMemOps> Ops =
5408 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5409 if (!Ops) {
5410 Promotable = false;
5411 PHIUsers.clear();
5412 SelectUsers.clear();
5413 NewSelectsToRewrite.clear();
5414 break;
5415 }
5416 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5417 }
5418
5419 if (Promotable) {
5420 for (Use *U : AS.getDeadUsesIfPromotable()) {
5421 auto *OldInst = dyn_cast<Instruction>(U->get());
5422 Value::dropDroppableUse(*U);
5423 if (OldInst)
5424 if (isInstructionTriviallyDead(OldInst))
5425 DeadInsts.push_back(OldInst);
5426 }
5427 if (PHIUsers.empty() && SelectUsers.empty()) {
5428 // Promote the alloca.
5429 PromotableAllocas.insert(NewAI);
5430 } else {
5431 // If we have either PHIs or Selects to speculate, add them to those
5432 // worklists and re-queue the new alloca so that we promote it on the
5433 // next iteration.
5434 SpeculatablePHIs.insert_range(PHIUsers);
5435 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5436 NewSelectsToRewrite.size());
5437 for (auto &&KV : llvm::make_range(
5438 std::make_move_iterator(NewSelectsToRewrite.begin()),
5439 std::make_move_iterator(NewSelectsToRewrite.end())))
5440 SelectsToRewrite.insert(std::move(KV));
5441 Worklist.insert(NewAI);
5442 }
5443 } else {
5444 // Drop any post-promotion work items if promotion didn't happen.
5445 while (PostPromotionWorklist.size() > PPWOldSize)
5446 PostPromotionWorklist.pop_back();
5447
5448 // We couldn't promote and we didn't create a new partition, nothing
5449 // happened.
5450 if (NewAI == &AI)
5451 return nullptr;
5452
5453 // If we can't promote the alloca, iterate on it to check for new
5454 // refinements exposed by splitting the current alloca. Don't iterate on an
5455 // alloca which didn't actually change and didn't get promoted.
5456 Worklist.insert(NewAI);
5457 }
5458
5459 return NewAI;
5460}
5461
5462// There isn't a shared interface to get the "address" parts out of a
5463// dbg.declare and dbg.assign, so provide some wrappers.
5464 static bool isKillAddress(const DbgVariableRecord *DVR) {
5465 if (DVR->isDbgAssign())
5466 return DVR->isKillAddress();
5467 return DVR->isKillLocation();
5468}
5469
5470 static const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) {
5471 if (DVR->isDbgAssign())
5472 return DVR->getAddressExpression();
5473 return DVR->getExpression();
5474}
5475
5476/// Create or replace an existing fragment in a DIExpression with \p Frag.
5477/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
5478/// operation, add \p BitExtractOffset to the offset part.
5479///
5480/// Returns the new expression, or nullptr if this fails (see details below).
5481///
5482/// This function is similar to DIExpression::createFragmentExpression except
5483/// for 3 important distinctions:
5484/// 1. The new fragment isn't relative to an existing fragment.
5485/// 2. It assumes the computed location is a memory location. This means we
5486/// don't need to perform checks that creating the fragment preserves the
5487/// expression semantics.
5488/// 3. Existing extract_bits are modified independently of fragment changes
5489/// using \p BitExtractOffset. A change to the fragment offset or size
5490/// may affect a bit extract. But a bit extract offset can change
5491/// independently of the fragment dimensions.
5492///
5493/// Returns the new expression, or nullptr if one couldn't be created.
5494/// Ideally this is only used to signal that a bit-extract has become
5495/// zero-sized (and thus the new debug record has no size and can be
5496/// dropped), however, it fails for other reasons too - see the FIXME below.
5497///
5498/// FIXME: To keep the change that introduces this function NFC it bails
5499 /// in some situations unnecessarily, e.g. when fragment and bit extract
5500/// sizes differ.
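///
/// For example, with Frag = {OffsetInBits=32, SizeInBits=32} and no existing
/// bit extract, the result gains a trailing DW_OP_LLVM_fragment, 32, 32. With
/// an existing DW_OP_LLVM_extract_bits_zext, 16, 8 and BitExtractOffset=-8,
/// the extract is rewritten to start at bit 8 and no fragment op is appended.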
5501 static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
5502 DIExpression::FragmentInfo Frag,
5503 int64_t BitExtractOffset) {
5504 SmallVector<uint64_t, 8> Ops;
5505 bool HasFragment = false;
5506 bool HasBitExtract = false;
5507
5508 for (auto &Op : Expr->expr_ops()) {
5509 if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5510 HasFragment = true;
5511 continue;
5512 }
5513 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5514 Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5515 HasBitExtract = true;
5516 int64_t ExtractOffsetInBits = Op.getArg(0);
5517 int64_t ExtractSizeInBits = Op.getArg(1);
5518
5519 // DIExpression::createFragmentExpression doesn't know how to handle
5520 // a fragment that is smaller than the extract. Copy the behaviour
5521 // (bail) to avoid non-NFC changes.
5522 // FIXME: Don't do this.
5523 if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
5524 return nullptr;
5525
5526 assert(BitExtractOffset <= 0);
5527 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5528
5529 // DIExpression::createFragmentExpression doesn't know what to do
5530 // if the new extract starts "outside" the existing one. Copy the
5531 // behaviour (bail) to avoid non-NFC changes.
5532 // FIXME: Don't do this.
5533 if (AdjustedOffset < 0)
5534 return nullptr;
5535
5536 Ops.push_back(Op.getOp());
5537 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5538 Ops.push_back(ExtractSizeInBits);
5539 continue;
5540 }
5541 Op.appendToVector(Ops);
5542 }
5543
5544 // Unsupported by createFragmentExpression, so don't support it here yet to
5545 // preserve NFC-ness.
5546 if (HasFragment && HasBitExtract)
5547 return nullptr;
5548
5549 if (!HasBitExtract) {
5550 Ops.push_back(dwarf::DW_OP_LLVM_fragment);
5551 Ops.push_back(Frag.OffsetInBits);
5552 Ops.push_back(Frag.SizeInBits);
5553 }
5554 return DIExpression::get(Expr->getContext(), Ops);
5555}
5556
5557/// Insert a new DbgRecord.
5558/// \p Orig Original to copy record type, debug loc and variable from, and
5559/// additionally value and value expression for dbg_assign records.
5560/// \p NewAddr Location's new base address.
5561/// \p NewAddrExpr New expression to apply to address.
5562/// \p BeforeInst Insert position.
5563/// \p NewFragment New fragment (absolute, non-relative).
5564/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5565static void
5566 insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
5567 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5568 std::optional<DIExpression::FragmentInfo> NewFragment,
5569 int64_t BitExtractAdjustment) {
5570 (void)DIB;
5571
5572 // A dbg_assign puts fragment info in the value expression only. The address
5573 // expression has already been built: NewAddrExpr. A dbg_declare puts the
5574 // new fragment info into NewAddrExpr (as it only has one expression).
5575 DIExpression *NewFragmentExpr =
5576 Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5577 if (NewFragment)
5578 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5579 BitExtractAdjustment);
5580 if (!NewFragmentExpr)
5581 return;
5582
5583 if (Orig->isDbgDeclare()) {
5584 DbgVariableRecord *DVR = DbgVariableRecord::createDVRDeclare(
5585 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5586 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5587 BeforeInst->getIterator());
5588 return;
5589 }
5590
5591 if (Orig->isDbgValue()) {
5592 DbgVariableRecord *DVR = DbgVariableRecord::createDbgVariableRecord(
5593 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5594 // Drop debug information if the expression doesn't start with a
5595 // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
5596 // describes the address of alloca rather than the value inside the alloca.
5597 if (!NewFragmentExpr->startsWithDeref())
5598 DVR->setKillAddress();
5599 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5600 BeforeInst->getIterator());
5601 return;
5602 }
5603
5604 // Apply a DIAssignID to the store if it doesn't already have it.
5605 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5606 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5607 DIAssignID::getDistinct(NewAddr->getContext()));
5608 }
5609
5610 DbgVariableRecord *NewAssign = DbgVariableRecord::createLinkedDVRAssign(
5611 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5612 NewAddrExpr, Orig->getDebugLoc());
5613 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5614 (void)NewAssign;
5615}
5616
5617 /// Walks the slices of an alloca and forms partitions based on them,
5618/// rewriting each of their uses.
5619bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5620 if (AS.begin() == AS.end())
5621 return false;
5622
5623 unsigned NumPartitions = 0;
5624 bool Changed = false;
5625 const DataLayout &DL = AI.getModule()->getDataLayout();
5626
5627 // First try to pre-split loads and stores.
5628 Changed |= presplitLoadsAndStores(AI, AS);
5629
5630 // Now that we have identified any pre-splitting opportunities,
5631 // mark loads and stores unsplittable except for the following case.
5632 // We leave a slice splittable if all other slices are disjoint or fully
5633 // included in the slice, such as whole-alloca loads and stores.
5634 // If we fail to split these during pre-splitting, we want to force them
5635 // to be rewritten into a partition.
5636 bool IsSorted = true;
5637
5638 uint64_t AllocaSize =
5639 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5640 const uint64_t MaxBitVectorSize = 1024;
5641 if (AllocaSize <= MaxBitVectorSize) {
5642 // If a byte boundary is included in any load or store, a slice starting or
5643 // ending at the boundary is not splittable.
5644 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5645 for (Slice &S : AS)
5646 for (unsigned O = S.beginOffset() + 1;
5647 O < S.endOffset() && O < AllocaSize; O++)
5648 SplittableOffset.reset(O);
5649
5650 for (Slice &S : AS) {
5651 if (!S.isSplittable())
5652 continue;
5653
5654 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5655 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5656 continue;
5657
5658 if (isa<LoadInst>(S.getUse()->getUser()) ||
5659 isa<StoreInst>(S.getUse()->getUser())) {
5660 S.makeUnsplittable();
5661 IsSorted = false;
5662 }
5663 }
5664 } else {
5665 // We only allow whole-alloca splittable loads and stores
5666 // for a large alloca to avoid creating an overly large BitVector.
5667 for (Slice &S : AS) {
5668 if (!S.isSplittable())
5669 continue;
5670
5671 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5672 continue;
5673
5674 if (isa<LoadInst>(S.getUse()->getUser()) ||
5675 isa<StoreInst>(S.getUse()->getUser())) {
5676 S.makeUnsplittable();
5677 IsSorted = false;
5678 }
5679 }
5680 }
5681
5682 if (!IsSorted)
5683 llvm::stable_sort(AS);
5684
5685 /// Describes the allocas introduced by rewritePartition in order to migrate
5686 /// the debug info.
5687 struct Fragment {
5688 AllocaInst *Alloca;
5689 uint64_t Offset;
5690 uint64_t Size;
5691 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5692 : Alloca(AI), Offset(O), Size(S) {}
5693 };
5694 SmallVector<Fragment, 4> Fragments;
5695
5696 // Rewrite each partition.
5697 for (auto &P : AS.partitions()) {
5698 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5699 Changed = true;
5700 if (NewAI != &AI) {
5701 uint64_t SizeOfByte = 8;
5702 uint64_t AllocaSize =
5703 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5704 // Don't include any padding.
5705 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5706 Fragments.push_back(
5707 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5708 }
5709 }
5710 ++NumPartitions;
5711 }
5712
5713 NumAllocaPartitions += NumPartitions;
5714 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5715
5716 // Migrate debug information from the old alloca to the new alloca(s)
5717 // and the individual partitions.
5718 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5719 // Can't overlap with undef memory.
5720 if (isKillAddress(DbgVariable))
5721 return;
5722
5723 const Value *DbgPtr = DbgVariable->getAddress();
5724 DIExpression::FragmentInfo VarFrag =
5725 DbgVariable->getFragmentOrEntireVariable();
5726 // Get the address expression constant offset if one exists and the ops
5727 // that come after it.
5728 int64_t CurrentExprOffsetInBytes = 0;
5729 SmallVector<uint64_t> PostOffsetOps;
5730 if (!getAddressExpression(DbgVariable)
5731 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5732 return; // Couldn't interpret this DIExpression - drop the var.
5733
5734 // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
5735 int64_t ExtractOffsetInBits = 0;
5736 for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5737 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5739 ExtractOffsetInBits = Op.getArg(0);
5740 break;
5741 }
5742 }
5743
5744 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5745 for (auto Fragment : Fragments) {
5746 int64_t OffsetFromLocationInBits;
5747 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5748 // Find the variable fragment that the new alloca slice covers.
5749 // Drop debug info for this variable fragment if we can't compute an
5750 // intersect between it and the alloca slice.
5751 if (!DIExpression::calculateFragmentIntersect(
5752 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5753 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5754 NewDbgFragment, OffsetFromLocationInBits))
5755 continue; // Do not migrate this fragment to this slice.
5756
5757 // Zero sized fragment indicates there's no intersect between the variable
5758 // fragment and the alloca slice. Skip this slice for this variable
5759 // fragment.
5760 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5761 continue; // Do not migrate this fragment to this slice.
5762
5763 // No fragment indicates DbgVariable's variable or fragment exactly
5764 // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5765 if (!NewDbgFragment)
5766 NewDbgFragment = DbgVariable->getFragment();
5767
5768 // Reduce the new expression offset by the bit-extract offset since
5769 // we'll be keeping that.
5770 int64_t OffestFromNewAllocaInBits =
5771 OffsetFromLocationInBits - ExtractOffsetInBits;
5772 // We need to adjust an existing bit extract if the offset expression
5773 // can't eat the slack (i.e., if the new offset would be negative).
5774 int64_t BitExtractOffset =
5775 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5776 // The magnitude of a negative value indicates the number of bits into
5777 // the existing variable fragment at which the memory region begins. The new
5778 // variable fragment already excludes those bits - the new DbgPtr offset
5779 // only needs to be applied if it's positive.
5780 OffestFromNewAllocaInBits =
5781 std::max(int64_t(0), OffestFromNewAllocaInBits);
5782
5783 // Rebuild the expression:
5784 // {Offset(OffestFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5785 // Add NewDbgFragment later, because dbg.assigns don't want it in the
5786 // address expression but the value expression instead.
5787 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5788 if (OffestFromNewAllocaInBits > 0) {
5789 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5790 NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
5791 }
5792
5793 // Remove any existing intrinsics on the new alloca describing
5794 // the variable fragment.
5795 auto RemoveOne = [DbgVariable](auto *OldDII) {
5796 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5797 return LHS->getVariable() == RHS->getVariable() &&
5798 LHS->getDebugLoc()->getInlinedAt() ==
5799 RHS->getDebugLoc()->getInlinedAt();
5800 };
5801 if (SameVariableFragment(OldDII, DbgVariable))
5802 OldDII->eraseFromParent();
5803 };
5804 for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
5805 for_each(findDVRValues(Fragment.Alloca), RemoveOne);
5806 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5807 NewDbgFragment, BitExtractOffset);
5808 }
5809 };
5810
5811 // Migrate debug information from the old alloca to the new alloca(s)
5812 // and the individual partitions.
5813 for_each(findDVRDeclares(&AI), MigrateOne);
5814 for_each(findDVRValues(&AI), MigrateOne);
5815 for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
5816
5817 return Changed;
5818}
5819
5820/// Clobber a use with poison, deleting the used value if it becomes dead.
5821void SROA::clobberUse(Use &U) {
5822 Value *OldV = U;
5823 // Replace the use with a poison value.
5824 U = PoisonValue::get(OldV->getType());
5825
5826 // Check for this making an instruction dead. We have to garbage collect
5827 // all the dead instructions to ensure the uses of any alloca end up being
5828 // minimal.
5829 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5830 if (isInstructionTriviallyDead(OldI)) {
5831 DeadInsts.push_back(OldI);
5832 }
5833}
5834
5835/// A basic LoadAndStorePromoter that does not remove store nodes.
5836 class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
5837 public:
5838 BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
5839 Type *ZeroType)
5840 : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5841 bool shouldDelete(Instruction *I) const override {
5842 return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5843 }
5844
5846 return UndefValue::get(ZeroType);
5847 }
5848
5849private:
5850 Type *ZeroType;
5851};
5852
5853bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5854 // Look through each "partition", looking for slices with the same start/end
5855 // that do not overlap with any before them. The slices are sorted by
5856 // increasing beginOffset. We don't use AS.partitions(), as it will use a more
5857 // sophisticated algorithm that takes splittable slices into account.
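// Roughly, a simple store of an i32 %v followed by loads of the same 4-byte
// range is rewritten so the loads' users consume %v (or a PHI of the stored
// values) directly; the stores and the alloca itself are kept (see
// BasicLoadAndStorePromoter::shouldDelete above) because the alloca has
// escaped read-only.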
5858 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5859 bool AllSameAndValid = true;
5860 Type *PartitionType = nullptr;
5861 SmallVector<Instruction *, 8> Insts;
5862 uint64_t BeginOffset = 0;
5863 uint64_t EndOffset = 0;
5864
5865 auto Flush = [&]() {
5866 if (AllSameAndValid && !Insts.empty()) {
5867 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5868 << EndOffset << ")\n");
5869 SmallVector<PHINode *, 4> NewPHIs;
5870 SSAUpdater SSA(&NewPHIs);
5871 Insts.push_back(&AI);
5872 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5873 Promoter.run(Insts);
5874 }
5875 AllSameAndValid = true;
5876 PartitionType = nullptr;
5877 Insts.clear();
5878 };
5879
5880 for (Slice &S : AS) {
5881 auto *User = cast<Instruction>(S.getUse()->getUser());
5882 if (isAssumeLikeIntrinsic(User)) {
5883 LLVM_DEBUG({
5884 dbgs() << "Ignoring slice: ";
5885 AS.print(dbgs(), &S);
5886 });
5887 continue;
5888 }
5889 if (S.beginOffset() >= EndOffset) {
5890 Flush();
5891 BeginOffset = S.beginOffset();
5892 EndOffset = S.endOffset();
5893 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5894 if (AllSameAndValid) {
5895 LLVM_DEBUG({
5896 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5897 << EndOffset << ")";
5898 AS.print(dbgs(), &S);
5899 });
5900 AllSameAndValid = false;
5901 }
5902 EndOffset = std::max(EndOffset, S.endOffset());
5903 continue;
5904 }
5905
5906 if (auto *LI = dyn_cast<LoadInst>(User)) {
5907 Type *UserTy = LI->getType();
5908 // LoadAndStorePromoter requires all the types to be the same.
5909 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5910 AllSameAndValid = false;
5911 PartitionType = UserTy;
5912 Insts.push_back(User);
5913 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5914 Type *UserTy = SI->getValueOperand()->getType();
5915 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5916 AllSameAndValid = false;
5917 PartitionType = UserTy;
5918 Insts.push_back(User);
5919 } else {
5920 AllSameAndValid = false;
5921 }
5922 }
5923
5924 Flush();
5925 return true;
5926}
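// ---- Illustrative aside (not part of SROA.cpp) ----------------------------
// A minimal sketch of the kind of input the read-only-escape path above is
// meant for: the alloca below escapes only into a call that may read but not
// write or capture it, so the stored %v can be forwarded to the later load
// while the store and the alloca itself are left in place. The callee name
// and IR are made up for this example and kept in a string constant so they
// stay out of the build.
static const char *const EscapedReadOnlyExampleIR = R"IR(
declare void @inspect(ptr readonly captures(none))

define i32 @example(i32 %v) {
  %p = alloca i32
  store i32 %v, ptr %p
  call void @inspect(ptr %p)     ; read-only, non-capturing escape
  %r = load i32, ptr %p          ; candidate for rewriting to %v
  ret i32 %r
}
)IR";
// ----------------------------------------------------------------------------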
5927
5928/// Analyze an alloca for SROA.
5929///
5930/// This analyzes the alloca to ensure we can reason about it, builds
5931/// the slices of the alloca, and then hands it off to be split and
5932/// rewritten as needed.
5933std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5934SROA::runOnAlloca(AllocaInst &AI) {
5935 bool Changed = false;
5936 bool CFGChanged = false;
5937
5938 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5939 ++NumAllocasAnalyzed;
5940
5941 // Special case dead allocas, as they're trivial.
5942 if (AI.use_empty()) {
5943 AI.eraseFromParent();
5944 Changed = true;
5945 return {Changed, CFGChanged};
5946 }
5947 const DataLayout &DL = AI.getDataLayout();
5948
5949 // Skip alloca forms that this analysis can't handle.
5950 auto *AT = AI.getAllocatedType();
5951 TypeSize Size = DL.getTypeAllocSize(AT);
5952 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5953 Size.getFixedValue() == 0)
5954 return {Changed, CFGChanged};
5955
5956 // First, split any FCA loads and stores touching this alloca to promote
5957 // better splitting and promotion opportunities.
5958 IRBuilderTy IRB(&AI);
5959 AggLoadStoreRewriter AggRewriter(DL, IRB);
5960 Changed |= AggRewriter.rewrite(AI);
5961
5962 // Build the slices using a recursive instruction-visiting builder.
5963 AllocaSlices AS(DL, AI);
5964 LLVM_DEBUG(AS.print(dbgs()));
5965 if (AS.isEscaped())
5966 return {Changed, CFGChanged};
5967
5968 if (AS.isEscapedReadOnly()) {
5969 Changed |= propagateStoredValuesToLoads(AI, AS);
5970 return {Changed, CFGChanged};
5971 }
5972
5973 for (auto &P : AS.partitions()) {
5974 // For now, we can't split if a field is accessed both as a protected field
5975 // and as an unprotected one, because that would require introducing sign and
5976 // auth operations to convert between the protected and non-protected uses,
5977 // and this pass doesn't know how to do that. Also, this case is unlikely to
5978 // occur in normal code.
5979 std::optional<Value *> ProtectedFieldDisc;
5980 auto SliceHasMismatch = [&](Slice &S) {
5981 if (auto *II = dyn_cast<IntrinsicInst>(S.getUse()->getUser()))
5982 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
5983 II->getIntrinsicID() == Intrinsic::lifetime_end)
5984 return false;
5985 if (!ProtectedFieldDisc)
5986 ProtectedFieldDisc = S.ProtectedFieldDisc;
5987 return *ProtectedFieldDisc != S.ProtectedFieldDisc;
5988 };
5989 for (Slice &S : P)
5990 if (SliceHasMismatch(S))
5991 return {Changed, CFGChanged};
5992 for (Slice *S : P.splitSliceTails())
5993 if (SliceHasMismatch(*S))
5994 return {Changed, CFGChanged};
5995 }
5996
5997 // Delete all the dead users of this alloca before splitting and rewriting it.
5998 for (Instruction *DeadUser : AS.getDeadUsers()) {
5999 // Free up everything used by this instruction.
6000 for (Use &DeadOp : DeadUser->operands())
6001 clobberUse(DeadOp);
6002
6003 // Now replace the uses of this instruction.
6004 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
6005
6006 // And mark it for deletion.
6007 DeadInsts.push_back(DeadUser);
6008 Changed = true;
6009 }
6010 for (Use *DeadOp : AS.getDeadOperands()) {
6011 clobberUse(*DeadOp);
6012 Changed = true;
6013 }
6014 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
6015 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
6016
6017 DeadInsts.push_back(PFPUser);
6018 Changed = true;
6019 }
6020
6021 // No slices to split. Leave the dead alloca for a later pass to clean up.
6022 if (AS.begin() == AS.end())
6023 return {Changed, CFGChanged};
6024
6025 Changed |= splitAlloca(AI, AS);
6026
6027 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
6028 while (!SpeculatablePHIs.empty())
6029 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
6030
6031 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
6032 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
6033 while (!RemainingSelectsToRewrite.empty()) {
6034 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
6035 CFGChanged |=
6036 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
6037 }
6038
6039 return {Changed, CFGChanged};
6040}
6041
6042/// Delete the dead instructions accumulated in this run.
6043///
6044/// Recursively deletes the dead instructions we've accumulated. This is done
6045/// at the very end to maximize locality of the recursive delete and to
6046/// minimize the problems of invalidated instruction pointers as such pointers
6047/// are used heavily in the intermediate stages of the algorithm.
6048///
6049/// We also record the alloca instructions deleted here so that they aren't
6050/// subsequently handed to mem2reg to promote.
6051bool SROA::deleteDeadInstructions(
6052 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6053 bool Changed = false;
6054 while (!DeadInsts.empty()) {
6055 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
6056 if (!I)
6057 continue;
6058 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
6059
6060 // If the instruction is an alloca, find the possible dbg.declare connected
6061 // to it, and remove it too. We must do this before calling RAUW or we will
6062 // not be able to find it.
6063 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6064 DeletedAllocas.insert(AI);
6065 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
6066 OldDII->eraseFromParent();
6067 }
6068
6069 at::deleteAssignmentMarkers(I);
6070 I->replaceAllUsesWith(UndefValue::get(I->getType()));
6071
6072 for (Use &Operand : I->operands())
6073 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
6074 // Zero out the operand and see if it becomes trivially dead.
6075 Operand = nullptr;
6076 if (isInstructionTriviallyDead(U))
6077 DeadInsts.push_back(U);
6078 }
6079
6080 ++NumDeleted;
6081 I->eraseFromParent();
6082 Changed = true;
6083 }
6084 return Changed;
6085}
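// ---- Illustrative aside (not part of SROA.cpp) ----------------------------
// For comparison, a hypothetical helper showing the generic worklist-based
// clean-up from Transforms/Utils/Local.h; it performs the same recursive
// "delete and revisit operands" step as the loop above, minus the
// SROA-specific bookkeeping (DeletedAllocas, dbg-record removal, statistics).
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"

static void gcDeadInstructions(
    llvm::SmallVectorImpl<llvm::WeakTrackingVH> &DeadInsts) {
  // Precondition of this non-permissive helper: each queued handle points at
  // an instruction that is already trivially dead. It deletes them and
  // recursively deletes operands that become trivially dead as a result.
  llvm::RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
// ----------------------------------------------------------------------------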
6086/// Promote the allocas, using the best available technique.
6087///
6088/// This attempts to promote whatever allocas have been identified as viable in
6089/// the PromotableAllocas list. If that list is empty, there is nothing to do.
6090/// This function returns whether any promotion occurred.
6091bool SROA::promoteAllocas() {
6092 if (PromotableAllocas.empty())
6093 return false;
6094
6095 if (SROASkipMem2Reg) {
6096 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
6097 } else {
6098 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
6099 NumPromoted += PromotableAllocas.size();
6100 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6101 }
6102
6103 PromotableAllocas.clear();
6104 return true;
6105}
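// ---- Illustrative aside (not part of SROA.cpp) ----------------------------
// A standalone sketch of the mem2reg step promoteAllocas() delegates to:
// gather promotable allocas from the entry block and hand them to
// PromoteMemToReg. The helper name is made up; DT/AC would normally come from
// the pass manager, as they do in SROAPass::run below.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

static bool promoteEntryBlockAllocas(llvm::Function &F,
                                     llvm::DominatorTree &DT,
                                     llvm::AssumptionCache *AC) {
  llvm::SmallVector<llvm::AllocaInst *, 8> Allocas;
  for (llvm::Instruction &I : F.getEntryBlock())
    if (auto *AI = llvm::dyn_cast<llvm::AllocaInst>(&I))
      if (llvm::isAllocaPromotable(AI))
        Allocas.push_back(AI);
  if (Allocas.empty())
    return false;
  llvm::PromoteMemToReg(Allocas, DT, AC);
  return true;
}
// ----------------------------------------------------------------------------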
6106
6107std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
6108 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
6109
6110 const DataLayout &DL = F.getDataLayout();
6111 BasicBlock &EntryBB = F.getEntryBlock();
6112 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
6113 I != E; ++I) {
6114 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6115 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
6116 isAllocaPromotable(AI))
6117 PromotableAllocas.insert(AI);
6118 else
6119 Worklist.insert(AI);
6120 }
6121 }
6122
6123 bool Changed = false;
6124 bool CFGChanged = false;
6125 // A set of deleted alloca instruction pointers which should be removed from
6126 // the list of promotable allocas.
6127 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6128
6129 do {
6130 while (!Worklist.empty()) {
6131 auto [IterationChanged, IterationCFGChanged] =
6132 runOnAlloca(*Worklist.pop_back_val());
6133 Changed |= IterationChanged;
6134 CFGChanged |= IterationCFGChanged;
6135
6136 Changed |= deleteDeadInstructions(DeletedAllocas);
6137
6138 // Remove the deleted allocas from various lists so that we don't try to
6139 // continue processing them.
6140 if (!DeletedAllocas.empty()) {
6141 Worklist.set_subtract(DeletedAllocas);
6142 PostPromotionWorklist.set_subtract(DeletedAllocas);
6143 PromotableAllocas.set_subtract(DeletedAllocas);
6144 DeletedAllocas.clear();
6145 }
6146 }
6147
6148 Changed |= promoteAllocas();
6149
6150 Worklist = PostPromotionWorklist;
6151 PostPromotionWorklist.clear();
6152 } while (!Worklist.empty());
6153
6154 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6155 assert((!CFGChanged || !PreserveCFG) &&
6156 "Should not have modified the CFG when told to preserve it.");
6157
6158 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
6159 for (auto &BB : F) {
6160 RemoveRedundantDbgInstrs(&BB);
6161 }
6162 }
6163
6164 return {Changed, CFGChanged};
6165}
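// ---- Illustrative aside (not part of SROA.cpp) ----------------------------
// A hypothetical end-to-end driver showing the observable effect of the pass:
// a struct alloca whose fields are only stored and reloaded is rewritten into
// plain SSA values, so the printed module contains no alloca, store, or load.
// The IR string, function name, and driver setup are all made up for this
// sketch.
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/SROA.h"
#include <memory>

int main() {
  llvm::LLVMContext Ctx;
  llvm::SMDiagnostic Err;
  std::unique_ptr<llvm::Module> M = llvm::parseAssemblyString(
      "define i32 @f(i32 %a, i32 %b) {\n"
      "  %p = alloca { i32, i32 }\n"
      "  %p0 = getelementptr { i32, i32 }, ptr %p, i32 0, i32 0\n"
      "  store i32 %a, ptr %p0\n"
      "  %p1 = getelementptr { i32, i32 }, ptr %p, i32 0, i32 1\n"
      "  store i32 %b, ptr %p1\n"
      "  %v = load i32, ptr %p0\n"
      "  ret i32 %v\n"
      "}\n",
      Err, Ctx);
  if (!M)
    return 1;

  llvm::PassBuilder PB;
  llvm::FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);

  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG));
  FPM.run(*M->getFunction("f"), FAM);

  M->print(llvm::outs(), nullptr); // @f now simply returns %a
  return 0;
}
// ----------------------------------------------------------------------------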
6166
6167PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
6168 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
6169 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
6170 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6171 auto [Changed, CFGChanged] =
6172 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6173 if (!Changed)
6174 return PreservedAnalyses::all();
6175 PreservedAnalyses PA;
6176 if (!CFGChanged)
6177 PA.preserveSet<CFGAnalyses>();
6178 PA.preserve<DominatorTreeAnalysis>();
6179 return PA;
6180}
6181
6182void SROAPass::printPipeline(
6183 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6184 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6185 OS, MapClassName2PassName);
6186 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
6187 : "<modify-cfg>");
6188}
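// ---- Illustrative aside (not part of SROA.cpp) ----------------------------
// The strings emitted by printPipeline() correspond to the textual pipeline
// syntax, so the same choice can be requested by name when building a pass
// pipeline from a string. A hypothetical helper (error handling reduced to
// consumeError for brevity):
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

static llvm::FunctionPassManager buildSROAOnlyPipeline() {
  llvm::PassBuilder PB;
  llvm::FunctionPassManager FPM;
  // "sroa<modify-cfg>" selects the CFG-modifying variant instead.
  if (llvm::Error E = PB.parsePassPipeline(FPM, "sroa<preserve-cfg>"))
    llvm::consumeError(std::move(E));
  return FPM;
}
// ----------------------------------------------------------------------------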
6189
6190SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
6191
6192namespace {
6193
6194/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
6195class SROALegacyPass : public FunctionPass {
6196 SROAOptions PreserveCFG;
6197
6198public:
6199 static char ID;
6200
6201 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
6202 : FunctionPass(ID), PreserveCFG(PreserveCFG) {
6203 initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
6204 }
6205
6206 bool runOnFunction(Function &F) override {
6207 if (skipFunction(F))
6208 return false;
6209
6210 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6211 AssumptionCache &AC =
6212 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6213 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6214 auto [Changed, _] =
6215 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6216 return Changed;
6217 }
6218
6219 void getAnalysisUsage(AnalysisUsage &AU) const override {
6220 AU.addRequired<AssumptionCacheTracker>();
6221 AU.addRequired<DominatorTreeWrapperPass>();
6222 AU.addPreserved<GlobalsAAWrapperPass>();
6223 AU.addPreserved<DominatorTreeWrapperPass>();
6224 }
6225
6226 StringRef getPassName() const override { return "SROA"; }
6227};
6228
6229} // end anonymous namespace
6230
6231char SROALegacyPass::ID = 0;
6232
6233FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
6234 return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
6235 : SROAOptions::ModifyCFG);
6236}
6237
6238INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
6239 "Scalar Replacement Of Aggregates", false, false)
6240INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6241INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6242INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
6243 false, false)