LLVM  7.0.0svn
StackColoring.cpp
Go to the documentation of this file.
1 //===- StackColoring.cpp --------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements the stack-coloring optimization that looks for
11 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
12 // which represent the possible lifetime of stack slots. It attempts to
13 // merge disjoint stack slots and reduce the used stack space.
14 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15 //
16 // TODO: In the future we plan to improve stack coloring in the following ways:
17 // 1. Allow merging multiple small slots into a single larger slot at different
18 // offsets.
19 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
20 // spill slots.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
39 #include "llvm/CodeGen/Passes.h"
45 #include "llvm/Config/llvm-config.h"
46 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/Metadata.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Pass.h"
54 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <limits>
62 #include <memory>
63 #include <utility>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "stack-coloring"
68 
69 static cl::opt<bool>
70 DisableColoring("no-stack-coloring",
71  cl::init(false), cl::Hidden,
72  cl::desc("Disable stack coloring"));
73 
74 /// The user may write code that uses allocas outside of the declared lifetime
75 /// zone. This can happen when the user returns a reference to a local
76 /// data-structure. We can detect these cases and decide not to optimize the
77 /// code. If this flag is enabled, we try to save the user. This option
78 /// is treated as overriding LifetimeStartOnFirstUse below.
79 static cl::opt<bool>
80 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
81  cl::init(false), cl::Hidden,
82  cl::desc("Do not optimize lifetime zones that "
83  "are broken"));
84 
85 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
86 /// use of stack slot as start of slot lifetime, as opposed to looking
87 /// for LIFETIME_START marker). See "Implementation notes" below for
88 /// more info.
89 static cl::opt<bool>
90 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
91  cl::init(true), cl::Hidden,
92  cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
93 
94 
95 STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
96 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
97 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
98 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
99 
100 //===----------------------------------------------------------------------===//
101 // StackColoring Pass
102 //===----------------------------------------------------------------------===//
103 //
104 // Stack Coloring reduces stack usage by merging stack slots when they
105 // can't be used together. For example, consider the following C program:
106 //
107 // void bar(char *, int);
108 // void foo(bool var) {
109 // A: {
110 // char z[4096];
111 // bar(z, 0);
112 // }
113 //
114 // char *p;
115 // char x[4096];
116 // char y[4096];
117 // if (var) {
118 // p = x;
119 // } else {
120 // bar(y, 1);
121 // p = y + 1024;
122 // }
123 // B:
124 // bar(p, 2);
125 // }
126 //
127 // Naively-compiled, this program would use 12k of stack space. However, the
128 // stack slot corresponding to `z` is always destroyed before either of the
129 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
130 // is true, while `y` is only used if `var` is false. So in no time are 2
131 // of the stack slots used together, and therefore we can merge them,
132 // compiling the function using only a single 4k alloca:
133 //
134 // void foo(bool var) { // equivalent
135 // char x[4096];
136 // char *p;
137 // bar(x, 0);
138 // if (var) {
139 // p = x;
140 // } else {
141 // bar(x, 1);
142 // p = x + 1024;
143 // }
144 // bar(p, 2);
145 // }
146 //
147 // This is an important optimization if we want stack space to be under
148 // control in large functions, both open-coded ones and ones created by
149 // inlining.
150 //
151 // Implementation Notes:
152 // ---------------------
153 //
154 // An important part of the above reasoning is that `z` can't be accessed
155 // while the latter 2 calls to `bar` are running. This is justified because
156 // `z`'s lifetime is over after we exit from block `A:`, so any further
157 // accesses to it would be UB. The way we represent this information
158 // in LLVM is by having frontends delimit blocks with `lifetime.start`
159 // and `lifetime.end` intrinsics.
160 //
161 // The effect of these intrinsics seems to be as follows (maybe I should
162 // specify this in the reference?):
163 //
164 // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
165 // lifetime intrinsic refers to that stack slot, in which case
166 // it is marked as *in-scope*.
167 // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
168 // the stack slot is overwritten with `undef`.
169 // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
170 // L4) on function exit, all stack slots are marked as *out-of-scope*.
171 // L5) `lifetime.end` is a no-op when called on a slot that is already
172 // *out-of-scope*.
173 // L6) memory accesses to *out-of-scope* stack slots are UB.
174 // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
175 // are invalidated, unless the slot is "degenerate". This is used to
176 // justify not marking slots as in-use until the pointer to them is
177 // used, but feels a bit hacky in the presence of things like LICM. See
178 // the "Degenerate Slots" section for more details.
179 //
180 // Now, let's ground stack coloring on these rules. We'll define a slot
181 // as *in-use* at a (dynamic) point in execution if it either can be
182 // written to at that point, or if it has a live and non-undef content
183 // at that point.
184 //
185 // Obviously, slots that are never *in-use* together can be merged, and
186 // in our example `foo`, the slots for `x`, `y` and `z` are never
187 // in-use together (of course, sometimes slots that *are* in-use together
188 // might still be mergable, but we don't care about that here).
189 //
190 // In this implementation, we successively merge pairs of slots that are
191 // not *in-use* together. We could be smarter - for example, we could merge
192 // a single large slot with 2 small slots, or we could construct the
193 // interference graph and run a "smart" graph coloring algorithm, but with
194 // that aside, how do we find out whether a pair of slots might be *in-use*
195 // together?
196 //
197 // From our rules, we see that *out-of-scope* slots are never *in-use*,
198 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
199 // until their address is taken. Therefore, we can approximate slot activity
200 // using dataflow.
201 //
202 // A subtle point: naively, we might try to figure out which pairs of
203 // stack-slots interfere by propagating `S in-use` through the CFG for every
204 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
205 // which they are both *in-use*.
206 //
207 // That is sound, but overly conservative in some cases: in our (artificial)
208 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
209 // as `x` is only in use if we came in from the `var` edge and `y` only
210 // if we came from the `!var` edge, they still can't be in use together.
211 // See PR32488 for an important real-life case.
212 //
213 // If we wanted to find all points of interference precisely, we could
214 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
215 // would be precise, but requires propagating `O(n^2)` dataflow facts.
216 //
217 // However, we aren't interested in the *set* of points of interference
218 // between 2 stack slots, only *whether* there *is* such a point. So we
219 // can rely on a little trick: for `S` and `T` to be in-use together,
220 // one of them needs to become in-use while the other is in-use (or
221 // they might both become in use simultaneously). We can check this
222 // by also keeping track of the points at which a stack slot might *start*
223 // being in-use.
224 //
225 // Exact first use:
226 // ----------------
227 //
228 // Consider the following motivating example:
229 //
230 // int foo() {
231 // char b1[1024], b2[1024];
232 // if (...) {
233 // char b3[1024];
234 // <uses of b1, b3>;
235 // return x;
236 // } else {
237 // char b4[1024], b5[1024];
238 // <uses of b2, b4, b5>;
239 // return y;
240 // }
241 // }
242 //
243 // In the code above, "b3" and "b4" are declared in distinct lexical
244 // scopes, meaning that it is easy to prove that they can share the
245 // same stack slot. Variables "b1" and "b2" are declared in the same
246 // scope, meaning that from a lexical point of view, their lifetimes
247 // overlap. From a control flow pointer of view, however, the two
248 // variables are accessed in disjoint regions of the CFG, thus it
249 // should be possible for them to share the same stack slot. An ideal
250 // stack allocation for the function above would look like:
251 //
252 // slot 0: b1, b2
253 // slot 1: b3, b4
254 // slot 2: b5
255 //
256 // Achieving this allocation is tricky, however, due to the way
257 // lifetime markers are inserted. Here is a simplified view of the
258 // control flow graph for the code above:
259 //
260 // +------ block 0 -------+
261 // 0| LIFETIME_START b1, b2 |
262 // 1| <test 'if' condition> |
263 // +-----------------------+
264 // ./ \.
265 // +------ block 1 -------+ +------ block 2 -------+
266 // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
267 // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
268 // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
269 // +-----------------------+ +-----------------------+
270 // \. /.
271 // +------ block 3 -------+
272 // 8| <cleanupcode> |
273 // 9| LIFETIME_END b1, b2 |
274 // 10| return |
275 // +-----------------------+
276 //
277 // If we create live intervals for the variables above strictly based
278 // on the lifetime markers, we'll get the set of intervals on the
279 // left. If we ignore the lifetime start markers and instead treat a
280 // variable's lifetime as beginning with the first reference to the
281 // var, then we get the intervals on the right.
282 //
283 // LIFETIME_START First Use
284 // b1: [0,9] [3,4] [8,9]
285 // b2: [0,9] [6,9]
286 // b3: [2,4] [3,4]
287 // b4: [5,7] [6,7]
288 // b5: [5,7] [6,7]
289 //
290 // For the intervals on the left, the best we can do is overlap two
291 // variables (b3 and b4, for example); this gives us a stack size of
292 // 4*1024 bytes, not ideal. When treating first-use as the start of a
293 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
294 // byte stack (better).
295 //
296 // Degenerate Slots:
297 // -----------------
298 //
299 // Relying entirely on first-use of stack slots is problematic,
300 // however, due to the fact that optimizations can sometimes migrate
301 // uses of a variable outside of its lifetime start/end region. Here
302 // is an example:
303 //
304 // int bar() {
305 // char b1[1024], b2[1024];
306 // if (...) {
307 // <uses of b2>
308 // return y;
309 // } else {
310 // <uses of b1>
311 // while (...) {
312 // char b3[1024];
313 // <uses of b3>
314 // }
315 // }
316 // }
317 //
318 // Before optimization, the control flow graph for the code above
319 // might look like the following:
320 //
321 // +------ block 0 -------+
322 // 0| LIFETIME_START b1, b2 |
323 // 1| <test 'if' condition> |
324 // +-----------------------+
325 // ./ \.
326 // +------ block 1 -------+ +------- block 2 -------+
327 // 2| <uses of b2> | 3| <uses of b1> |
328 // +-----------------------+ +-----------------------+
329 // | |
330 // | +------- block 3 -------+ <-\.
331 // | 4| <while condition> | |
332 // | +-----------------------+ |
333 // | / | |
334 // | / +------- block 4 -------+
335 // \ / 5| LIFETIME_START b3 | |
336 // \ / 6| <uses of b3> | |
337 // \ / 7| LIFETIME_END b3 | |
338 // \ | +------------------------+ |
339 // \ | \ /
340 // +------ block 5 -----+ \---------------
341 // 8| <cleanupcode> |
342 // 9| LIFETIME_END b1, b2 |
343 // 10| return |
344 // +---------------------+
345 //
346 // During optimization, however, it can happen that an instruction
347 // computing an address in "b3" (for example, a loop-invariant GEP) is
348 // hoisted up out of the loop from block 4 to block 2. [Note that
349 // this is not an actual load from the stack, only an instruction that
350 // computes the address to be loaded]. If this happens, there is now a
351 // path leading from the first use of b3 to the return instruction
352 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
353 // now larger than if we were computing live intervals strictly based
354 // on lifetime markers. In the example above, this lengthened lifetime
355 // would mean that it would appear illegal to overlap b3 with b2.
356 //
357 // To deal with this such cases, the code in ::collectMarkers() below
358 // tries to identify "degenerate" slots -- those slots where on a single
359 // forward pass through the CFG we encounter a first reference to slot
360 // K before we hit the slot K lifetime start marker. For such slots,
361 // we fall back on using the lifetime start marker as the beginning of
362 // the variable's lifetime. NB: with this implementation, slots can
363 // appear degenerate in cases where there is unstructured control flow:
364 //
365 // if (q) goto mid;
366 // if (x > 9) {
367 // int b[100];
368 // memcpy(&b[0], ...);
369 // mid: b[k] = ...;
370 // abc(&b);
371 // }
372 //
373 // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
374 // before visiting the memcpy block (which will contain the lifetime start
375 // for "b" then it will appear that 'b' has a degenerate lifetime.
376 //
377 
378 namespace {
379 
380 /// StackColoring - A machine pass for merging disjoint stack allocations,
381 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
382 class StackColoring : public MachineFunctionPass {
383  MachineFrameInfo *MFI;
384  MachineFunction *MF;
385 
386  /// A class representing liveness information for a single basic block.
387  /// Each bit in the BitVector represents the liveness property
388  /// for a different stack slot.
389  struct BlockLifetimeInfo {
390  /// Which slots BEGINs in each basic block.
391  BitVector Begin;
392 
393  /// Which slots ENDs in each basic block.
394  BitVector End;
395 
396  /// Which slots are marked as LIVE_IN, coming into each basic block.
397  BitVector LiveIn;
398 
399  /// Which slots are marked as LIVE_OUT, coming out of each basic block.
400  BitVector LiveOut;
401  };
402 
403  /// Maps active slots (per bit) for each basic block.
405  LivenessMap BlockLiveness;
406 
407  /// Maps serial numbers to basic blocks.
409 
410  /// Maps basic blocks to a serial number.
411  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
412 
413  /// Maps slots to their use interval. Outside of this interval, slots
414  /// values are either dead or `undef` and they will not be written to.
416 
417  /// Maps slots to the points where they can become in-use.
419 
420  /// VNInfo is used for the construction of LiveIntervals.
421  VNInfo::Allocator VNInfoAllocator;
422 
423  /// SlotIndex analysis object.
424  SlotIndexes *Indexes;
425 
426  /// The stack protector object.
428 
429  /// The list of lifetime markers found. These markers are to be removed
430  /// once the coloring is done.
432 
433  /// Record the FI slots for which we have seen some sort of
434  /// lifetime marker (either start or end).
435  BitVector InterestingSlots;
436 
437  /// FI slots that need to be handled conservatively (for these
438  /// slots lifetime-start-on-first-use is disabled).
439  BitVector ConservativeSlots;
440 
441  /// Number of iterations taken during data flow analysis.
442  unsigned NumIterations;
443 
444 public:
445  static char ID;
446 
447  StackColoring() : MachineFunctionPass(ID) {
449  }
450 
451  void getAnalysisUsage(AnalysisUsage &AU) const override;
452  bool runOnMachineFunction(MachineFunction &MF) override;
453 
454 private:
455  /// Used in collectMarkers
456  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
457 
458  /// Debug.
459  void dump() const;
460  void dumpIntervals() const;
461  void dumpBB(MachineBasicBlock *MBB) const;
462  void dumpBV(const char *tag, const BitVector &BV) const;
463 
464  /// Removes all of the lifetime marker instructions from the function.
465  /// \returns true if any markers were removed.
466  bool removeAllMarkers();
467 
468  /// Scan the machine function and find all of the lifetime markers.
469  /// Record the findings in the BEGIN and END vectors.
470  /// \returns the number of markers found.
471  unsigned collectMarkers(unsigned NumSlot);
472 
473  /// Perform the dataflow calculation and calculate the lifetime for each of
474  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
475  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
476  /// in and out blocks.
477  void calculateLocalLiveness();
478 
479  /// Returns TRUE if we're using the first-use-begins-lifetime method for
480  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
481  bool applyFirstUse(int Slot) {
483  return false;
484  if (ConservativeSlots.test(Slot))
485  return false;
486  return true;
487  }
488 
489  /// Examines the specified instruction and returns TRUE if the instruction
490  /// represents the start or end of an interesting lifetime. The slot or slots
491  /// starting or ending are added to the vector "slots" and "isStart" is set
492  /// accordingly.
493  /// \returns True if inst contains a lifetime start or end
494  bool isLifetimeStartOrEnd(const MachineInstr &MI,
496  bool &isStart);
497 
498  /// Construct the LiveIntervals for the slots.
499  void calculateLiveIntervals(unsigned NumSlots);
500 
501  /// Go over the machine function and change instructions which use stack
502  /// slots to use the joint slots.
503  void remapInstructions(DenseMap<int, int> &SlotRemap);
504 
505  /// The input program may contain instructions which are not inside lifetime
506  /// markers. This can happen due to a bug in the compiler or due to a bug in
507  /// user code (for example, returning a reference to a local variable).
508  /// This procedure checks all of the instructions in the function and
509  /// invalidates lifetime ranges which do not contain all of the instructions
510  /// which access that frame slot.
511  void removeInvalidSlotRanges();
512 
513  /// Map entries which point to other entries to their destination.
514  /// A->B->C becomes A->C.
515  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
516 };
517 
518 } // end anonymous namespace
519 
520 char StackColoring::ID = 0;
521 
523 
524 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
525  "Merge disjoint stack slots", false, false)
529  "Merge disjoint stack slots", false, false)
530 
531 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
532  AU.addRequired<SlotIndexes>();
533  AU.addRequired<StackProtector>();
535 }
536 
537 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
538 LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
539  const BitVector &BV) const {
540  dbgs() << tag << " : { ";
541  for (unsigned I = 0, E = BV.size(); I != E; ++I)
542  dbgs() << BV.test(I) << " ";
543  dbgs() << "}\n";
544 }
545 
546 LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
547  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
548  assert(BI != BlockLiveness.end() && "Block not found");
549  const BlockLifetimeInfo &BlockInfo = BI->second;
550 
551  dumpBV("BEGIN", BlockInfo.Begin);
552  dumpBV("END", BlockInfo.End);
553  dumpBV("LIVE_IN", BlockInfo.LiveIn);
554  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
555 }
556 
558  for (MachineBasicBlock *MBB : depth_first(MF)) {
559  dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
560  << MBB->getName() << "]\n";
561  dumpBB(MBB);
562  }
563 }
564 
565 LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
566  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
567  dbgs() << "Interval[" << I << "]:\n";
568  Intervals[I]->dump();
569  }
570 }
571 #endif
572 
573 static inline int getStartOrEndSlot(const MachineInstr &MI)
574 {
577  "Expected LIFETIME_START or LIFETIME_END op");
578  const MachineOperand &MO = MI.getOperand(0);
579  int Slot = MO.getIndex();
580  if (Slot >= 0)
581  return Slot;
582  return -1;
583 }
584 
585 // At the moment the only way to end a variable lifetime is with
586 // a VARIABLE_LIFETIME op (which can't contain a start). If things
587 // change and the IR allows for a single inst that both begins
588 // and ends lifetime(s), this interface will need to be reworked.
589 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
591  bool &isStart) {
594  int Slot = getStartOrEndSlot(MI);
595  if (Slot < 0)
596  return false;
597  if (!InterestingSlots.test(Slot))
598  return false;
599  slots.push_back(Slot);
601  isStart = false;
602  return true;
603  }
604  if (!applyFirstUse(Slot)) {
605  isStart = true;
606  return true;
607  }
609  if (!MI.isDebugInstr()) {
610  bool found = false;
611  for (const MachineOperand &MO : MI.operands()) {
612  if (!MO.isFI())
613  continue;
614  int Slot = MO.getIndex();
615  if (Slot<0)
616  continue;
617  if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
618  slots.push_back(Slot);
619  found = true;
620  }
621  }
622  if (found) {
623  isStart = true;
624  return true;
625  }
626  }
627  }
628  return false;
629 }
630 
631 unsigned StackColoring::collectMarkers(unsigned NumSlot) {
632  unsigned MarkersFound = 0;
633  BlockBitVecMap SeenStartMap;
634  InterestingSlots.clear();
635  InterestingSlots.resize(NumSlot);
636  ConservativeSlots.clear();
637  ConservativeSlots.resize(NumSlot);
638 
639  // number of start and end lifetime ops for each slot
640  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
641  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
642 
643  // Step 1: collect markers and populate the "InterestingSlots"
644  // and "ConservativeSlots" sets.
645  for (MachineBasicBlock *MBB : depth_first(MF)) {
646  // Compute the set of slots for which we've seen a START marker but have
647  // not yet seen an END marker at this point in the walk (e.g. on entry
648  // to this bb).
649  BitVector BetweenStartEnd;
650  BetweenStartEnd.resize(NumSlot);
652  PE = MBB->pred_end(); PI != PE; ++PI) {
653  BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
654  if (I != SeenStartMap.end()) {
655  BetweenStartEnd |= I->second;
656  }
657  }
658 
659  // Walk the instructions in the block to look for start/end ops.
660  for (MachineInstr &MI : *MBB) {
663  int Slot = getStartOrEndSlot(MI);
664  if (Slot < 0)
665  continue;
666  InterestingSlots.set(Slot);
668  BetweenStartEnd.set(Slot);
669  NumStartLifetimes[Slot] += 1;
670  } else {
671  BetweenStartEnd.reset(Slot);
672  NumEndLifetimes[Slot] += 1;
673  }
674  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
675  if (Allocation) {
676  LLVM_DEBUG(dbgs() << "Found a lifetime ");
678  ? "start"
679  : "end"));
680  LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
681  LLVM_DEBUG(dbgs()
682  << " with allocation: " << Allocation->getName() << "\n");
683  }
684  Markers.push_back(&MI);
685  MarkersFound += 1;
686  } else {
687  for (const MachineOperand &MO : MI.operands()) {
688  if (!MO.isFI())
689  continue;
690  int Slot = MO.getIndex();
691  if (Slot < 0)
692  continue;
693  if (! BetweenStartEnd.test(Slot)) {
694  ConservativeSlots.set(Slot);
695  }
696  }
697  }
698  }
699  BitVector &SeenStart = SeenStartMap[MBB];
700  SeenStart |= BetweenStartEnd;
701  }
702  if (!MarkersFound) {
703  return 0;
704  }
705 
706  // PR27903: slots with multiple start or end lifetime ops are not
707  // safe to enable for "lifetime-start-on-first-use".
708  for (unsigned slot = 0; slot < NumSlot; ++slot)
709  if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
710  ConservativeSlots.set(slot);
711  LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
712 
713  // Step 2: compute begin/end sets for each block
714 
715  // NOTE: We use a depth-first iteration to ensure that we obtain a
716  // deterministic numbering.
717  for (MachineBasicBlock *MBB : depth_first(MF)) {
718  // Assign a serial number to this basic block.
719  BasicBlocks[MBB] = BasicBlockNumbering.size();
720  BasicBlockNumbering.push_back(MBB);
721 
722  // Keep a reference to avoid repeated lookups.
723  BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
724 
725  BlockInfo.Begin.resize(NumSlot);
726  BlockInfo.End.resize(NumSlot);
727 
729  for (MachineInstr &MI : *MBB) {
730  bool isStart = false;
731  slots.clear();
732  if (isLifetimeStartOrEnd(MI, slots, isStart)) {
733  if (!isStart) {
734  assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
735  int Slot = slots[0];
736  if (BlockInfo.Begin.test(Slot)) {
737  BlockInfo.Begin.reset(Slot);
738  }
739  BlockInfo.End.set(Slot);
740  } else {
741  for (auto Slot : slots) {
742  LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
743  LLVM_DEBUG(dbgs()
744  << " at " << printMBBReference(*MBB) << " index ");
745  LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
746  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
747  if (Allocation) {
748  LLVM_DEBUG(dbgs()
749  << " with allocation: " << Allocation->getName());
750  }
751  LLVM_DEBUG(dbgs() << "\n");
752  if (BlockInfo.End.test(Slot)) {
753  BlockInfo.End.reset(Slot);
754  }
755  BlockInfo.Begin.set(Slot);
756  }
757  }
758  }
759  }
760  }
761 
762  // Update statistics.
763  NumMarkerSeen += MarkersFound;
764  return MarkersFound;
765 }
766 
767 void StackColoring::calculateLocalLiveness() {
768  unsigned NumIters = 0;
769  bool changed = true;
770  while (changed) {
771  changed = false;
772  ++NumIters;
773 
774  for (const MachineBasicBlock *BB : BasicBlockNumbering) {
775  // Use an iterator to avoid repeated lookups.
776  LivenessMap::iterator BI = BlockLiveness.find(BB);
777  assert(BI != BlockLiveness.end() && "Block not found");
778  BlockLifetimeInfo &BlockInfo = BI->second;
779 
780  // Compute LiveIn by unioning together the LiveOut sets of all preds.
781  BitVector LocalLiveIn;
782  for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
783  PE = BB->pred_end(); PI != PE; ++PI) {
784  LivenessMap::const_iterator I = BlockLiveness.find(*PI);
785  // PR37130: transformations prior to stack coloring can
786  // sometimes leave behind statically unreachable blocks; these
787  // can be safely skipped here.
788  if (I != BlockLiveness.end())
789  LocalLiveIn |= I->second.LiveOut;
790  }
791 
792  // Compute LiveOut by subtracting out lifetimes that end in this
793  // block, then adding in lifetimes that begin in this block. If
794  // we have both BEGIN and END markers in the same basic block
795  // then we know that the BEGIN marker comes after the END,
796  // because we already handle the case where the BEGIN comes
797  // before the END when collecting the markers (and building the
798  // BEGIN/END vectors).
799  BitVector LocalLiveOut = LocalLiveIn;
800  LocalLiveOut.reset(BlockInfo.End);
801  LocalLiveOut |= BlockInfo.Begin;
802 
803  // Update block LiveIn set, noting whether it has changed.
804  if (LocalLiveIn.test(BlockInfo.LiveIn)) {
805  changed = true;
806  BlockInfo.LiveIn |= LocalLiveIn;
807  }
808 
809  // Update block LiveOut set, noting whether it has changed.
810  if (LocalLiveOut.test(BlockInfo.LiveOut)) {
811  changed = true;
812  BlockInfo.LiveOut |= LocalLiveOut;
813  }
814  }
815  } // while changed.
816 
817  NumIterations = NumIters;
818 }
819 
820 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
822  SmallVector<bool, 16> DefinitelyInUse;
823 
824  // For each block, find which slots are active within this block
825  // and update the live intervals.
826  for (const MachineBasicBlock &MBB : *MF) {
827  Starts.clear();
828  Starts.resize(NumSlots);
829  DefinitelyInUse.clear();
830  DefinitelyInUse.resize(NumSlots);
831 
832  // Start the interval of the slots that we previously found to be 'in-use'.
833  BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
834  for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
835  pos = MBBLiveness.LiveIn.find_next(pos)) {
836  Starts[pos] = Indexes->getMBBStartIdx(&MBB);
837  }
838 
839  // Create the interval for the basic blocks containing lifetime begin/end.
840  for (const MachineInstr &MI : MBB) {
842  bool IsStart = false;
843  if (!isLifetimeStartOrEnd(MI, slots, IsStart))
844  continue;
845  SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
846  for (auto Slot : slots) {
847  if (IsStart) {
848  // If a slot is already definitely in use, we don't have to emit
849  // a new start marker because there is already a pre-existing
850  // one.
851  if (!DefinitelyInUse[Slot]) {
852  LiveStarts[Slot].push_back(ThisIndex);
853  DefinitelyInUse[Slot] = true;
854  }
855  if (!Starts[Slot].isValid())
856  Starts[Slot] = ThisIndex;
857  } else {
858  if (Starts[Slot].isValid()) {
859  VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
860  Intervals[Slot]->addSegment(
861  LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
862  Starts[Slot] = SlotIndex(); // Invalidate the start index
863  DefinitelyInUse[Slot] = false;
864  }
865  }
866  }
867  }
868 
869  // Finish up started segments
870  for (unsigned i = 0; i < NumSlots; ++i) {
871  if (!Starts[i].isValid())
872  continue;
873 
874  SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
875  VNInfo *VNI = Intervals[i]->getValNumInfo(0);
876  Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
877  }
878  }
879 }
880 
881 bool StackColoring::removeAllMarkers() {
882  unsigned Count = 0;
883  for (MachineInstr *MI : Markers) {
884  MI->eraseFromParent();
885  Count++;
886  }
887  Markers.clear();
888 
889  LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
890  return Count;
891 }
892 
893 void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
894  unsigned FixedInstr = 0;
895  unsigned FixedMemOp = 0;
896  unsigned FixedDbg = 0;
897 
898  // Remap debug information that refers to stack slots.
899  for (auto &VI : MF->getVariableDbgInfo()) {
900  if (!VI.Var)
901  continue;
902  if (SlotRemap.count(VI.Slot)) {
903  LLVM_DEBUG(dbgs() << "Remapping debug info for ["
904  << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
905  VI.Slot = SlotRemap[VI.Slot];
906  FixedDbg++;
907  }
908  }
909 
910  // Keep a list of *allocas* which need to be remapped.
912 
913  // Keep a list of allocas which has been affected by the remap.
915 
916  for (const std::pair<int, int> &SI : SlotRemap) {
917  const AllocaInst *From = MFI->getObjectAllocation(SI.first);
918  const AllocaInst *To = MFI->getObjectAllocation(SI.second);
919  assert(To && From && "Invalid allocation object");
920  Allocas[From] = To;
921 
922  // AA might be used later for instruction scheduling, and we need it to be
923  // able to deduce the correct aliasing releationships between pointers
924  // derived from the alloca being remapped and the target of that remapping.
925  // The only safe way, without directly informing AA about the remapping
926  // somehow, is to directly update the IR to reflect the change being made
927  // here.
928  Instruction *Inst = const_cast<AllocaInst *>(To);
929  if (From->getType() != To->getType()) {
930  BitCastInst *Cast = new BitCastInst(Inst, From->getType());
931  Cast->insertAfter(Inst);
932  Inst = Cast;
933  }
934 
935  // We keep both slots to maintain AliasAnalysis metadata later.
936  MergedAllocas.insert(From);
937  MergedAllocas.insert(To);
938 
939  // Allow the stack protector to adjust its value map to account for the
940  // upcoming replacement.
941  SP->adjustForColoring(From, To);
942 
943  // The new alloca might not be valid in a llvm.dbg.declare for this
944  // variable, so undef out the use to make the verifier happy.
945  AllocaInst *FromAI = const_cast<AllocaInst *>(From);
946  if (FromAI->isUsedByMetadata())
948  for (auto &Use : FromAI->uses()) {
949  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
950  if (BCI->isUsedByMetadata())
951  ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
952  }
953 
954  // Note that this will not replace uses in MMOs (which we'll update below),
955  // or anywhere else (which is why we won't delete the original
956  // instruction).
957  FromAI->replaceAllUsesWith(Inst);
958  }
959 
960  // Remap all instructions to the new stack slots.
961  for (MachineBasicBlock &BB : *MF)
962  for (MachineInstr &I : BB) {
963  // Skip lifetime markers. We'll remove them soon.
964  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
965  I.getOpcode() == TargetOpcode::LIFETIME_END)
966  continue;
967 
968  // Update the MachineMemOperand to use the new alloca.
969  for (MachineMemOperand *MMO : I.memoperands()) {
970  // We've replaced IR-level uses of the remapped allocas, so we only
971  // need to replace direct uses here.
972  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
973  if (!AI)
974  continue;
975 
976  if (!Allocas.count(AI))
977  continue;
978 
979  MMO->setValue(Allocas[AI]);
980  FixedMemOp++;
981  }
982 
983  // Update all of the machine instruction operands.
984  for (MachineOperand &MO : I.operands()) {
985  if (!MO.isFI())
986  continue;
987  int FromSlot = MO.getIndex();
988 
989  // Don't touch arguments.
990  if (FromSlot<0)
991  continue;
992 
993  // Only look at mapped slots.
994  if (!SlotRemap.count(FromSlot))
995  continue;
996 
997  // In a debug build, check that the instruction that we are modifying is
998  // inside the expected live range. If the instruction is not inside
999  // the calculated range then it means that the alloca usage moved
1000  // outside of the lifetime markers, or that the user has a bug.
1001  // NOTE: Alloca address calculations which happen outside the lifetime
1002  // zone are okay, despite the fact that we don't have a good way
1003  // for validating all of the usages of the calculation.
1004 #ifndef NDEBUG
1005  bool TouchesMemory = I.mayLoad() || I.mayStore();
1006  // If we *don't* protect the user from escaped allocas, don't bother
1007  // validating the instructions.
1008  if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1009  SlotIndex Index = Indexes->getInstructionIndex(I);
1010  const LiveInterval *Interval = &*Intervals[FromSlot];
1011  assert(Interval->find(Index) != Interval->end() &&
1012  "Found instruction usage outside of live range.");
1013  }
1014 #endif
1015 
1016  // Fix the machine instructions.
1017  int ToSlot = SlotRemap[FromSlot];
1018  MO.setIndex(ToSlot);
1019  FixedInstr++;
1020  }
1021 
1022  // We adjust AliasAnalysis information for merged stack slots.
1023  MachineSDNode::mmo_iterator NewMemOps =
1024  MF->allocateMemRefsArray(I.getNumMemOperands());
1025  unsigned MemOpIdx = 0;
1026  bool ReplaceMemOps = false;
1027  for (MachineMemOperand *MMO : I.memoperands()) {
1028  // If this memory location can be a slot remapped here,
1029  // we remove AA information.
1030  bool MayHaveConflictingAAMD = false;
1031  if (MMO->getAAInfo()) {
1032  if (const Value *MMOV = MMO->getValue()) {
1034  getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1035 
1036  if (Objs.empty())
1037  MayHaveConflictingAAMD = true;
1038  else
1039  for (Value *V : Objs) {
1040  // If this memory location comes from a known stack slot
1041  // that is not remapped, we continue checking.
1042  // Otherwise, we need to invalidate AA infomation.
1043  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1044  if (AI && MergedAllocas.count(AI)) {
1045  MayHaveConflictingAAMD = true;
1046  break;
1047  }
1048  }
1049  }
1050  }
1051  if (MayHaveConflictingAAMD) {
1052  NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
1053  ReplaceMemOps = true;
1054  }
1055  else
1056  NewMemOps[MemOpIdx++] = MMO;
1057  }
1058 
1059  // If any memory operand is updated, set memory references of
1060  // this instruction.
1061  if (ReplaceMemOps)
1062  I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
1063  }
1064 
1065  // Update the location of C++ catch objects for the MSVC personality routine.
1066  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1067  for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1068  for (WinEHHandlerType &H : TBME.HandlerArray)
1070  SlotRemap.count(H.CatchObj.FrameIndex))
1071  H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1072 
1073  LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1074  LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1075  LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1076 }
1077 
1078 void StackColoring::removeInvalidSlotRanges() {
1079  for (MachineBasicBlock &BB : *MF)
1080  for (MachineInstr &I : BB) {
1081  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1082  I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1083  continue;
1084 
1085  // Some intervals are suspicious! In some cases we find address
1086  // calculations outside of the lifetime zone, but not actual memory
1087  // read or write. Memory accesses outside of the lifetime zone are a clear
1088  // violation, but address calculations are okay. This can happen when
1089  // GEPs are hoisted outside of the lifetime zone.
1090  // So, in here we only check instructions which can read or write memory.
1091  if (!I.mayLoad() && !I.mayStore())
1092  continue;
1093 
1094  // Check all of the machine operands.
1095  for (const MachineOperand &MO : I.operands()) {
1096  if (!MO.isFI())
1097  continue;
1098 
1099  int Slot = MO.getIndex();
1100 
1101  if (Slot<0)
1102  continue;
1103 
1104  if (Intervals[Slot]->empty())
1105  continue;
1106 
1107  // Check that the used slot is inside the calculated lifetime range.
1108  // If it is not, warn about it and invalidate the range.
1109  LiveInterval *Interval = &*Intervals[Slot];
1110  SlotIndex Index = Indexes->getInstructionIndex(I);
1111  if (Interval->find(Index) == Interval->end()) {
1112  Interval->clear();
1113  LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1114  EscapedAllocas++;
1115  }
1116  }
1117  }
1118 }
1119 
1120 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1121  unsigned NumSlots) {
1122  // Expunge slot remap map.
1123  for (unsigned i=0; i < NumSlots; ++i) {
1124  // If we are remapping i
1125  if (SlotRemap.count(i)) {
1126  int Target = SlotRemap[i];
1127  // As long as our target is mapped to something else, follow it.
1128  while (SlotRemap.count(Target)) {
1129  Target = SlotRemap[Target];
1130  SlotRemap[i] = Target;
1131  }
1132  }
1133  }
1134 }
1135 
1136 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1137  LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1138  << "********** Function: " << Func.getName() << '\n');
1139  MF = &Func;
1140  MFI = &MF->getFrameInfo();
1141  Indexes = &getAnalysis<SlotIndexes>();
1142  SP = &getAnalysis<StackProtector>();
1143  BlockLiveness.clear();
1144  BasicBlocks.clear();
1145  BasicBlockNumbering.clear();
1146  Markers.clear();
1147  Intervals.clear();
1148  LiveStarts.clear();
1149  VNInfoAllocator.Reset();
1150 
1151  unsigned NumSlots = MFI->getObjectIndexEnd();
1152 
1153  // If there are no stack slots then there are no markers to remove.
1154  if (!NumSlots)
1155  return false;
1156 
1157  SmallVector<int, 8> SortedSlots;
1158  SortedSlots.reserve(NumSlots);
1159  Intervals.reserve(NumSlots);
1160  LiveStarts.resize(NumSlots);
1161 
1162  unsigned NumMarkers = collectMarkers(NumSlots);
1163 
1164  unsigned TotalSize = 0;
1165  LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1166  << " slots\n");
1167  LLVM_DEBUG(dbgs() << "Slot structure:\n");
1168 
1169  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1170  LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1171  << " bytes.\n");
1172  TotalSize += MFI->getObjectSize(i);
1173  }
1174 
1175  LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1176 
1177  // Don't continue because there are not enough lifetime markers, or the
1178  // stack is too small, or we are told not to optimize the slots.
1179  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1180  skipFunction(Func.getFunction())) {
1181  LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1182  return removeAllMarkers();
1183  }
1184 
1185  for (unsigned i=0; i < NumSlots; ++i) {
1186  std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1187  LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1188  Intervals.push_back(std::move(LI));
1189  SortedSlots.push_back(i);
1190  }
1191 
1192  // Calculate the liveness of each block.
1193  calculateLocalLiveness();
1194  LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1195  LLVM_DEBUG(dump());
1196 
1197  // Propagate the liveness information.
1198  calculateLiveIntervals(NumSlots);
1199  LLVM_DEBUG(dumpIntervals());
1200 
1201  // Search for allocas which are used outside of the declared lifetime
1202  // markers.
1204  removeInvalidSlotRanges();
1205 
1206  // Maps old slots to new slots.
1207  DenseMap<int, int> SlotRemap;
1208  unsigned RemovedSlots = 0;
1209  unsigned ReducedSize = 0;
1210 
1211  // Do not bother looking at empty intervals.
1212  for (unsigned I = 0; I < NumSlots; ++I) {
1213  if (Intervals[SortedSlots[I]]->empty())
1214  SortedSlots[I] = -1;
1215  }
1216 
1217  // This is a simple greedy algorithm for merging allocas. First, sort the
1218  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1219  // for disjoint slots. When you find disjoint slots, merge the samller one
1220  // into the bigger one and update the live interval. Remove the small alloca
1221  // and continue.
1222 
1223  // Sort the slots according to their size. Place unused slots at the end.
1224  // Use stable sort to guarantee deterministic code generation.
1225  std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
1226  [this](int LHS, int RHS) {
1227  // We use -1 to denote a uninteresting slot. Place these slots at the end.
1228  if (LHS == -1) return false;
1229  if (RHS == -1) return true;
1230  // Sort according to size.
1231  return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1232  });
1233 
1234  for (auto &s : LiveStarts)
1235  llvm::sort(s.begin(), s.end());
1236 
1237  bool Changed = true;
1238  while (Changed) {
1239  Changed = false;
1240  for (unsigned I = 0; I < NumSlots; ++I) {
1241  if (SortedSlots[I] == -1)
1242  continue;
1243 
1244  for (unsigned J=I+1; J < NumSlots; ++J) {
1245  if (SortedSlots[J] == -1)
1246  continue;
1247 
1248  int FirstSlot = SortedSlots[I];
1249  int SecondSlot = SortedSlots[J];
1250  LiveInterval *First = &*Intervals[FirstSlot];
1251  LiveInterval *Second = &*Intervals[SecondSlot];
1252  auto &FirstS = LiveStarts[FirstSlot];
1253  auto &SecondS = LiveStarts[SecondSlot];
1254  assert(!First->empty() && !Second->empty() && "Found an empty range");
1255 
1256  // Merge disjoint slots. This is a little bit tricky - see the
1257  // Implementation Notes section for an explanation.
1258  if (!First->isLiveAtIndexes(SecondS) &&
1259  !Second->isLiveAtIndexes(FirstS)) {
1260  Changed = true;
1261  First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1262 
1263  int OldSize = FirstS.size();
1264  FirstS.append(SecondS.begin(), SecondS.end());
1265  auto Mid = FirstS.begin() + OldSize;
1266  std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1267 
1268  SlotRemap[SecondSlot] = FirstSlot;
1269  SortedSlots[J] = -1;
1270  LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1271  << SecondSlot << " together.\n");
1272  unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1273  MFI->getObjectAlignment(SecondSlot));
1274 
1275  assert(MFI->getObjectSize(FirstSlot) >=
1276  MFI->getObjectSize(SecondSlot) &&
1277  "Merging a small object into a larger one");
1278 
1279  RemovedSlots+=1;
1280  ReducedSize += MFI->getObjectSize(SecondSlot);
1281  MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1282  MFI->RemoveStackObject(SecondSlot);
1283  }
1284  }
1285  }
1286  }// While changed.
1287 
1288  // Record statistics.
1289  StackSpaceSaved += ReducedSize;
1290  StackSlotMerged += RemovedSlots;
1291  LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1292  << ReducedSize << " bytes\n");
1293 
1294  // Scan the entire function and update all machine operands that use frame
1295  // indices to use the remapped frame index.
1296  expungeSlotMap(SlotRemap, NumSlots);
1297  remapInstructions(SlotRemap);
1298 
1299  return removeAllMarkers();
1300 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:372
bool empty() const
Definition: LiveInterval.h:370
void push_back(const T &Elt)
Definition: SmallVector.h:213
BitVector & set()
Definition: BitVector.h:398
iterator_range< use_iterator > uses()
Definition: Value.h:354
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:77
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static int getStartOrEndSlot(const MachineInstr &MI)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:37
Merge disjoint stack slots
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:638
This file contains the declarations for metadata subclasses.
bool test(unsigned Idx) const
Definition: BitVector.h:502
bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
STATISTIC(NumFunctions, "Total number of functions")
std::vector< MachineBasicBlock * >::const_iterator const_pred_iterator
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:377
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
VariableDbgInfoMapTy & getVariableDbgInfo()
Value * get() const
Definition: Use.h:108
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
A description of a memory reference used in the backend.
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
Definition: BitVector.h:367
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void Reset()
Deallocate all but the current slab and reset the current pointer to the beginning of it...
Definition: Allocator.h:194
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
union llvm::WinEHHandlerType::@173 CatchObj
The CatchObj starts out life as an LLVM alloca and is eventually turned frame index.
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
char & StackColoringID
StackSlotColoring - This pass performs stack coloring and merging.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
iterator end()
Definition: LiveInterval.h:212
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
SlotIndex getInstructionIndex(const MachineInstr &MI) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:414
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:311
void MergeSegmentsInAsValue(const LiveRange &RHS, VNInfo *LHSValNo)
Merge all of the live segments of a specific val# in RHS into this live range as the specified value ...
static void handleRAUW(Value *From, Value *To)
Definition: Metadata.cpp:392
SlotIndexes pass.
Definition: SlotIndexes.h:331
This class represents a no-op cast from one type to another.
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) INITIALIZE_PASS_END(StackColoring
int getObjectIndexEnd() const
Return one past the maximum frame object index.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:796
Local Stack Slot Allocation
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:487
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:439
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:397
static const unsigned End
void adjustForColoring(const AllocaInst *From, const AllocaInst *To)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
#define DEBUG_TYPE
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:497
R600 Clause Merge
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1382
bool isDebugInstr() const
Definition: MachineInstr.h:851
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:859
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
void initializeStackColoringPass(PassRegistry &)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:642
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition: Value.h:488
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Target - Wrapper for Target specific information.
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:305
Representation of each machine instruction.
Definition: MachineInstr.h:60
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:80
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:170
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
LLVM Value Representation.
Definition: Value.h:73
IRTranslator LLVM IR MI
void print(raw_ostream &os) const
Print this index to the given raw_ostream.
#define LLVM_DEBUG(X)
Definition: Debug.h:119
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
an instruction to allocate memory on the stack
Definition: Instructions.h:60
void resize(size_type N)
Definition: SmallVector.h:352