LLVM  6.0.0svn
StackColoring.cpp
Go to the documentation of this file.
1 //===- StackColoring.cpp --------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements the stack-coloring optimization that looks for
11 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
12 // which represent the possible lifetime of stack slots. It attempts to
13 // merge disjoint stack slots and reduce the used stack space.
14 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15 //
16 // TODO: In the future we plan to improve stack coloring in the following ways:
17 // 1. Allow merging multiple small slots into a single larger slot at different
18 // offsets.
19 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
20 // spill slots.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
39 #include "llvm/CodeGen/Passes.h"
45 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/IR/Use.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <limits>
61 #include <memory>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "stack-coloring"
67 
68 static cl::opt<bool>
69 DisableColoring("no-stack-coloring",
70  cl::init(false), cl::Hidden,
71  cl::desc("Disable stack coloring"));
72 
73 /// The user may write code that uses allocas outside of the declared lifetime
74 /// zone. This can happen when the user returns a reference to a local
75 /// data-structure. We can detect these cases and decide not to optimize the
76 /// code. If this flag is enabled, we try to save the user. This option
77 /// is treated as overriding LifetimeStartOnFirstUse below.
78 static cl::opt<bool>
79 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80  cl::init(false), cl::Hidden,
81  cl::desc("Do not optimize lifetime zones that "
82  "are broken"));
83 
84 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
85 /// use of stack slot as start of slot lifetime, as opposed to looking
86 /// for LIFETIME_START marker). See "Implementation notes" below for
87 /// more info.
88 static cl::opt<bool>
89 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90  cl::init(true), cl::Hidden,
91  cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92 
93 
94 STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98 
99 //===----------------------------------------------------------------------===//
100 // StackColoring Pass
101 //===----------------------------------------------------------------------===//
102 //
103 // Stack Coloring reduces stack usage by merging stack slots when they
104 // can't be used together. For example, consider the following C program:
105 //
106 // void bar(char *, int);
107 // void foo(bool var) {
108 // A: {
109 // char z[4096];
110 // bar(z, 0);
111 // }
112 //
113 // char *p;
114 // char x[4096];
115 // char y[4096];
116 // if (var) {
117 // p = x;
118 // } else {
119 // bar(y, 1);
120 // p = y + 1024;
121 // }
122 // B:
123 // bar(p, 2);
124 // }
125 //
126 // Naively-compiled, this program would use 12k of stack space. However, the
127 // stack slot corresponding to `z` is always destroyed before either of the
128 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
129 // is true, while `y` is only used if `var` is false. So in no time are 2
130 // of the stack slots used together, and therefore we can merge them,
131 // compiling the function using only a single 4k alloca:
132 //
133 // void foo(bool var) { // equivalent
134 // char x[4096];
135 // char *p;
136 // bar(x, 0);
137 // if (var) {
138 // p = x;
139 // } else {
140 // bar(x, 1);
141 // p = x + 1024;
142 // }
143 // bar(p, 2);
144 // }
145 //
146 // This is an important optimization if we want stack space to be under
147 // control in large functions, both open-coded ones and ones created by
148 // inlining.
149 //
150 // Implementation Notes:
151 // ---------------------
152 //
153 // An important part of the above reasoning is that `z` can't be accessed
154 // while the latter 2 calls to `bar` are running. This is justified because
155 // `z`'s lifetime is over after we exit from block `A:`, so any further
156 // accesses to it would be UB. The way we represent this information
157 // in LLVM is by having frontends delimit blocks with `lifetime.start`
158 // and `lifetime.end` intrinsics.
159 //
160 // The effect of these intrinsics seems to be as follows (maybe I should
161 // specify this in the reference?):
162 //
163 // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164 // lifetime intrinsic refers to that stack slot, in which case
165 // it is marked as *in-scope*.
166 // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167 // the stack slot is overwritten with `undef`.
168 // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169 // L4) on function exit, all stack slots are marked as *out-of-scope*.
170 // L5) `lifetime.end` is a no-op when called on a slot that is already
171 // *out-of-scope*.
172 // L6) memory accesses to *out-of-scope* stack slots are UB.
173 // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174 // are invalidated, unless the slot is "degenerate". This is used to
175 // justify not marking slots as in-use until the pointer to them is
176 // used, but feels a bit hacky in the presence of things like LICM. See
177 // the "Degenerate Slots" section for more details.
178 //
179 // Now, let's ground stack coloring on these rules. We'll define a slot
180 // as *in-use* at a (dynamic) point in execution if it either can be
181 // written to at that point, or if it has a live and non-undef content
182 // at that point.
183 //
184 // Obviously, slots that are never *in-use* together can be merged, and
185 // in our example `foo`, the slots for `x`, `y` and `z` are never
186 // in-use together (of course, sometimes slots that *are* in-use together
187 // might still be mergable, but we don't care about that here).
188 //
189 // In this implementation, we successively merge pairs of slots that are
190 // not *in-use* together. We could be smarter - for example, we could merge
191 // a single large slot with 2 small slots, or we could construct the
192 // interference graph and run a "smart" graph coloring algorithm, but with
193 // that aside, how do we find out whether a pair of slots might be *in-use*
194 // together?
195 //
196 // From our rules, we see that *out-of-scope* slots are never *in-use*,
197 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198 // until their address is taken. Therefore, we can approximate slot activity
199 // using dataflow.
200 //
201 // A subtle point: naively, we might try to figure out which pairs of
202 // stack-slots interfere by propagating `S in-use` through the CFG for every
203 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204 // which they are both *in-use*.
205 //
206 // That is sound, but overly conservative in some cases: in our (artificial)
207 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
208 // as `x` is only in use if we came in from the `var` edge and `y` only
209 // if we came from the `!var` edge, they still can't be in use together.
210 // See PR32488 for an important real-life case.
211 //
212 // If we wanted to find all points of interference precisely, we could
213 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214 // would be precise, but requires propagating `O(n^2)` dataflow facts.
215 //
216 // However, we aren't interested in the *set* of points of interference
217 // between 2 stack slots, only *whether* there *is* such a point. So we
218 // can rely on a little trick: for `S` and `T` to be in-use together,
219 // one of them needs to become in-use while the other is in-use (or
220 // they might both become in use simultaneously). We can check this
221 // by also keeping track of the points at which a stack slot might *start*
222 // being in-use.
223 //
224 // Exact first use:
225 // ----------------
226 //
227 // Consider the following motivating example:
228 //
229 // int foo() {
230 // char b1[1024], b2[1024];
231 // if (...) {
232 // char b3[1024];
233 // <uses of b1, b3>;
234 // return x;
235 // } else {
236 // char b4[1024], b5[1024];
237 // <uses of b2, b4, b5>;
238 // return y;
239 // }
240 // }
241 //
242 // In the code above, "b3" and "b4" are declared in distinct lexical
243 // scopes, meaning that it is easy to prove that they can share the
244 // same stack slot. Variables "b1" and "b2" are declared in the same
245 // scope, meaning that from a lexical point of view, their lifetimes
246 // overlap. From a control flow pointer of view, however, the two
247 // variables are accessed in disjoint regions of the CFG, thus it
248 // should be possible for them to share the same stack slot. An ideal
249 // stack allocation for the function above would look like:
250 //
251 // slot 0: b1, b2
252 // slot 1: b3, b4
253 // slot 2: b5
254 //
255 // Achieving this allocation is tricky, however, due to the way
256 // lifetime markers are inserted. Here is a simplified view of the
257 // control flow graph for the code above:
258 //
259 // +------ block 0 -------+
260 // 0| LIFETIME_START b1, b2 |
261 // 1| <test 'if' condition> |
262 // +-----------------------+
263 // ./ \.
264 // +------ block 1 -------+ +------ block 2 -------+
265 // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266 // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267 // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268 // +-----------------------+ +-----------------------+
269 // \. /.
270 // +------ block 3 -------+
271 // 8| <cleanupcode> |
272 // 9| LIFETIME_END b1, b2 |
273 // 10| return |
274 // +-----------------------+
275 //
276 // If we create live intervals for the variables above strictly based
277 // on the lifetime markers, we'll get the set of intervals on the
278 // left. If we ignore the lifetime start markers and instead treat a
279 // variable's lifetime as beginning with the first reference to the
280 // var, then we get the intervals on the right.
281 //
282 // LIFETIME_START First Use
283 // b1: [0,9] [3,4] [8,9]
284 // b2: [0,9] [6,9]
285 // b3: [2,4] [3,4]
286 // b4: [5,7] [6,7]
287 // b5: [5,7] [6,7]
288 //
289 // For the intervals on the left, the best we can do is overlap two
290 // variables (b3 and b4, for example); this gives us a stack size of
291 // 4*1024 bytes, not ideal. When treating first-use as the start of a
292 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293 // byte stack (better).
294 //
295 // Degenerate Slots:
296 // -----------------
297 //
298 // Relying entirely on first-use of stack slots is problematic,
299 // however, due to the fact that optimizations can sometimes migrate
300 // uses of a variable outside of its lifetime start/end region. Here
301 // is an example:
302 //
303 // int bar() {
304 // char b1[1024], b2[1024];
305 // if (...) {
306 // <uses of b2>
307 // return y;
308 // } else {
309 // <uses of b1>
310 // while (...) {
311 // char b3[1024];
312 // <uses of b3>
313 // }
314 // }
315 // }
316 //
317 // Before optimization, the control flow graph for the code above
318 // might look like the following:
319 //
320 // +------ block 0 -------+
321 // 0| LIFETIME_START b1, b2 |
322 // 1| <test 'if' condition> |
323 // +-----------------------+
324 // ./ \.
325 // +------ block 1 -------+ +------- block 2 -------+
326 // 2| <uses of b2> | 3| <uses of b1> |
327 // +-----------------------+ +-----------------------+
328 // | |
329 // | +------- block 3 -------+ <-\.
330 // | 4| <while condition> | |
331 // | +-----------------------+ |
332 // | / | |
333 // | / +------- block 4 -------+
334 // \ / 5| LIFETIME_START b3 | |
335 // \ / 6| <uses of b3> | |
336 // \ / 7| LIFETIME_END b3 | |
337 // \ | +------------------------+ |
338 // \ | \ /
339 // +------ block 5 -----+ \---------------
340 // 8| <cleanupcode> |
341 // 9| LIFETIME_END b1, b2 |
342 // 10| return |
343 // +---------------------+
344 //
345 // During optimization, however, it can happen that an instruction
346 // computing an address in "b3" (for example, a loop-invariant GEP) is
347 // hoisted up out of the loop from block 4 to block 2. [Note that
348 // this is not an actual load from the stack, only an instruction that
349 // computes the address to be loaded]. If this happens, there is now a
350 // path leading from the first use of b3 to the return instruction
351 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352 // now larger than if we were computing live intervals strictly based
353 // on lifetime markers. In the example above, this lengthened lifetime
354 // would mean that it would appear illegal to overlap b3 with b2.
355 //
356 // To deal with this such cases, the code in ::collectMarkers() below
357 // tries to identify "degenerate" slots -- those slots where on a single
358 // forward pass through the CFG we encounter a first reference to slot
359 // K before we hit the slot K lifetime start marker. For such slots,
360 // we fall back on using the lifetime start marker as the beginning of
361 // the variable's lifetime. NB: with this implementation, slots can
362 // appear degenerate in cases where there is unstructured control flow:
363 //
364 // if (q) goto mid;
365 // if (x > 9) {
366 // int b[100];
367 // memcpy(&b[0], ...);
368 // mid: b[k] = ...;
369 // abc(&b);
370 // }
371 //
372 // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373 // before visiting the memcpy block (which will contain the lifetime start
374 // for "b" then it will appear that 'b' has a degenerate lifetime.
375 //
376 
377 namespace {
378 
379 /// StackColoring - A machine pass for merging disjoint stack allocations,
380 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
381 class StackColoring : public MachineFunctionPass {
382  MachineFrameInfo *MFI;
383  MachineFunction *MF;
384 
385  /// A class representing liveness information for a single basic block.
386  /// Each bit in the BitVector represents the liveness property
387  /// for a different stack slot.
388  struct BlockLifetimeInfo {
389  /// Which slots BEGINs in each basic block.
390  BitVector Begin;
391 
392  /// Which slots ENDs in each basic block.
393  BitVector End;
394 
395  /// Which slots are marked as LIVE_IN, coming into each basic block.
396  BitVector LiveIn;
397 
398  /// Which slots are marked as LIVE_OUT, coming out of each basic block.
399  BitVector LiveOut;
400  };
401 
402  /// Maps active slots (per bit) for each basic block.
404  LivenessMap BlockLiveness;
405 
406  /// Maps serial numbers to basic blocks.
408 
409  /// Maps basic blocks to a serial number.
410  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
411 
412  /// Maps slots to their use interval. Outside of this interval, slots
413  /// values are either dead or `undef` and they will not be written to.
415 
416  /// Maps slots to the points where they can become in-use.
418 
419  /// VNInfo is used for the construction of LiveIntervals.
420  VNInfo::Allocator VNInfoAllocator;
421 
422  /// SlotIndex analysis object.
423  SlotIndexes *Indexes;
424 
425  /// The stack protector object.
427 
428  /// The list of lifetime markers found. These markers are to be removed
429  /// once the coloring is done.
431 
432  /// Record the FI slots for which we have seen some sort of
433  /// lifetime marker (either start or end).
434  BitVector InterestingSlots;
435 
436  /// FI slots that need to be handled conservatively (for these
437  /// slots lifetime-start-on-first-use is disabled).
438  BitVector ConservativeSlots;
439 
440  /// Number of iterations taken during data flow analysis.
441  unsigned NumIterations;
442 
443 public:
444  static char ID;
445 
446  StackColoring() : MachineFunctionPass(ID) {
448  }
449 
450  void getAnalysisUsage(AnalysisUsage &AU) const override;
451  bool runOnMachineFunction(MachineFunction &MF) override;
452 
453 private:
454  /// Used in collectMarkers
455  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
456 
457  /// Debug.
458  void dump() const;
459  void dumpIntervals() const;
460  void dumpBB(MachineBasicBlock *MBB) const;
461  void dumpBV(const char *tag, const BitVector &BV) const;
462 
463  /// Removes all of the lifetime marker instructions from the function.
464  /// \returns true if any markers were removed.
465  bool removeAllMarkers();
466 
467  /// Scan the machine function and find all of the lifetime markers.
468  /// Record the findings in the BEGIN and END vectors.
469  /// \returns the number of markers found.
470  unsigned collectMarkers(unsigned NumSlot);
471 
472  /// Perform the dataflow calculation and calculate the lifetime for each of
473  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
474  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
475  /// in and out blocks.
476  void calculateLocalLiveness();
477 
478  /// Returns TRUE if we're using the first-use-begins-lifetime method for
479  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
480  bool applyFirstUse(int Slot) {
482  return false;
483  if (ConservativeSlots.test(Slot))
484  return false;
485  return true;
486  }
487 
488  /// Examines the specified instruction and returns TRUE if the instruction
489  /// represents the start or end of an interesting lifetime. The slot or slots
490  /// starting or ending are added to the vector "slots" and "isStart" is set
491  /// accordingly.
492  /// \returns True if inst contains a lifetime start or end
493  bool isLifetimeStartOrEnd(const MachineInstr &MI,
495  bool &isStart);
496 
497  /// Construct the LiveIntervals for the slots.
498  void calculateLiveIntervals(unsigned NumSlots);
499 
500  /// Go over the machine function and change instructions which use stack
501  /// slots to use the joint slots.
502  void remapInstructions(DenseMap<int, int> &SlotRemap);
503 
504  /// The input program may contain instructions which are not inside lifetime
505  /// markers. This can happen due to a bug in the compiler or due to a bug in
506  /// user code (for example, returning a reference to a local variable).
507  /// This procedure checks all of the instructions in the function and
508  /// invalidates lifetime ranges which do not contain all of the instructions
509  /// which access that frame slot.
510  void removeInvalidSlotRanges();
511 
512  /// Map entries which point to other entries to their destination.
513  /// A->B->C becomes A->C.
514  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
515 };
516 
517 } // end anonymous namespace
518 
519 char StackColoring::ID = 0;
520 
522 
523 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
524  "Merge disjoint stack slots", false, false)
528  "Merge disjoint stack slots", false, false)
529 
530 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
531  AU.addRequired<SlotIndexes>();
532  AU.addRequired<StackProtector>();
534 }
535 
536 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
537 LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
538  const BitVector &BV) const {
539  dbgs() << tag << " : { ";
540  for (unsigned I = 0, E = BV.size(); I != E; ++I)
541  dbgs() << BV.test(I) << " ";
542  dbgs() << "}\n";
543 }
544 
545 LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
546  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
547  assert(BI != BlockLiveness.end() && "Block not found");
548  const BlockLifetimeInfo &BlockInfo = BI->second;
549 
550  dumpBV("BEGIN", BlockInfo.Begin);
551  dumpBV("END", BlockInfo.End);
552  dumpBV("LIVE_IN", BlockInfo.LiveIn);
553  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
554 }
555 
557  for (MachineBasicBlock *MBB : depth_first(MF)) {
558  dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
559  << MBB->getName() << "]\n";
560  dumpBB(MBB);
561  }
562 }
563 
564 LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
565  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
566  dbgs() << "Interval[" << I << "]:\n";
567  Intervals[I]->dump();
568  }
569 }
570 #endif
571 
572 static inline int getStartOrEndSlot(const MachineInstr &MI)
573 {
576  "Expected LIFETIME_START or LIFETIME_END op");
577  const MachineOperand &MO = MI.getOperand(0);
578  int Slot = MO.getIndex();
579  if (Slot >= 0)
580  return Slot;
581  return -1;
582 }
583 
584 // At the moment the only way to end a variable lifetime is with
585 // a VARIABLE_LIFETIME op (which can't contain a start). If things
586 // change and the IR allows for a single inst that both begins
587 // and ends lifetime(s), this interface will need to be reworked.
588 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
590  bool &isStart) {
593  int Slot = getStartOrEndSlot(MI);
594  if (Slot < 0)
595  return false;
596  if (!InterestingSlots.test(Slot))
597  return false;
598  slots.push_back(Slot);
600  isStart = false;
601  return true;
602  }
603  if (! applyFirstUse(Slot)) {
604  isStart = true;
605  return true;
606  }
608  if (! MI.isDebugValue()) {
609  bool found = false;
610  for (const MachineOperand &MO : MI.operands()) {
611  if (!MO.isFI())
612  continue;
613  int Slot = MO.getIndex();
614  if (Slot<0)
615  continue;
616  if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
617  slots.push_back(Slot);
618  found = true;
619  }
620  }
621  if (found) {
622  isStart = true;
623  return true;
624  }
625  }
626  }
627  return false;
628 }
629 
630 unsigned StackColoring::collectMarkers(unsigned NumSlot) {
631  unsigned MarkersFound = 0;
632  BlockBitVecMap SeenStartMap;
633  InterestingSlots.clear();
634  InterestingSlots.resize(NumSlot);
635  ConservativeSlots.clear();
636  ConservativeSlots.resize(NumSlot);
637 
638  // number of start and end lifetime ops for each slot
639  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
640  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
641 
642  // Step 1: collect markers and populate the "InterestingSlots"
643  // and "ConservativeSlots" sets.
644  for (MachineBasicBlock *MBB : depth_first(MF)) {
645  // Compute the set of slots for which we've seen a START marker but have
646  // not yet seen an END marker at this point in the walk (e.g. on entry
647  // to this bb).
648  BitVector BetweenStartEnd;
649  BetweenStartEnd.resize(NumSlot);
651  PE = MBB->pred_end(); PI != PE; ++PI) {
652  BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
653  if (I != SeenStartMap.end()) {
654  BetweenStartEnd |= I->second;
655  }
656  }
657 
658  // Walk the instructions in the block to look for start/end ops.
659  for (MachineInstr &MI : *MBB) {
662  int Slot = getStartOrEndSlot(MI);
663  if (Slot < 0)
664  continue;
665  InterestingSlots.set(Slot);
667  BetweenStartEnd.set(Slot);
668  NumStartLifetimes[Slot] += 1;
669  } else {
670  BetweenStartEnd.reset(Slot);
671  NumEndLifetimes[Slot] += 1;
672  }
673  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
674  if (Allocation) {
675  DEBUG(dbgs() << "Found a lifetime ");
677  ? "start"
678  : "end"));
679  DEBUG(dbgs() << " marker for slot #" << Slot);
680  DEBUG(dbgs() << " with allocation: " << Allocation->getName()
681  << "\n");
682  }
683  Markers.push_back(&MI);
684  MarkersFound += 1;
685  } else {
686  for (const MachineOperand &MO : MI.operands()) {
687  if (!MO.isFI())
688  continue;
689  int Slot = MO.getIndex();
690  if (Slot < 0)
691  continue;
692  if (! BetweenStartEnd.test(Slot)) {
693  ConservativeSlots.set(Slot);
694  }
695  }
696  }
697  }
698  BitVector &SeenStart = SeenStartMap[MBB];
699  SeenStart |= BetweenStartEnd;
700  }
701  if (!MarkersFound) {
702  return 0;
703  }
704 
705  // PR27903: slots with multiple start or end lifetime ops are not
706  // safe to enable for "lifetime-start-on-first-use".
707  for (unsigned slot = 0; slot < NumSlot; ++slot)
708  if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
709  ConservativeSlots.set(slot);
710  DEBUG(dumpBV("Conservative slots", ConservativeSlots));
711 
712  // Step 2: compute begin/end sets for each block
713 
714  // NOTE: We use a depth-first iteration to ensure that we obtain a
715  // deterministic numbering.
716  for (MachineBasicBlock *MBB : depth_first(MF)) {
717  // Assign a serial number to this basic block.
718  BasicBlocks[MBB] = BasicBlockNumbering.size();
719  BasicBlockNumbering.push_back(MBB);
720 
721  // Keep a reference to avoid repeated lookups.
722  BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
723 
724  BlockInfo.Begin.resize(NumSlot);
725  BlockInfo.End.resize(NumSlot);
726 
728  for (MachineInstr &MI : *MBB) {
729  bool isStart = false;
730  slots.clear();
731  if (isLifetimeStartOrEnd(MI, slots, isStart)) {
732  if (!isStart) {
733  assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
734  int Slot = slots[0];
735  if (BlockInfo.Begin.test(Slot)) {
736  BlockInfo.Begin.reset(Slot);
737  }
738  BlockInfo.End.set(Slot);
739  } else {
740  for (auto Slot : slots) {
741  DEBUG(dbgs() << "Found a use of slot #" << Slot);
742  DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
743  DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
744  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
745  if (Allocation) {
746  DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
747  }
748  DEBUG(dbgs() << "\n");
749  if (BlockInfo.End.test(Slot)) {
750  BlockInfo.End.reset(Slot);
751  }
752  BlockInfo.Begin.set(Slot);
753  }
754  }
755  }
756  }
757  }
758 
759  // Update statistics.
760  NumMarkerSeen += MarkersFound;
761  return MarkersFound;
762 }
763 
764 void StackColoring::calculateLocalLiveness() {
765  unsigned NumIters = 0;
766  bool changed = true;
767  while (changed) {
768  changed = false;
769  ++NumIters;
770 
771  for (const MachineBasicBlock *BB : BasicBlockNumbering) {
772  // Use an iterator to avoid repeated lookups.
773  LivenessMap::iterator BI = BlockLiveness.find(BB);
774  assert(BI != BlockLiveness.end() && "Block not found");
775  BlockLifetimeInfo &BlockInfo = BI->second;
776 
777  // Compute LiveIn by unioning together the LiveOut sets of all preds.
778  BitVector LocalLiveIn;
779  for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
780  PE = BB->pred_end(); PI != PE; ++PI) {
781  LivenessMap::const_iterator I = BlockLiveness.find(*PI);
782  assert(I != BlockLiveness.end() && "Predecessor not found");
783  LocalLiveIn |= I->second.LiveOut;
784  }
785 
786  // Compute LiveOut by subtracting out lifetimes that end in this
787  // block, then adding in lifetimes that begin in this block. If
788  // we have both BEGIN and END markers in the same basic block
789  // then we know that the BEGIN marker comes after the END,
790  // because we already handle the case where the BEGIN comes
791  // before the END when collecting the markers (and building the
792  // BEGIN/END vectors).
793  BitVector LocalLiveOut = LocalLiveIn;
794  LocalLiveOut.reset(BlockInfo.End);
795  LocalLiveOut |= BlockInfo.Begin;
796 
797  // Update block LiveIn set, noting whether it has changed.
798  if (LocalLiveIn.test(BlockInfo.LiveIn)) {
799  changed = true;
800  BlockInfo.LiveIn |= LocalLiveIn;
801  }
802 
803  // Update block LiveOut set, noting whether it has changed.
804  if (LocalLiveOut.test(BlockInfo.LiveOut)) {
805  changed = true;
806  BlockInfo.LiveOut |= LocalLiveOut;
807  }
808  }
809  } // while changed.
810 
811  NumIterations = NumIters;
812 }
813 
814 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
816  SmallVector<bool, 16> DefinitelyInUse;
817 
818  // For each block, find which slots are active within this block
819  // and update the live intervals.
820  for (const MachineBasicBlock &MBB : *MF) {
821  Starts.clear();
822  Starts.resize(NumSlots);
823  DefinitelyInUse.clear();
824  DefinitelyInUse.resize(NumSlots);
825 
826  // Start the interval of the slots that we previously found to be 'in-use'.
827  BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
828  for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
829  pos = MBBLiveness.LiveIn.find_next(pos)) {
830  Starts[pos] = Indexes->getMBBStartIdx(&MBB);
831  }
832 
833  // Create the interval for the basic blocks containing lifetime begin/end.
834  for (const MachineInstr &MI : MBB) {
836  bool IsStart = false;
837  if (!isLifetimeStartOrEnd(MI, slots, IsStart))
838  continue;
839  SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
840  for (auto Slot : slots) {
841  if (IsStart) {
842  // If a slot is already definitely in use, we don't have to emit
843  // a new start marker because there is already a pre-existing
844  // one.
845  if (!DefinitelyInUse[Slot]) {
846  LiveStarts[Slot].push_back(ThisIndex);
847  DefinitelyInUse[Slot] = true;
848  }
849  if (!Starts[Slot].isValid())
850  Starts[Slot] = ThisIndex;
851  } else {
852  if (Starts[Slot].isValid()) {
853  VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
854  Intervals[Slot]->addSegment(
855  LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
856  Starts[Slot] = SlotIndex(); // Invalidate the start index
857  DefinitelyInUse[Slot] = false;
858  }
859  }
860  }
861  }
862 
863  // Finish up started segments
864  for (unsigned i = 0; i < NumSlots; ++i) {
865  if (!Starts[i].isValid())
866  continue;
867 
868  SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
869  VNInfo *VNI = Intervals[i]->getValNumInfo(0);
870  Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
871  }
872  }
873 }
874 
875 bool StackColoring::removeAllMarkers() {
876  unsigned Count = 0;
877  for (MachineInstr *MI : Markers) {
878  MI->eraseFromParent();
879  Count++;
880  }
881  Markers.clear();
882 
883  DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
884  return Count;
885 }
886 
887 void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
888  unsigned FixedInstr = 0;
889  unsigned FixedMemOp = 0;
890  unsigned FixedDbg = 0;
891 
892  // Remap debug information that refers to stack slots.
893  for (auto &VI : MF->getVariableDbgInfo()) {
894  if (!VI.Var)
895  continue;
896  if (SlotRemap.count(VI.Slot)) {
897  DEBUG(dbgs() << "Remapping debug info for ["
898  << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
899  VI.Slot = SlotRemap[VI.Slot];
900  FixedDbg++;
901  }
902  }
903 
904  // Keep a list of *allocas* which need to be remapped.
906 
907  // Keep a list of allocas which has been affected by the remap.
909 
910  for (const std::pair<int, int> &SI : SlotRemap) {
911  const AllocaInst *From = MFI->getObjectAllocation(SI.first);
912  const AllocaInst *To = MFI->getObjectAllocation(SI.second);
913  assert(To && From && "Invalid allocation object");
914  Allocas[From] = To;
915 
916  // AA might be used later for instruction scheduling, and we need it to be
917  // able to deduce the correct aliasing releationships between pointers
918  // derived from the alloca being remapped and the target of that remapping.
919  // The only safe way, without directly informing AA about the remapping
920  // somehow, is to directly update the IR to reflect the change being made
921  // here.
922  Instruction *Inst = const_cast<AllocaInst *>(To);
923  if (From->getType() != To->getType()) {
924  BitCastInst *Cast = new BitCastInst(Inst, From->getType());
925  Cast->insertAfter(Inst);
926  Inst = Cast;
927  }
928 
929  // We keep both slots to maintain AliasAnalysis metadata later.
930  MergedAllocas.insert(From);
931  MergedAllocas.insert(To);
932 
933  // Allow the stack protector to adjust its value map to account for the
934  // upcoming replacement.
935  SP->adjustForColoring(From, To);
936 
937  // The new alloca might not be valid in a llvm.dbg.declare for this
938  // variable, so undef out the use to make the verifier happy.
939  AllocaInst *FromAI = const_cast<AllocaInst *>(From);
940  if (FromAI->isUsedByMetadata())
942  for (auto &Use : FromAI->uses()) {
943  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
944  if (BCI->isUsedByMetadata())
945  ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
946  }
947 
948  // Note that this will not replace uses in MMOs (which we'll update below),
949  // or anywhere else (which is why we won't delete the original
950  // instruction).
951  FromAI->replaceAllUsesWith(Inst);
952  }
953 
954  // Remap all instructions to the new stack slots.
955  for (MachineBasicBlock &BB : *MF)
956  for (MachineInstr &I : BB) {
957  // Skip lifetime markers. We'll remove them soon.
958  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
959  I.getOpcode() == TargetOpcode::LIFETIME_END)
960  continue;
961 
962  // Update the MachineMemOperand to use the new alloca.
963  for (MachineMemOperand *MMO : I.memoperands()) {
964  // We've replaced IR-level uses of the remapped allocas, so we only
965  // need to replace direct uses here.
966  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
967  if (!AI)
968  continue;
969 
970  if (!Allocas.count(AI))
971  continue;
972 
973  MMO->setValue(Allocas[AI]);
974  FixedMemOp++;
975  }
976 
977  // Update all of the machine instruction operands.
978  for (MachineOperand &MO : I.operands()) {
979  if (!MO.isFI())
980  continue;
981  int FromSlot = MO.getIndex();
982 
983  // Don't touch arguments.
984  if (FromSlot<0)
985  continue;
986 
987  // Only look at mapped slots.
988  if (!SlotRemap.count(FromSlot))
989  continue;
990 
991  // In a debug build, check that the instruction that we are modifying is
992  // inside the expected live range. If the instruction is not inside
993  // the calculated range then it means that the alloca usage moved
994  // outside of the lifetime markers, or that the user has a bug.
995  // NOTE: Alloca address calculations which happen outside the lifetime
996  // zone are are okay, despite the fact that we don't have a good way
997  // for validating all of the usages of the calculation.
998 #ifndef NDEBUG
999  bool TouchesMemory = I.mayLoad() || I.mayStore();
1000  // If we *don't* protect the user from escaped allocas, don't bother
1001  // validating the instructions.
1002  if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
1003  SlotIndex Index = Indexes->getInstructionIndex(I);
1004  const LiveInterval *Interval = &*Intervals[FromSlot];
1005  assert(Interval->find(Index) != Interval->end() &&
1006  "Found instruction usage outside of live range.");
1007  }
1008 #endif
1009 
1010  // Fix the machine instructions.
1011  int ToSlot = SlotRemap[FromSlot];
1012  MO.setIndex(ToSlot);
1013  FixedInstr++;
1014  }
1015 
1016  // We adjust AliasAnalysis information for merged stack slots.
1017  MachineSDNode::mmo_iterator NewMemOps =
1018  MF->allocateMemRefsArray(I.getNumMemOperands());
1019  unsigned MemOpIdx = 0;
1020  bool ReplaceMemOps = false;
1021  for (MachineMemOperand *MMO : I.memoperands()) {
1022  // If this memory location can be a slot remapped here,
1023  // we remove AA information.
1024  bool MayHaveConflictingAAMD = false;
1025  if (MMO->getAAInfo()) {
1026  if (const Value *MMOV = MMO->getValue()) {
1028  getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1029 
1030  if (Objs.empty())
1031  MayHaveConflictingAAMD = true;
1032  else
1033  for (Value *V : Objs) {
1034  // If this memory location comes from a known stack slot
1035  // that is not remapped, we continue checking.
1036  // Otherwise, we need to invalidate AA infomation.
1037  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1038  if (AI && MergedAllocas.count(AI)) {
1039  MayHaveConflictingAAMD = true;
1040  break;
1041  }
1042  }
1043  }
1044  }
1045  if (MayHaveConflictingAAMD) {
1046  NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
1047  ReplaceMemOps = true;
1048  }
1049  else
1050  NewMemOps[MemOpIdx++] = MMO;
1051  }
1052 
1053  // If any memory operand is updated, set memory references of
1054  // this instruction.
1055  if (ReplaceMemOps)
1056  I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
1057  }
1058 
1059  // Update the location of C++ catch objects for the MSVC personality routine.
1060  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1061  for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1062  for (WinEHHandlerType &H : TBME.HandlerArray)
1064  SlotRemap.count(H.CatchObj.FrameIndex))
1065  H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1066 
1067  DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
1068  DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
1069  DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
1070 }
1071 
1072 void StackColoring::removeInvalidSlotRanges() {
1073  for (MachineBasicBlock &BB : *MF)
1074  for (MachineInstr &I : BB) {
1075  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1076  I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue())
1077  continue;
1078 
1079  // Some intervals are suspicious! In some cases we find address
1080  // calculations outside of the lifetime zone, but not actual memory
1081  // read or write. Memory accesses outside of the lifetime zone are a clear
1082  // violation, but address calculations are okay. This can happen when
1083  // GEPs are hoisted outside of the lifetime zone.
1084  // So, in here we only check instructions which can read or write memory.
1085  if (!I.mayLoad() && !I.mayStore())
1086  continue;
1087 
1088  // Check all of the machine operands.
1089  for (const MachineOperand &MO : I.operands()) {
1090  if (!MO.isFI())
1091  continue;
1092 
1093  int Slot = MO.getIndex();
1094 
1095  if (Slot<0)
1096  continue;
1097 
1098  if (Intervals[Slot]->empty())
1099  continue;
1100 
1101  // Check that the used slot is inside the calculated lifetime range.
1102  // If it is not, warn about it and invalidate the range.
1103  LiveInterval *Interval = &*Intervals[Slot];
1104  SlotIndex Index = Indexes->getInstructionIndex(I);
1105  if (Interval->find(Index) == Interval->end()) {
1106  Interval->clear();
1107  DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
1108  EscapedAllocas++;
1109  }
1110  }
1111  }
1112 }
1113 
1114 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1115  unsigned NumSlots) {
1116  // Expunge slot remap map.
1117  for (unsigned i=0; i < NumSlots; ++i) {
1118  // If we are remapping i
1119  if (SlotRemap.count(i)) {
1120  int Target = SlotRemap[i];
1121  // As long as our target is mapped to something else, follow it.
1122  while (SlotRemap.count(Target)) {
1123  Target = SlotRemap[Target];
1124  SlotRemap[i] = Target;
1125  }
1126  }
1127  }
1128 }
1129 
1130 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1131  DEBUG(dbgs() << "********** Stack Coloring **********\n"
1132  << "********** Function: "
1133  << ((const Value*)Func.getFunction())->getName() << '\n');
1134  MF = &Func;
1135  MFI = &MF->getFrameInfo();
1136  Indexes = &getAnalysis<SlotIndexes>();
1137  SP = &getAnalysis<StackProtector>();
1138  BlockLiveness.clear();
1139  BasicBlocks.clear();
1140  BasicBlockNumbering.clear();
1141  Markers.clear();
1142  Intervals.clear();
1143  LiveStarts.clear();
1144  VNInfoAllocator.Reset();
1145 
1146  unsigned NumSlots = MFI->getObjectIndexEnd();
1147 
1148  // If there are no stack slots then there are no markers to remove.
1149  if (!NumSlots)
1150  return false;
1151 
1152  SmallVector<int, 8> SortedSlots;
1153  SortedSlots.reserve(NumSlots);
1154  Intervals.reserve(NumSlots);
1155  LiveStarts.resize(NumSlots);
1156 
1157  unsigned NumMarkers = collectMarkers(NumSlots);
1158 
1159  unsigned TotalSize = 0;
1160  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
1161  DEBUG(dbgs()<<"Slot structure:\n");
1162 
1163  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1164  DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
1165  TotalSize += MFI->getObjectSize(i);
1166  }
1167 
1168  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
1169 
1170  // Don't continue because there are not enough lifetime markers, or the
1171  // stack is too small, or we are told not to optimize the slots.
1172  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1173  skipFunction(*Func.getFunction())) {
1174  DEBUG(dbgs()<<"Will not try to merge slots.\n");
1175  return removeAllMarkers();
1176  }
1177 
1178  for (unsigned i=0; i < NumSlots; ++i) {
1179  std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1180  LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1181  Intervals.push_back(std::move(LI));
1182  SortedSlots.push_back(i);
1183  }
1184 
1185  // Calculate the liveness of each block.
1186  calculateLocalLiveness();
1187  DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1188  DEBUG(dump());
1189 
1190  // Propagate the liveness information.
1191  calculateLiveIntervals(NumSlots);
1192  DEBUG(dumpIntervals());
1193 
1194  // Search for allocas which are used outside of the declared lifetime
1195  // markers.
1197  removeInvalidSlotRanges();
1198 
1199  // Maps old slots to new slots.
1200  DenseMap<int, int> SlotRemap;
1201  unsigned RemovedSlots = 0;
1202  unsigned ReducedSize = 0;
1203 
1204  // Do not bother looking at empty intervals.
1205  for (unsigned I = 0; I < NumSlots; ++I) {
1206  if (Intervals[SortedSlots[I]]->empty())
1207  SortedSlots[I] = -1;
1208  }
1209 
1210  // This is a simple greedy algorithm for merging allocas. First, sort the
1211  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1212  // for disjoint slots. When you find disjoint slots, merge the samller one
1213  // into the bigger one and update the live interval. Remove the small alloca
1214  // and continue.
1215 
1216  // Sort the slots according to their size. Place unused slots at the end.
1217  // Use stable sort to guarantee deterministic code generation.
1218  std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
1219  [this](int LHS, int RHS) {
1220  // We use -1 to denote a uninteresting slot. Place these slots at the end.
1221  if (LHS == -1) return false;
1222  if (RHS == -1) return true;
1223  // Sort according to size.
1224  return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1225  });
1226 
1227  for (auto &s : LiveStarts)
1228  std::sort(s.begin(), s.end());
1229 
1230  bool Changed = true;
1231  while (Changed) {
1232  Changed = false;
1233  for (unsigned I = 0; I < NumSlots; ++I) {
1234  if (SortedSlots[I] == -1)
1235  continue;
1236 
1237  for (unsigned J=I+1; J < NumSlots; ++J) {
1238  if (SortedSlots[J] == -1)
1239  continue;
1240 
1241  int FirstSlot = SortedSlots[I];
1242  int SecondSlot = SortedSlots[J];
1243  LiveInterval *First = &*Intervals[FirstSlot];
1244  LiveInterval *Second = &*Intervals[SecondSlot];
1245  auto &FirstS = LiveStarts[FirstSlot];
1246  auto &SecondS = LiveStarts[SecondSlot];
1247  assert(!First->empty() && !Second->empty() && "Found an empty range");
1248 
1249  // Merge disjoint slots. This is a little bit tricky - see the
1250  // Implementation Notes section for an explanation.
1251  if (!First->isLiveAtIndexes(SecondS) &&
1252  !Second->isLiveAtIndexes(FirstS)) {
1253  Changed = true;
1254  First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1255 
1256  int OldSize = FirstS.size();
1257  FirstS.append(SecondS.begin(), SecondS.end());
1258  auto Mid = FirstS.begin() + OldSize;
1259  std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1260 
1261  SlotRemap[SecondSlot] = FirstSlot;
1262  SortedSlots[J] = -1;
1263  DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
1264  SecondSlot<<" together.\n");
1265  unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1266  MFI->getObjectAlignment(SecondSlot));
1267 
1268  assert(MFI->getObjectSize(FirstSlot) >=
1269  MFI->getObjectSize(SecondSlot) &&
1270  "Merging a small object into a larger one");
1271 
1272  RemovedSlots+=1;
1273  ReducedSize += MFI->getObjectSize(SecondSlot);
1274  MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1275  MFI->RemoveStackObject(SecondSlot);
1276  }
1277  }
1278  }
1279  }// While changed.
1280 
1281  // Record statistics.
1282  StackSpaceSaved += ReducedSize;
1283  StackSlotMerged += RemovedSlots;
1284  DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
1285  ReducedSize<<" bytes\n");
1286 
1287  // Scan the entire function and update all machine operands that use frame
1288  // indices to use the remapped frame index.
1289  expungeSlotMap(SlotRemap, NumSlots);
1290  remapInstructions(SlotRemap);
1291 
1292  return removeAllMarkers();
1293 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:372
bool empty() const
Definition: LiveInterval.h:370
union llvm::WinEHHandlerType::@168 CatchObj
The CatchObj starts out life as an LLVM alloca and is eventually turned frame index.
void push_back(const T &Elt)
Definition: SmallVector.h:212
BitVector & set()
Definition: BitVector.h:398
iterator_range< use_iterator > uses()
Definition: Value.h:356
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:77
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static int getStartOrEndSlot(const MachineInstr &MI)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:37
Merge disjoint stack slots
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:638
This file contains the declarations for metadata subclasses.
bool test(unsigned Idx) const
Definition: BitVector.h:502
bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
STATISTIC(NumFunctions, "Total number of functions")
std::vector< MachineBasicBlock * >::const_iterator const_pred_iterator
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:380
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:332
VariableDbgInfoMapTy & getVariableDbgInfo()
Value * get() const
Definition: Use.h:108
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
A description of a memory reference used in the backend.
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
Definition: BitVector.h:367
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void Reset()
Deallocate all but the current slab and reset the current pointer to the beginning of it...
Definition: Allocator.h:192
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
char & StackColoringID
StackSlotColoring - This pass performs stack coloring and merging.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
iterator end()
Definition: LiveInterval.h:212
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
static StringRef getName(Value *V)
SlotIndex getInstructionIndex(const MachineInstr &MI) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:414
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
void MergeSegmentsInAsValue(const LiveRange &RHS, VNInfo *LHSValNo)
Merge all of the live segments of a specific val# in RHS into this live range as the specified value ...
static void handleRAUW(Value *From, Value *To)
Definition: Metadata.cpp:384
SlotIndexes pass.
Definition: SlotIndexes.h:331
This class represents a no-op cast from one type to another.
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) INITIALIZE_PASS_END(StackColoring
int getObjectIndexEnd() const
Return one past the maximum frame object index.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:801
Local Stack Slot Allocation
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:138
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:487
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:439
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:397
static const unsigned End
void adjustForColoring(const AllocaInst *From, const AllocaInst *To)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
#define DEBUG_TYPE
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:497
R600 Clause Merge
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isDebugValue() const
Definition: MachineInstr.h:816
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
void initializeStackColoringPass(PassRegistry &)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:642
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition: Value.h:490
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Target - Wrapper for Target specific information.
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:305
Representation of each machine instruction.
Definition: MachineInstr.h:59
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:81
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:170
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
LLVM Value Representation.
Definition: Value.h:73
#define DEBUG(X)
Definition: Debug.h:118
IRTranslator LLVM IR MI
void print(raw_ostream &os) const
Print this index to the given raw_ostream.
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
an instruction to allocate memory on the stack
Definition: Instructions.h:60
void resize(size_type N)
Definition: SmallVector.h:355