LLVM  8.0.0svn
StackColoring.cpp
Go to the documentation of this file.
1 //===- StackColoring.cpp --------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements the stack-coloring optimization that looks for
11 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
12 // which represent the possible lifetime of stack slots. It attempts to
13 // merge disjoint stack slots and reduce the used stack space.
14 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15 //
16 // TODO: In the future we plan to improve stack coloring in the following ways:
17 // 1. Allow merging multiple small slots into a single larger slot at different
18 // offsets.
19 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
20 // spill slots.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
39 #include "llvm/CodeGen/Passes.h"
44 #include "llvm/Config/llvm-config.h"
45 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/IR/Use.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <limits>
61 #include <memory>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "stack-coloring"
67 
68 static cl::opt<bool>
69 DisableColoring("no-stack-coloring",
70  cl::init(false), cl::Hidden,
71  cl::desc("Disable stack coloring"));
72 
73 /// The user may write code that uses allocas outside of the declared lifetime
74 /// zone. This can happen when the user returns a reference to a local
75 /// data-structure. We can detect these cases and decide not to optimize the
76 /// code. If this flag is enabled, we try to save the user. This option
77 /// is treated as overriding LifetimeStartOnFirstUse below.
78 static cl::opt<bool>
79 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80  cl::init(false), cl::Hidden,
81  cl::desc("Do not optimize lifetime zones that "
82  "are broken"));
83 
84 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
85 /// use of stack slot as start of slot lifetime, as opposed to looking
86 /// for LIFETIME_START marker). See "Implementation notes" below for
87 /// more info.
88 static cl::opt<bool>
89 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90  cl::init(true), cl::Hidden,
91  cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92 
93 
94 STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98 
99 //===----------------------------------------------------------------------===//
100 // StackColoring Pass
101 //===----------------------------------------------------------------------===//
102 //
103 // Stack Coloring reduces stack usage by merging stack slots when they
104 // can't be used together. For example, consider the following C program:
105 //
106 // void bar(char *, int);
107 // void foo(bool var) {
108 // A: {
109 // char z[4096];
110 // bar(z, 0);
111 // }
112 //
113 // char *p;
114 // char x[4096];
115 // char y[4096];
116 // if (var) {
117 // p = x;
118 // } else {
119 // bar(y, 1);
120 // p = y + 1024;
121 // }
122 // B:
123 // bar(p, 2);
124 // }
125 //
126 // Naively-compiled, this program would use 12k of stack space. However, the
127 // stack slot corresponding to `z` is always destroyed before either of the
128 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
129 // is true, while `y` is only used if `var` is false. So in no time are 2
130 // of the stack slots used together, and therefore we can merge them,
131 // compiling the function using only a single 4k alloca:
132 //
133 // void foo(bool var) { // equivalent
134 // char x[4096];
135 // char *p;
136 // bar(x, 0);
137 // if (var) {
138 // p = x;
139 // } else {
140 // bar(x, 1);
141 // p = x + 1024;
142 // }
143 // bar(p, 2);
144 // }
145 //
146 // This is an important optimization if we want stack space to be under
147 // control in large functions, both open-coded ones and ones created by
148 // inlining.
149 //
150 // Implementation Notes:
151 // ---------------------
152 //
153 // An important part of the above reasoning is that `z` can't be accessed
154 // while the latter 2 calls to `bar` are running. This is justified because
155 // `z`'s lifetime is over after we exit from block `A:`, so any further
156 // accesses to it would be UB. The way we represent this information
157 // in LLVM is by having frontends delimit blocks with `lifetime.start`
158 // and `lifetime.end` intrinsics.
159 //
160 // The effect of these intrinsics seems to be as follows (maybe I should
161 // specify this in the reference?):
162 //
163 // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164 // lifetime intrinsic refers to that stack slot, in which case
165 // it is marked as *in-scope*.
166 // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167 // the stack slot is overwritten with `undef`.
168 // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169 // L4) on function exit, all stack slots are marked as *out-of-scope*.
170 // L5) `lifetime.end` is a no-op when called on a slot that is already
171 // *out-of-scope*.
172 // L6) memory accesses to *out-of-scope* stack slots are UB.
173 // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174 // are invalidated, unless the slot is "degenerate". This is used to
175 // justify not marking slots as in-use until the pointer to them is
176 // used, but feels a bit hacky in the presence of things like LICM. See
177 // the "Degenerate Slots" section for more details.
178 //
179 // Now, let's ground stack coloring on these rules. We'll define a slot
180 // as *in-use* at a (dynamic) point in execution if it either can be
181 // written to at that point, or if it has a live and non-undef content
182 // at that point.
183 //
184 // Obviously, slots that are never *in-use* together can be merged, and
185 // in our example `foo`, the slots for `x`, `y` and `z` are never
186 // in-use together (of course, sometimes slots that *are* in-use together
187 // might still be mergable, but we don't care about that here).
188 //
189 // In this implementation, we successively merge pairs of slots that are
190 // not *in-use* together. We could be smarter - for example, we could merge
191 // a single large slot with 2 small slots, or we could construct the
192 // interference graph and run a "smart" graph coloring algorithm, but with
193 // that aside, how do we find out whether a pair of slots might be *in-use*
194 // together?
195 //
196 // From our rules, we see that *out-of-scope* slots are never *in-use*,
197 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198 // until their address is taken. Therefore, we can approximate slot activity
199 // using dataflow.
200 //
201 // A subtle point: naively, we might try to figure out which pairs of
202 // stack-slots interfere by propagating `S in-use` through the CFG for every
203 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204 // which they are both *in-use*.
205 //
206 // That is sound, but overly conservative in some cases: in our (artificial)
207 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
208 // as `x` is only in use if we came in from the `var` edge and `y` only
209 // if we came from the `!var` edge, they still can't be in use together.
210 // See PR32488 for an important real-life case.
211 //
212 // If we wanted to find all points of interference precisely, we could
213 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214 // would be precise, but requires propagating `O(n^2)` dataflow facts.
215 //
216 // However, we aren't interested in the *set* of points of interference
217 // between 2 stack slots, only *whether* there *is* such a point. So we
218 // can rely on a little trick: for `S` and `T` to be in-use together,
219 // one of them needs to become in-use while the other is in-use (or
220 // they might both become in use simultaneously). We can check this
221 // by also keeping track of the points at which a stack slot might *start*
222 // being in-use.
223 //
224 // Exact first use:
225 // ----------------
226 //
227 // Consider the following motivating example:
228 //
229 // int foo() {
230 // char b1[1024], b2[1024];
231 // if (...) {
232 // char b3[1024];
233 // <uses of b1, b3>;
234 // return x;
235 // } else {
236 // char b4[1024], b5[1024];
237 // <uses of b2, b4, b5>;
238 // return y;
239 // }
240 // }
241 //
242 // In the code above, "b3" and "b4" are declared in distinct lexical
243 // scopes, meaning that it is easy to prove that they can share the
244 // same stack slot. Variables "b1" and "b2" are declared in the same
245 // scope, meaning that from a lexical point of view, their lifetimes
246 // overlap. From a control flow pointer of view, however, the two
247 // variables are accessed in disjoint regions of the CFG, thus it
248 // should be possible for them to share the same stack slot. An ideal
249 // stack allocation for the function above would look like:
250 //
251 // slot 0: b1, b2
252 // slot 1: b3, b4
253 // slot 2: b5
254 //
255 // Achieving this allocation is tricky, however, due to the way
256 // lifetime markers are inserted. Here is a simplified view of the
257 // control flow graph for the code above:
258 //
259 // +------ block 0 -------+
260 // 0| LIFETIME_START b1, b2 |
261 // 1| <test 'if' condition> |
262 // +-----------------------+
263 // ./ \.
264 // +------ block 1 -------+ +------ block 2 -------+
265 // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266 // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267 // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268 // +-----------------------+ +-----------------------+
269 // \. /.
270 // +------ block 3 -------+
271 // 8| <cleanupcode> |
272 // 9| LIFETIME_END b1, b2 |
273 // 10| return |
274 // +-----------------------+
275 //
276 // If we create live intervals for the variables above strictly based
277 // on the lifetime markers, we'll get the set of intervals on the
278 // left. If we ignore the lifetime start markers and instead treat a
279 // variable's lifetime as beginning with the first reference to the
280 // var, then we get the intervals on the right.
281 //
282 // LIFETIME_START First Use
283 // b1: [0,9] [3,4] [8,9]
284 // b2: [0,9] [6,9]
285 // b3: [2,4] [3,4]
286 // b4: [5,7] [6,7]
287 // b5: [5,7] [6,7]
288 //
289 // For the intervals on the left, the best we can do is overlap two
290 // variables (b3 and b4, for example); this gives us a stack size of
291 // 4*1024 bytes, not ideal. When treating first-use as the start of a
292 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293 // byte stack (better).
294 //
295 // Degenerate Slots:
296 // -----------------
297 //
298 // Relying entirely on first-use of stack slots is problematic,
299 // however, due to the fact that optimizations can sometimes migrate
300 // uses of a variable outside of its lifetime start/end region. Here
301 // is an example:
302 //
303 // int bar() {
304 // char b1[1024], b2[1024];
305 // if (...) {
306 // <uses of b2>
307 // return y;
308 // } else {
309 // <uses of b1>
310 // while (...) {
311 // char b3[1024];
312 // <uses of b3>
313 // }
314 // }
315 // }
316 //
317 // Before optimization, the control flow graph for the code above
318 // might look like the following:
319 //
320 // +------ block 0 -------+
321 // 0| LIFETIME_START b1, b2 |
322 // 1| <test 'if' condition> |
323 // +-----------------------+
324 // ./ \.
325 // +------ block 1 -------+ +------- block 2 -------+
326 // 2| <uses of b2> | 3| <uses of b1> |
327 // +-----------------------+ +-----------------------+
328 // | |
329 // | +------- block 3 -------+ <-\.
330 // | 4| <while condition> | |
331 // | +-----------------------+ |
332 // | / | |
333 // | / +------- block 4 -------+
334 // \ / 5| LIFETIME_START b3 | |
335 // \ / 6| <uses of b3> | |
336 // \ / 7| LIFETIME_END b3 | |
337 // \ | +------------------------+ |
338 // \ | \ /
339 // +------ block 5 -----+ \---------------
340 // 8| <cleanupcode> |
341 // 9| LIFETIME_END b1, b2 |
342 // 10| return |
343 // +---------------------+
344 //
345 // During optimization, however, it can happen that an instruction
346 // computing an address in "b3" (for example, a loop-invariant GEP) is
347 // hoisted up out of the loop from block 4 to block 2. [Note that
348 // this is not an actual load from the stack, only an instruction that
349 // computes the address to be loaded]. If this happens, there is now a
350 // path leading from the first use of b3 to the return instruction
351 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352 // now larger than if we were computing live intervals strictly based
353 // on lifetime markers. In the example above, this lengthened lifetime
354 // would mean that it would appear illegal to overlap b3 with b2.
355 //
356 // To deal with this such cases, the code in ::collectMarkers() below
357 // tries to identify "degenerate" slots -- those slots where on a single
358 // forward pass through the CFG we encounter a first reference to slot
359 // K before we hit the slot K lifetime start marker. For such slots,
360 // we fall back on using the lifetime start marker as the beginning of
361 // the variable's lifetime. NB: with this implementation, slots can
362 // appear degenerate in cases where there is unstructured control flow:
363 //
364 // if (q) goto mid;
365 // if (x > 9) {
366 // int b[100];
367 // memcpy(&b[0], ...);
368 // mid: b[k] = ...;
369 // abc(&b);
370 // }
371 //
372 // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373 // before visiting the memcpy block (which will contain the lifetime start
374 // for "b" then it will appear that 'b' has a degenerate lifetime.
375 //
376 
377 namespace {
378 
379 /// StackColoring - A machine pass for merging disjoint stack allocations,
380 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
381 class StackColoring : public MachineFunctionPass {
382  MachineFrameInfo *MFI;
383  MachineFunction *MF;
384 
385  /// A class representing liveness information for a single basic block.
386  /// Each bit in the BitVector represents the liveness property
387  /// for a different stack slot.
388  struct BlockLifetimeInfo {
389  /// Which slots BEGINs in each basic block.
390  BitVector Begin;
391 
392  /// Which slots ENDs in each basic block.
393  BitVector End;
394 
395  /// Which slots are marked as LIVE_IN, coming into each basic block.
396  BitVector LiveIn;
397 
398  /// Which slots are marked as LIVE_OUT, coming out of each basic block.
399  BitVector LiveOut;
400  };
401 
402  /// Maps active slots (per bit) for each basic block.
404  LivenessMap BlockLiveness;
405 
406  /// Maps serial numbers to basic blocks.
408 
409  /// Maps basic blocks to a serial number.
410  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
411 
412  /// Maps slots to their use interval. Outside of this interval, slots
413  /// values are either dead or `undef` and they will not be written to.
415 
416  /// Maps slots to the points where they can become in-use.
418 
419  /// VNInfo is used for the construction of LiveIntervals.
420  VNInfo::Allocator VNInfoAllocator;
421 
422  /// SlotIndex analysis object.
423  SlotIndexes *Indexes;
424 
425  /// The list of lifetime markers found. These markers are to be removed
426  /// once the coloring is done.
428 
429  /// Record the FI slots for which we have seen some sort of
430  /// lifetime marker (either start or end).
431  BitVector InterestingSlots;
432 
433  /// FI slots that need to be handled conservatively (for these
434  /// slots lifetime-start-on-first-use is disabled).
435  BitVector ConservativeSlots;
436 
437  /// Number of iterations taken during data flow analysis.
438  unsigned NumIterations;
439 
440 public:
441  static char ID;
442 
443  StackColoring() : MachineFunctionPass(ID) {
445  }
446 
447  void getAnalysisUsage(AnalysisUsage &AU) const override;
448  bool runOnMachineFunction(MachineFunction &Func) override;
449 
450 private:
451  /// Used in collectMarkers
452  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
453 
454  /// Debug.
455  void dump() const;
456  void dumpIntervals() const;
457  void dumpBB(MachineBasicBlock *MBB) const;
458  void dumpBV(const char *tag, const BitVector &BV) const;
459 
460  /// Removes all of the lifetime marker instructions from the function.
461  /// \returns true if any markers were removed.
462  bool removeAllMarkers();
463 
464  /// Scan the machine function and find all of the lifetime markers.
465  /// Record the findings in the BEGIN and END vectors.
466  /// \returns the number of markers found.
467  unsigned collectMarkers(unsigned NumSlot);
468 
469  /// Perform the dataflow calculation and calculate the lifetime for each of
470  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
471  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
472  /// in and out blocks.
473  void calculateLocalLiveness();
474 
475  /// Returns TRUE if we're using the first-use-begins-lifetime method for
476  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
477  bool applyFirstUse(int Slot) {
479  return false;
480  if (ConservativeSlots.test(Slot))
481  return false;
482  return true;
483  }
484 
485  /// Examines the specified instruction and returns TRUE if the instruction
486  /// represents the start or end of an interesting lifetime. The slot or slots
487  /// starting or ending are added to the vector "slots" and "isStart" is set
488  /// accordingly.
489  /// \returns True if inst contains a lifetime start or end
490  bool isLifetimeStartOrEnd(const MachineInstr &MI,
492  bool &isStart);
493 
494  /// Construct the LiveIntervals for the slots.
495  void calculateLiveIntervals(unsigned NumSlots);
496 
497  /// Go over the machine function and change instructions which use stack
498  /// slots to use the joint slots.
499  void remapInstructions(DenseMap<int, int> &SlotRemap);
500 
501  /// The input program may contain instructions which are not inside lifetime
502  /// markers. This can happen due to a bug in the compiler or due to a bug in
503  /// user code (for example, returning a reference to a local variable).
504  /// This procedure checks all of the instructions in the function and
505  /// invalidates lifetime ranges which do not contain all of the instructions
506  /// which access that frame slot.
507  void removeInvalidSlotRanges();
508 
509  /// Map entries which point to other entries to their destination.
510  /// A->B->C becomes A->C.
511  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
512 };
513 
514 } // end anonymous namespace
515 
516 char StackColoring::ID = 0;
517 
519 
520 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
521  "Merge disjoint stack slots", false, false)
524  "Merge disjoint stack slots", false, false)
525 
526 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
527  AU.addRequired<SlotIndexes>();
529 }
530 
531 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
532 LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
533  const BitVector &BV) const {
534  dbgs() << tag << " : { ";
535  for (unsigned I = 0, E = BV.size(); I != E; ++I)
536  dbgs() << BV.test(I) << " ";
537  dbgs() << "}\n";
538 }
539 
540 LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
541  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
542  assert(BI != BlockLiveness.end() && "Block not found");
543  const BlockLifetimeInfo &BlockInfo = BI->second;
544 
545  dumpBV("BEGIN", BlockInfo.Begin);
546  dumpBV("END", BlockInfo.End);
547  dumpBV("LIVE_IN", BlockInfo.LiveIn);
548  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
549 }
550 
552  for (MachineBasicBlock *MBB : depth_first(MF)) {
553  dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
554  << MBB->getName() << "]\n";
555  dumpBB(MBB);
556  }
557 }
558 
559 LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
560  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
561  dbgs() << "Interval[" << I << "]:\n";
562  Intervals[I]->dump();
563  }
564 }
565 #endif
566 
567 static inline int getStartOrEndSlot(const MachineInstr &MI)
568 {
571  "Expected LIFETIME_START or LIFETIME_END op");
572  const MachineOperand &MO = MI.getOperand(0);
573  int Slot = MO.getIndex();
574  if (Slot >= 0)
575  return Slot;
576  return -1;
577 }
578 
579 // At the moment the only way to end a variable lifetime is with
580 // a VARIABLE_LIFETIME op (which can't contain a start). If things
581 // change and the IR allows for a single inst that both begins
582 // and ends lifetime(s), this interface will need to be reworked.
583 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
585  bool &isStart) {
588  int Slot = getStartOrEndSlot(MI);
589  if (Slot < 0)
590  return false;
591  if (!InterestingSlots.test(Slot))
592  return false;
593  slots.push_back(Slot);
595  isStart = false;
596  return true;
597  }
598  if (!applyFirstUse(Slot)) {
599  isStart = true;
600  return true;
601  }
603  if (!MI.isDebugInstr()) {
604  bool found = false;
605  for (const MachineOperand &MO : MI.operands()) {
606  if (!MO.isFI())
607  continue;
608  int Slot = MO.getIndex();
609  if (Slot<0)
610  continue;
611  if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
612  slots.push_back(Slot);
613  found = true;
614  }
615  }
616  if (found) {
617  isStart = true;
618  return true;
619  }
620  }
621  }
622  return false;
623 }
624 
625 unsigned StackColoring::collectMarkers(unsigned NumSlot) {
626  unsigned MarkersFound = 0;
627  BlockBitVecMap SeenStartMap;
628  InterestingSlots.clear();
629  InterestingSlots.resize(NumSlot);
630  ConservativeSlots.clear();
631  ConservativeSlots.resize(NumSlot);
632 
633  // number of start and end lifetime ops for each slot
634  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
635  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
636 
637  // Step 1: collect markers and populate the "InterestingSlots"
638  // and "ConservativeSlots" sets.
639  for (MachineBasicBlock *MBB : depth_first(MF)) {
640  // Compute the set of slots for which we've seen a START marker but have
641  // not yet seen an END marker at this point in the walk (e.g. on entry
642  // to this bb).
643  BitVector BetweenStartEnd;
644  BetweenStartEnd.resize(NumSlot);
646  PE = MBB->pred_end(); PI != PE; ++PI) {
647  BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
648  if (I != SeenStartMap.end()) {
649  BetweenStartEnd |= I->second;
650  }
651  }
652 
653  // Walk the instructions in the block to look for start/end ops.
654  for (MachineInstr &MI : *MBB) {
657  int Slot = getStartOrEndSlot(MI);
658  if (Slot < 0)
659  continue;
660  InterestingSlots.set(Slot);
662  BetweenStartEnd.set(Slot);
663  NumStartLifetimes[Slot] += 1;
664  } else {
665  BetweenStartEnd.reset(Slot);
666  NumEndLifetimes[Slot] += 1;
667  }
668  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
669  if (Allocation) {
670  LLVM_DEBUG(dbgs() << "Found a lifetime ");
672  ? "start"
673  : "end"));
674  LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
675  LLVM_DEBUG(dbgs()
676  << " with allocation: " << Allocation->getName() << "\n");
677  }
678  Markers.push_back(&MI);
679  MarkersFound += 1;
680  } else {
681  for (const MachineOperand &MO : MI.operands()) {
682  if (!MO.isFI())
683  continue;
684  int Slot = MO.getIndex();
685  if (Slot < 0)
686  continue;
687  if (! BetweenStartEnd.test(Slot)) {
688  ConservativeSlots.set(Slot);
689  }
690  }
691  }
692  }
693  BitVector &SeenStart = SeenStartMap[MBB];
694  SeenStart |= BetweenStartEnd;
695  }
696  if (!MarkersFound) {
697  return 0;
698  }
699 
700  // PR27903: slots with multiple start or end lifetime ops are not
701  // safe to enable for "lifetime-start-on-first-use".
702  for (unsigned slot = 0; slot < NumSlot; ++slot)
703  if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
704  ConservativeSlots.set(slot);
705  LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
706 
707  // Step 2: compute begin/end sets for each block
708 
709  // NOTE: We use a depth-first iteration to ensure that we obtain a
710  // deterministic numbering.
711  for (MachineBasicBlock *MBB : depth_first(MF)) {
712  // Assign a serial number to this basic block.
713  BasicBlocks[MBB] = BasicBlockNumbering.size();
714  BasicBlockNumbering.push_back(MBB);
715 
716  // Keep a reference to avoid repeated lookups.
717  BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
718 
719  BlockInfo.Begin.resize(NumSlot);
720  BlockInfo.End.resize(NumSlot);
721 
723  for (MachineInstr &MI : *MBB) {
724  bool isStart = false;
725  slots.clear();
726  if (isLifetimeStartOrEnd(MI, slots, isStart)) {
727  if (!isStart) {
728  assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
729  int Slot = slots[0];
730  if (BlockInfo.Begin.test(Slot)) {
731  BlockInfo.Begin.reset(Slot);
732  }
733  BlockInfo.End.set(Slot);
734  } else {
735  for (auto Slot : slots) {
736  LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
737  LLVM_DEBUG(dbgs()
738  << " at " << printMBBReference(*MBB) << " index ");
739  LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
740  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
741  if (Allocation) {
742  LLVM_DEBUG(dbgs()
743  << " with allocation: " << Allocation->getName());
744  }
745  LLVM_DEBUG(dbgs() << "\n");
746  if (BlockInfo.End.test(Slot)) {
747  BlockInfo.End.reset(Slot);
748  }
749  BlockInfo.Begin.set(Slot);
750  }
751  }
752  }
753  }
754  }
755 
756  // Update statistics.
757  NumMarkerSeen += MarkersFound;
758  return MarkersFound;
759 }
760 
761 void StackColoring::calculateLocalLiveness() {
762  unsigned NumIters = 0;
763  bool changed = true;
764  while (changed) {
765  changed = false;
766  ++NumIters;
767 
768  for (const MachineBasicBlock *BB : BasicBlockNumbering) {
769  // Use an iterator to avoid repeated lookups.
770  LivenessMap::iterator BI = BlockLiveness.find(BB);
771  assert(BI != BlockLiveness.end() && "Block not found");
772  BlockLifetimeInfo &BlockInfo = BI->second;
773 
774  // Compute LiveIn by unioning together the LiveOut sets of all preds.
775  BitVector LocalLiveIn;
776  for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
777  PE = BB->pred_end(); PI != PE; ++PI) {
778  LivenessMap::const_iterator I = BlockLiveness.find(*PI);
779  // PR37130: transformations prior to stack coloring can
780  // sometimes leave behind statically unreachable blocks; these
781  // can be safely skipped here.
782  if (I != BlockLiveness.end())
783  LocalLiveIn |= I->second.LiveOut;
784  }
785 
786  // Compute LiveOut by subtracting out lifetimes that end in this
787  // block, then adding in lifetimes that begin in this block. If
788  // we have both BEGIN and END markers in the same basic block
789  // then we know that the BEGIN marker comes after the END,
790  // because we already handle the case where the BEGIN comes
791  // before the END when collecting the markers (and building the
792  // BEGIN/END vectors).
793  BitVector LocalLiveOut = LocalLiveIn;
794  LocalLiveOut.reset(BlockInfo.End);
795  LocalLiveOut |= BlockInfo.Begin;
796 
797  // Update block LiveIn set, noting whether it has changed.
798  if (LocalLiveIn.test(BlockInfo.LiveIn)) {
799  changed = true;
800  BlockInfo.LiveIn |= LocalLiveIn;
801  }
802 
803  // Update block LiveOut set, noting whether it has changed.
804  if (LocalLiveOut.test(BlockInfo.LiveOut)) {
805  changed = true;
806  BlockInfo.LiveOut |= LocalLiveOut;
807  }
808  }
809  } // while changed.
810 
811  NumIterations = NumIters;
812 }
813 
814 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
816  SmallVector<bool, 16> DefinitelyInUse;
817 
818  // For each block, find which slots are active within this block
819  // and update the live intervals.
820  for (const MachineBasicBlock &MBB : *MF) {
821  Starts.clear();
822  Starts.resize(NumSlots);
823  DefinitelyInUse.clear();
824  DefinitelyInUse.resize(NumSlots);
825 
826  // Start the interval of the slots that we previously found to be 'in-use'.
827  BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
828  for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
829  pos = MBBLiveness.LiveIn.find_next(pos)) {
830  Starts[pos] = Indexes->getMBBStartIdx(&MBB);
831  }
832 
833  // Create the interval for the basic blocks containing lifetime begin/end.
834  for (const MachineInstr &MI : MBB) {
836  bool IsStart = false;
837  if (!isLifetimeStartOrEnd(MI, slots, IsStart))
838  continue;
839  SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
840  for (auto Slot : slots) {
841  if (IsStart) {
842  // If a slot is already definitely in use, we don't have to emit
843  // a new start marker because there is already a pre-existing
844  // one.
845  if (!DefinitelyInUse[Slot]) {
846  LiveStarts[Slot].push_back(ThisIndex);
847  DefinitelyInUse[Slot] = true;
848  }
849  if (!Starts[Slot].isValid())
850  Starts[Slot] = ThisIndex;
851  } else {
852  if (Starts[Slot].isValid()) {
853  VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
854  Intervals[Slot]->addSegment(
855  LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
856  Starts[Slot] = SlotIndex(); // Invalidate the start index
857  DefinitelyInUse[Slot] = false;
858  }
859  }
860  }
861  }
862 
863  // Finish up started segments
864  for (unsigned i = 0; i < NumSlots; ++i) {
865  if (!Starts[i].isValid())
866  continue;
867 
868  SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
869  VNInfo *VNI = Intervals[i]->getValNumInfo(0);
870  Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
871  }
872  }
873 }
874 
875 bool StackColoring::removeAllMarkers() {
876  unsigned Count = 0;
877  for (MachineInstr *MI : Markers) {
878  MI->eraseFromParent();
879  Count++;
880  }
881  Markers.clear();
882 
883  LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
884  return Count;
885 }
886 
887 void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
888  unsigned FixedInstr = 0;
889  unsigned FixedMemOp = 0;
890  unsigned FixedDbg = 0;
891 
892  // Remap debug information that refers to stack slots.
893  for (auto &VI : MF->getVariableDbgInfo()) {
894  if (!VI.Var)
895  continue;
896  if (SlotRemap.count(VI.Slot)) {
897  LLVM_DEBUG(dbgs() << "Remapping debug info for ["
898  << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
899  VI.Slot = SlotRemap[VI.Slot];
900  FixedDbg++;
901  }
902  }
903 
904  // Keep a list of *allocas* which need to be remapped.
906 
907  // Keep a list of allocas which has been affected by the remap.
909 
910  for (const std::pair<int, int> &SI : SlotRemap) {
911  const AllocaInst *From = MFI->getObjectAllocation(SI.first);
912  const AllocaInst *To = MFI->getObjectAllocation(SI.second);
913  assert(To && From && "Invalid allocation object");
914  Allocas[From] = To;
915 
916  // AA might be used later for instruction scheduling, and we need it to be
917  // able to deduce the correct aliasing releationships between pointers
918  // derived from the alloca being remapped and the target of that remapping.
919  // The only safe way, without directly informing AA about the remapping
920  // somehow, is to directly update the IR to reflect the change being made
921  // here.
922  Instruction *Inst = const_cast<AllocaInst *>(To);
923  if (From->getType() != To->getType()) {
924  BitCastInst *Cast = new BitCastInst(Inst, From->getType());
925  Cast->insertAfter(Inst);
926  Inst = Cast;
927  }
928 
929  // We keep both slots to maintain AliasAnalysis metadata later.
930  MergedAllocas.insert(From);
931  MergedAllocas.insert(To);
932 
933  // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
934  // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
935  // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
937  = MFI->getObjectSSPLayout(SI.first);
939  if (FromKind != MachineFrameInfo::SSPLK_None &&
940  (ToKind == MachineFrameInfo::SSPLK_None ||
942  FromKind != MachineFrameInfo::SSPLK_AddrOf)))
943  MFI->setObjectSSPLayout(SI.second, FromKind);
944 
945  // The new alloca might not be valid in a llvm.dbg.declare for this
946  // variable, so undef out the use to make the verifier happy.
947  AllocaInst *FromAI = const_cast<AllocaInst *>(From);
948  if (FromAI->isUsedByMetadata())
950  for (auto &Use : FromAI->uses()) {
951  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
952  if (BCI->isUsedByMetadata())
953  ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
954  }
955 
956  // Note that this will not replace uses in MMOs (which we'll update below),
957  // or anywhere else (which is why we won't delete the original
958  // instruction).
959  FromAI->replaceAllUsesWith(Inst);
960  }
961 
962  // Remap all instructions to the new stack slots.
963  for (MachineBasicBlock &BB : *MF)
964  for (MachineInstr &I : BB) {
965  // Skip lifetime markers. We'll remove them soon.
966  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
967  I.getOpcode() == TargetOpcode::LIFETIME_END)
968  continue;
969 
970  // Update the MachineMemOperand to use the new alloca.
971  for (MachineMemOperand *MMO : I.memoperands()) {
972  // We've replaced IR-level uses of the remapped allocas, so we only
973  // need to replace direct uses here.
974  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
975  if (!AI)
976  continue;
977 
978  if (!Allocas.count(AI))
979  continue;
980 
981  MMO->setValue(Allocas[AI]);
982  FixedMemOp++;
983  }
984 
985  // Update all of the machine instruction operands.
986  for (MachineOperand &MO : I.operands()) {
987  if (!MO.isFI())
988  continue;
989  int FromSlot = MO.getIndex();
990 
991  // Don't touch arguments.
992  if (FromSlot<0)
993  continue;
994 
995  // Only look at mapped slots.
996  if (!SlotRemap.count(FromSlot))
997  continue;
998 
999  // In a debug build, check that the instruction that we are modifying is
1000  // inside the expected live range. If the instruction is not inside
1001  // the calculated range then it means that the alloca usage moved
1002  // outside of the lifetime markers, or that the user has a bug.
1003  // NOTE: Alloca address calculations which happen outside the lifetime
1004  // zone are okay, despite the fact that we don't have a good way
1005  // for validating all of the usages of the calculation.
1006 #ifndef NDEBUG
1007  bool TouchesMemory = I.mayLoad() || I.mayStore();
1008  // If we *don't* protect the user from escaped allocas, don't bother
1009  // validating the instructions.
1010  if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1011  SlotIndex Index = Indexes->getInstructionIndex(I);
1012  const LiveInterval *Interval = &*Intervals[FromSlot];
1013  assert(Interval->find(Index) != Interval->end() &&
1014  "Found instruction usage outside of live range.");
1015  }
1016 #endif
1017 
1018  // Fix the machine instructions.
1019  int ToSlot = SlotRemap[FromSlot];
1020  MO.setIndex(ToSlot);
1021  FixedInstr++;
1022  }
1023 
1024  // We adjust AliasAnalysis information for merged stack slots.
1025  MachineSDNode::mmo_iterator NewMemOps =
1026  MF->allocateMemRefsArray(I.getNumMemOperands());
1027  unsigned MemOpIdx = 0;
1028  bool ReplaceMemOps = false;
1029  for (MachineMemOperand *MMO : I.memoperands()) {
1030  // If this memory location can be a slot remapped here,
1031  // we remove AA information.
1032  bool MayHaveConflictingAAMD = false;
1033  if (MMO->getAAInfo()) {
1034  if (const Value *MMOV = MMO->getValue()) {
1036  getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1037 
1038  if (Objs.empty())
1039  MayHaveConflictingAAMD = true;
1040  else
1041  for (Value *V : Objs) {
1042  // If this memory location comes from a known stack slot
1043  // that is not remapped, we continue checking.
1044  // Otherwise, we need to invalidate AA infomation.
1045  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1046  if (AI && MergedAllocas.count(AI)) {
1047  MayHaveConflictingAAMD = true;
1048  break;
1049  }
1050  }
1051  }
1052  }
1053  if (MayHaveConflictingAAMD) {
1054  NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
1055  ReplaceMemOps = true;
1056  }
1057  else
1058  NewMemOps[MemOpIdx++] = MMO;
1059  }
1060 
1061  // If any memory operand is updated, set memory references of
1062  // this instruction.
1063  if (ReplaceMemOps)
1064  I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
1065  }
1066 
1067  // Update the location of C++ catch objects for the MSVC personality routine.
1068  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1069  for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1070  for (WinEHHandlerType &H : TBME.HandlerArray)
1072  SlotRemap.count(H.CatchObj.FrameIndex))
1073  H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1074 
1075  LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1076  LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1077  LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1078 }
1079 
1080 void StackColoring::removeInvalidSlotRanges() {
1081  for (MachineBasicBlock &BB : *MF)
1082  for (MachineInstr &I : BB) {
1083  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1084  I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1085  continue;
1086 
1087  // Some intervals are suspicious! In some cases we find address
1088  // calculations outside of the lifetime zone, but not actual memory
1089  // read or write. Memory accesses outside of the lifetime zone are a clear
1090  // violation, but address calculations are okay. This can happen when
1091  // GEPs are hoisted outside of the lifetime zone.
1092  // So, in here we only check instructions which can read or write memory.
1093  if (!I.mayLoad() && !I.mayStore())
1094  continue;
1095 
1096  // Check all of the machine operands.
1097  for (const MachineOperand &MO : I.operands()) {
1098  if (!MO.isFI())
1099  continue;
1100 
1101  int Slot = MO.getIndex();
1102 
1103  if (Slot<0)
1104  continue;
1105 
1106  if (Intervals[Slot]->empty())
1107  continue;
1108 
1109  // Check that the used slot is inside the calculated lifetime range.
1110  // If it is not, warn about it and invalidate the range.
1111  LiveInterval *Interval = &*Intervals[Slot];
1112  SlotIndex Index = Indexes->getInstructionIndex(I);
1113  if (Interval->find(Index) == Interval->end()) {
1114  Interval->clear();
1115  LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1116  EscapedAllocas++;
1117  }
1118  }
1119  }
1120 }
1121 
1122 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1123  unsigned NumSlots) {
1124  // Expunge slot remap map.
1125  for (unsigned i=0; i < NumSlots; ++i) {
1126  // If we are remapping i
1127  if (SlotRemap.count(i)) {
1128  int Target = SlotRemap[i];
1129  // As long as our target is mapped to something else, follow it.
1130  while (SlotRemap.count(Target)) {
1131  Target = SlotRemap[Target];
1132  SlotRemap[i] = Target;
1133  }
1134  }
1135  }
1136 }
1137 
1138 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1139  LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1140  << "********** Function: " << Func.getName() << '\n');
1141  MF = &Func;
1142  MFI = &MF->getFrameInfo();
1143  Indexes = &getAnalysis<SlotIndexes>();
1144  BlockLiveness.clear();
1145  BasicBlocks.clear();
1146  BasicBlockNumbering.clear();
1147  Markers.clear();
1148  Intervals.clear();
1149  LiveStarts.clear();
1150  VNInfoAllocator.Reset();
1151 
1152  unsigned NumSlots = MFI->getObjectIndexEnd();
1153 
1154  // If there are no stack slots then there are no markers to remove.
1155  if (!NumSlots)
1156  return false;
1157 
1158  SmallVector<int, 8> SortedSlots;
1159  SortedSlots.reserve(NumSlots);
1160  Intervals.reserve(NumSlots);
1161  LiveStarts.resize(NumSlots);
1162 
1163  unsigned NumMarkers = collectMarkers(NumSlots);
1164 
1165  unsigned TotalSize = 0;
1166  LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1167  << " slots\n");
1168  LLVM_DEBUG(dbgs() << "Slot structure:\n");
1169 
1170  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1171  LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1172  << " bytes.\n");
1173  TotalSize += MFI->getObjectSize(i);
1174  }
1175 
1176  LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1177 
1178  // Don't continue because there are not enough lifetime markers, or the
1179  // stack is too small, or we are told not to optimize the slots.
1180  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1181  skipFunction(Func.getFunction())) {
1182  LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1183  return removeAllMarkers();
1184  }
1185 
1186  for (unsigned i=0; i < NumSlots; ++i) {
1187  std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1188  LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1189  Intervals.push_back(std::move(LI));
1190  SortedSlots.push_back(i);
1191  }
1192 
1193  // Calculate the liveness of each block.
1194  calculateLocalLiveness();
1195  LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1196  LLVM_DEBUG(dump());
1197 
1198  // Propagate the liveness information.
1199  calculateLiveIntervals(NumSlots);
1200  LLVM_DEBUG(dumpIntervals());
1201 
1202  // Search for allocas which are used outside of the declared lifetime
1203  // markers.
1205  removeInvalidSlotRanges();
1206 
1207  // Maps old slots to new slots.
1208  DenseMap<int, int> SlotRemap;
1209  unsigned RemovedSlots = 0;
1210  unsigned ReducedSize = 0;
1211 
1212  // Do not bother looking at empty intervals.
1213  for (unsigned I = 0; I < NumSlots; ++I) {
1214  if (Intervals[SortedSlots[I]]->empty())
1215  SortedSlots[I] = -1;
1216  }
1217 
1218  // This is a simple greedy algorithm for merging allocas. First, sort the
1219  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1220  // for disjoint slots. When you find disjoint slots, merge the samller one
1221  // into the bigger one and update the live interval. Remove the small alloca
1222  // and continue.
1223 
1224  // Sort the slots according to their size. Place unused slots at the end.
1225  // Use stable sort to guarantee deterministic code generation.
1226  std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
1227  [this](int LHS, int RHS) {
1228  // We use -1 to denote a uninteresting slot. Place these slots at the end.
1229  if (LHS == -1) return false;
1230  if (RHS == -1) return true;
1231  // Sort according to size.
1232  return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1233  });
1234 
1235  for (auto &s : LiveStarts)
1236  llvm::sort(s.begin(), s.end());
1237 
1238  bool Changed = true;
1239  while (Changed) {
1240  Changed = false;
1241  for (unsigned I = 0; I < NumSlots; ++I) {
1242  if (SortedSlots[I] == -1)
1243  continue;
1244 
1245  for (unsigned J=I+1; J < NumSlots; ++J) {
1246  if (SortedSlots[J] == -1)
1247  continue;
1248 
1249  int FirstSlot = SortedSlots[I];
1250  int SecondSlot = SortedSlots[J];
1251  LiveInterval *First = &*Intervals[FirstSlot];
1252  LiveInterval *Second = &*Intervals[SecondSlot];
1253  auto &FirstS = LiveStarts[FirstSlot];
1254  auto &SecondS = LiveStarts[SecondSlot];
1255  assert(!First->empty() && !Second->empty() && "Found an empty range");
1256 
1257  // Merge disjoint slots. This is a little bit tricky - see the
1258  // Implementation Notes section for an explanation.
1259  if (!First->isLiveAtIndexes(SecondS) &&
1260  !Second->isLiveAtIndexes(FirstS)) {
1261  Changed = true;
1262  First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1263 
1264  int OldSize = FirstS.size();
1265  FirstS.append(SecondS.begin(), SecondS.end());
1266  auto Mid = FirstS.begin() + OldSize;
1267  std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1268 
1269  SlotRemap[SecondSlot] = FirstSlot;
1270  SortedSlots[J] = -1;
1271  LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1272  << SecondSlot << " together.\n");
1273  unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1274  MFI->getObjectAlignment(SecondSlot));
1275 
1276  assert(MFI->getObjectSize(FirstSlot) >=
1277  MFI->getObjectSize(SecondSlot) &&
1278  "Merging a small object into a larger one");
1279 
1280  RemovedSlots+=1;
1281  ReducedSize += MFI->getObjectSize(SecondSlot);
1282  MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1283  MFI->RemoveStackObject(SecondSlot);
1284  }
1285  }
1286  }
1287  }// While changed.
1288 
1289  // Record statistics.
1290  StackSpaceSaved += ReducedSize;
1291  StackSlotMerged += RemovedSlots;
1292  LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1293  << ReducedSize << " bytes\n");
1294 
1295  // Scan the entire function and update all machine operands that use frame
1296  // indices to use the remapped frame index.
1297  expungeSlotMap(SlotRemap, NumSlots);
1298  remapInstructions(SlotRemap);
1299 
1300  return removeAllMarkers();
1301 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:372
bool empty() const
Definition: LiveInterval.h:370
void push_back(const T &Elt)
Definition: SmallVector.h:218
BitVector & set()
Definition: BitVector.h:398
iterator_range< use_iterator > uses()
Definition: Value.h:355
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:77
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static int getStartOrEndSlot(const MachineInstr &MI)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Did not trigger a stack protector.
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:37
Merge disjoint stack slots
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:638
This file contains the declarations for metadata subclasses.
bool test(unsigned Idx) const
Definition: BitVector.h:502
bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
STATISTIC(NumFunctions, "Total number of functions")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:452
std::vector< MachineBasicBlock * >::const_iterator const_pred_iterator
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:376
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
VariableDbgInfoMapTy & getVariableDbgInfo()
Value * get() const
Definition: Use.h:108
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
A description of a memory reference used in the backend.
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
Definition: BitVector.h:367
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void Reset()
Deallocate all but the current slab and reset the current pointer to the beginning of it...
Definition: Allocator.h:194
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
char & StackColoringID
StackSlotColoring - This pass performs stack coloring and merging.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
iterator end()
Definition: LiveInterval.h:212
The address of this allocation is exposed and triggered protection.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
SlotIndex getInstructionIndex(const MachineInstr &MI) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:414
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:311
void MergeSegmentsInAsValue(const LiveRange &RHS, VNInfo *LHSValNo)
Merge all of the live segments of a specific val# in RHS into this live range as the specified value ...
static void handleRAUW(Value *From, Value *To)
Definition: Metadata.cpp:392
SlotIndexes pass.
Definition: SlotIndexes.h:331
This class represents a no-op cast from one type to another.
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) INITIALIZE_PASS_END(StackColoring
int getObjectIndexEnd() const
Return one past the maximum frame object index.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
Array or nested array >= SSP-buffer-size.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:798
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:487
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:439
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:397
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
#define DEBUG_TYPE
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:497
R600 Clause Merge
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
size_t size() const
Definition: SmallVector.h:53
bool isDebugInstr() const
Definition: MachineInstr.h:851
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:969
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
void initializeStackColoringPass(PassRegistry &)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:642
SSPLayoutKind
Stack Smashing Protection (SSP) rules require that vulnerable stack allocations are located close the...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition: Value.h:489
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Target - Wrapper for Target specific information.
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:305
Representation of each machine instruction.
Definition: MachineInstr.h:60
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:80
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:170
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
union llvm::WinEHHandlerType::@174 CatchObj
The CatchObj starts out life as an LLVM alloca and is eventually turned frame index.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
LLVM Value Representation.
Definition: Value.h:73
IRTranslator LLVM IR MI
void print(raw_ostream &os) const
Print this index to the given raw_ostream.
#define LLVM_DEBUG(X)
Definition: Debug.h:119
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
an instruction to allocate memory on the stack
Definition: Instructions.h:60
void resize(size_type N)
Definition: SmallVector.h:351