LLVM  10.0.0svn
StackColoring.cpp
Go to the documentation of this file.
1 //===- StackColoring.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements the stack-coloring optimization that looks for
10 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
11 // which represent the possible lifetime of stack slots. It attempts to
12 // merge disjoint stack slots and reduce the used stack space.
13 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14 //
15 // TODO: In the future we plan to improve stack coloring in the following ways:
16 // 1. Allow merging multiple small slots into a single larger slot at different
17 // offsets.
18 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19 // spill slots.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
38 #include "llvm/CodeGen/Passes.h"
43 #include "llvm/Config/llvm-config.h"
44 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/Metadata.h"
49 #include "llvm/IR/Use.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <limits>
60 #include <memory>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 #define DEBUG_TYPE "stack-coloring"
66 
67 static cl::opt<bool>
68 DisableColoring("no-stack-coloring",
69  cl::init(false), cl::Hidden,
70  cl::desc("Disable stack coloring"));
71 
72 /// The user may write code that uses allocas outside of the declared lifetime
73 /// zone. This can happen when the user returns a reference to a local
74 /// data-structure. We can detect these cases and decide not to optimize the
75 /// code. If this flag is enabled, we try to save the user. This option
76 /// is treated as overriding LifetimeStartOnFirstUse below.
77 static cl::opt<bool>
78 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
79  cl::init(false), cl::Hidden,
80  cl::desc("Do not optimize lifetime zones that "
81  "are broken"));
82 
83 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
84 /// use of stack slot as start of slot lifetime, as opposed to looking
85 /// for LIFETIME_START marker). See "Implementation notes" below for
86 /// more info.
87 static cl::opt<bool>
88 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
89  cl::init(true), cl::Hidden,
90  cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
91 
92 
93 STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
94 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
95 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
96 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
97 
98 //===----------------------------------------------------------------------===//
99 // StackColoring Pass
100 //===----------------------------------------------------------------------===//
101 //
102 // Stack Coloring reduces stack usage by merging stack slots when they
103 // can't be used together. For example, consider the following C program:
104 //
105 // void bar(char *, int);
106 // void foo(bool var) {
107 // A: {
108 // char z[4096];
109 // bar(z, 0);
110 // }
111 //
112 // char *p;
113 // char x[4096];
114 // char y[4096];
115 // if (var) {
116 // p = x;
117 // } else {
118 // bar(y, 1);
119 // p = y + 1024;
120 // }
121 // B:
122 // bar(p, 2);
123 // }
124 //
125 // Naively-compiled, this program would use 12k of stack space. However, the
126 // stack slot corresponding to `z` is always destroyed before either of the
127 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
128 // is true, while `y` is only used if `var` is false. So in no time are 2
129 // of the stack slots used together, and therefore we can merge them,
130 // compiling the function using only a single 4k alloca:
131 //
132 // void foo(bool var) { // equivalent
133 // char x[4096];
134 // char *p;
135 // bar(x, 0);
136 // if (var) {
137 // p = x;
138 // } else {
139 // bar(x, 1);
140 // p = x + 1024;
141 // }
142 // bar(p, 2);
143 // }
144 //
145 // This is an important optimization if we want stack space to be under
146 // control in large functions, both open-coded ones and ones created by
147 // inlining.
148 //
149 // Implementation Notes:
150 // ---------------------
151 //
152 // An important part of the above reasoning is that `z` can't be accessed
153 // while the latter 2 calls to `bar` are running. This is justified because
154 // `z`'s lifetime is over after we exit from block `A:`, so any further
155 // accesses to it would be UB. The way we represent this information
156 // in LLVM is by having frontends delimit blocks with `lifetime.start`
157 // and `lifetime.end` intrinsics.
158 //
159 // The effect of these intrinsics seems to be as follows (maybe I should
160 // specify this in the reference?):
161 //
162 // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
163 // lifetime intrinsic refers to that stack slot, in which case
164 // it is marked as *in-scope*.
165 // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
166 // the stack slot is overwritten with `undef`.
167 // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
168 // L4) on function exit, all stack slots are marked as *out-of-scope*.
169 // L5) `lifetime.end` is a no-op when called on a slot that is already
170 // *out-of-scope*.
171 // L6) memory accesses to *out-of-scope* stack slots are UB.
172 // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
173 // are invalidated, unless the slot is "degenerate". This is used to
174 // justify not marking slots as in-use until the pointer to them is
175 // used, but feels a bit hacky in the presence of things like LICM. See
176 // the "Degenerate Slots" section for more details.
177 //
178 // Now, let's ground stack coloring on these rules. We'll define a slot
179 // as *in-use* at a (dynamic) point in execution if it either can be
180 // written to at that point, or if it has a live and non-undef content
181 // at that point.
182 //
183 // Obviously, slots that are never *in-use* together can be merged, and
184 // in our example `foo`, the slots for `x`, `y` and `z` are never
185 // in-use together (of course, sometimes slots that *are* in-use together
186 // might still be mergable, but we don't care about that here).
187 //
188 // In this implementation, we successively merge pairs of slots that are
189 // not *in-use* together. We could be smarter - for example, we could merge
190 // a single large slot with 2 small slots, or we could construct the
191 // interference graph and run a "smart" graph coloring algorithm, but with
192 // that aside, how do we find out whether a pair of slots might be *in-use*
193 // together?
194 //
195 // From our rules, we see that *out-of-scope* slots are never *in-use*,
196 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
197 // until their address is taken. Therefore, we can approximate slot activity
198 // using dataflow.
199 //
200 // A subtle point: naively, we might try to figure out which pairs of
201 // stack-slots interfere by propagating `S in-use` through the CFG for every
202 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
203 // which they are both *in-use*.
204 //
205 // That is sound, but overly conservative in some cases: in our (artificial)
206 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
207 // as `x` is only in use if we came in from the `var` edge and `y` only
208 // if we came from the `!var` edge, they still can't be in use together.
209 // See PR32488 for an important real-life case.
210 //
211 // If we wanted to find all points of interference precisely, we could
212 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
213 // would be precise, but requires propagating `O(n^2)` dataflow facts.
214 //
215 // However, we aren't interested in the *set* of points of interference
216 // between 2 stack slots, only *whether* there *is* such a point. So we
217 // can rely on a little trick: for `S` and `T` to be in-use together,
218 // one of them needs to become in-use while the other is in-use (or
219 // they might both become in use simultaneously). We can check this
220 // by also keeping track of the points at which a stack slot might *start*
221 // being in-use.
222 //
223 // Exact first use:
224 // ----------------
225 //
226 // Consider the following motivating example:
227 //
228 // int foo() {
229 // char b1[1024], b2[1024];
230 // if (...) {
231 // char b3[1024];
232 // <uses of b1, b3>;
233 // return x;
234 // } else {
235 // char b4[1024], b5[1024];
236 // <uses of b2, b4, b5>;
237 // return y;
238 // }
239 // }
240 //
241 // In the code above, "b3" and "b4" are declared in distinct lexical
242 // scopes, meaning that it is easy to prove that they can share the
243 // same stack slot. Variables "b1" and "b2" are declared in the same
244 // scope, meaning that from a lexical point of view, their lifetimes
245 // overlap. From a control flow pointer of view, however, the two
246 // variables are accessed in disjoint regions of the CFG, thus it
247 // should be possible for them to share the same stack slot. An ideal
248 // stack allocation for the function above would look like:
249 //
250 // slot 0: b1, b2
251 // slot 1: b3, b4
252 // slot 2: b5
253 //
254 // Achieving this allocation is tricky, however, due to the way
255 // lifetime markers are inserted. Here is a simplified view of the
256 // control flow graph for the code above:
257 //
258 // +------ block 0 -------+
259 // 0| LIFETIME_START b1, b2 |
260 // 1| <test 'if' condition> |
261 // +-----------------------+
262 // ./ \.
263 // +------ block 1 -------+ +------ block 2 -------+
264 // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
265 // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
266 // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
267 // +-----------------------+ +-----------------------+
268 // \. /.
269 // +------ block 3 -------+
270 // 8| <cleanupcode> |
271 // 9| LIFETIME_END b1, b2 |
272 // 10| return |
273 // +-----------------------+
274 //
275 // If we create live intervals for the variables above strictly based
276 // on the lifetime markers, we'll get the set of intervals on the
277 // left. If we ignore the lifetime start markers and instead treat a
278 // variable's lifetime as beginning with the first reference to the
279 // var, then we get the intervals on the right.
280 //
281 // LIFETIME_START First Use
282 // b1: [0,9] [3,4] [8,9]
283 // b2: [0,9] [6,9]
284 // b3: [2,4] [3,4]
285 // b4: [5,7] [6,7]
286 // b5: [5,7] [6,7]
287 //
288 // For the intervals on the left, the best we can do is overlap two
289 // variables (b3 and b4, for example); this gives us a stack size of
290 // 4*1024 bytes, not ideal. When treating first-use as the start of a
291 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
292 // byte stack (better).
293 //
294 // Degenerate Slots:
295 // -----------------
296 //
297 // Relying entirely on first-use of stack slots is problematic,
298 // however, due to the fact that optimizations can sometimes migrate
299 // uses of a variable outside of its lifetime start/end region. Here
300 // is an example:
301 //
302 // int bar() {
303 // char b1[1024], b2[1024];
304 // if (...) {
305 // <uses of b2>
306 // return y;
307 // } else {
308 // <uses of b1>
309 // while (...) {
310 // char b3[1024];
311 // <uses of b3>
312 // }
313 // }
314 // }
315 //
316 // Before optimization, the control flow graph for the code above
317 // might look like the following:
318 //
319 // +------ block 0 -------+
320 // 0| LIFETIME_START b1, b2 |
321 // 1| <test 'if' condition> |
322 // +-----------------------+
323 // ./ \.
324 // +------ block 1 -------+ +------- block 2 -------+
325 // 2| <uses of b2> | 3| <uses of b1> |
326 // +-----------------------+ +-----------------------+
327 // | |
328 // | +------- block 3 -------+ <-\.
329 // | 4| <while condition> | |
330 // | +-----------------------+ |
331 // | / | |
332 // | / +------- block 4 -------+
333 // \ / 5| LIFETIME_START b3 | |
334 // \ / 6| <uses of b3> | |
335 // \ / 7| LIFETIME_END b3 | |
336 // \ | +------------------------+ |
337 // \ | \ /
338 // +------ block 5 -----+ \---------------
339 // 8| <cleanupcode> |
340 // 9| LIFETIME_END b1, b2 |
341 // 10| return |
342 // +---------------------+
343 //
344 // During optimization, however, it can happen that an instruction
345 // computing an address in "b3" (for example, a loop-invariant GEP) is
346 // hoisted up out of the loop from block 4 to block 2. [Note that
347 // this is not an actual load from the stack, only an instruction that
348 // computes the address to be loaded]. If this happens, there is now a
349 // path leading from the first use of b3 to the return instruction
350 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
351 // now larger than if we were computing live intervals strictly based
352 // on lifetime markers. In the example above, this lengthened lifetime
353 // would mean that it would appear illegal to overlap b3 with b2.
354 //
355 // To deal with this such cases, the code in ::collectMarkers() below
356 // tries to identify "degenerate" slots -- those slots where on a single
357 // forward pass through the CFG we encounter a first reference to slot
358 // K before we hit the slot K lifetime start marker. For such slots,
359 // we fall back on using the lifetime start marker as the beginning of
360 // the variable's lifetime. NB: with this implementation, slots can
361 // appear degenerate in cases where there is unstructured control flow:
362 //
363 // if (q) goto mid;
364 // if (x > 9) {
365 // int b[100];
366 // memcpy(&b[0], ...);
367 // mid: b[k] = ...;
368 // abc(&b);
369 // }
370 //
371 // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
372 // before visiting the memcpy block (which will contain the lifetime start
373 // for "b" then it will appear that 'b' has a degenerate lifetime.
374 //
375 
376 namespace {
377 
378 /// StackColoring - A machine pass for merging disjoint stack allocations,
379 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
380 class StackColoring : public MachineFunctionPass {
381  MachineFrameInfo *MFI;
382  MachineFunction *MF;
383 
384  /// A class representing liveness information for a single basic block.
385  /// Each bit in the BitVector represents the liveness property
386  /// for a different stack slot.
387  struct BlockLifetimeInfo {
388  /// Which slots BEGINs in each basic block.
389  BitVector Begin;
390 
391  /// Which slots ENDs in each basic block.
392  BitVector End;
393 
394  /// Which slots are marked as LIVE_IN, coming into each basic block.
395  BitVector LiveIn;
396 
397  /// Which slots are marked as LIVE_OUT, coming out of each basic block.
398  BitVector LiveOut;
399  };
400 
401  /// Maps active slots (per bit) for each basic block.
403  LivenessMap BlockLiveness;
404 
405  /// Maps serial numbers to basic blocks.
407 
408  /// Maps basic blocks to a serial number.
409  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
410 
411  /// Maps slots to their use interval. Outside of this interval, slots
412  /// values are either dead or `undef` and they will not be written to.
414 
415  /// Maps slots to the points where they can become in-use.
417 
418  /// VNInfo is used for the construction of LiveIntervals.
419  VNInfo::Allocator VNInfoAllocator;
420 
421  /// SlotIndex analysis object.
422  SlotIndexes *Indexes;
423 
424  /// The list of lifetime markers found. These markers are to be removed
425  /// once the coloring is done.
427 
428  /// Record the FI slots for which we have seen some sort of
429  /// lifetime marker (either start or end).
430  BitVector InterestingSlots;
431 
432  /// FI slots that need to be handled conservatively (for these
433  /// slots lifetime-start-on-first-use is disabled).
434  BitVector ConservativeSlots;
435 
436  /// Number of iterations taken during data flow analysis.
437  unsigned NumIterations;
438 
439 public:
440  static char ID;
441 
442  StackColoring() : MachineFunctionPass(ID) {
444  }
445 
446  void getAnalysisUsage(AnalysisUsage &AU) const override;
447  bool runOnMachineFunction(MachineFunction &Func) override;
448 
449 private:
450  /// Used in collectMarkers
451  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
452 
453  /// Debug.
454  void dump() const;
455  void dumpIntervals() const;
456  void dumpBB(MachineBasicBlock *MBB) const;
457  void dumpBV(const char *tag, const BitVector &BV) const;
458 
459  /// Removes all of the lifetime marker instructions from the function.
460  /// \returns true if any markers were removed.
461  bool removeAllMarkers();
462 
463  /// Scan the machine function and find all of the lifetime markers.
464  /// Record the findings in the BEGIN and END vectors.
465  /// \returns the number of markers found.
466  unsigned collectMarkers(unsigned NumSlot);
467 
468  /// Perform the dataflow calculation and calculate the lifetime for each of
469  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
470  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
471  /// in and out blocks.
472  void calculateLocalLiveness();
473 
474  /// Returns TRUE if we're using the first-use-begins-lifetime method for
475  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
476  bool applyFirstUse(int Slot) {
478  return false;
479  if (ConservativeSlots.test(Slot))
480  return false;
481  return true;
482  }
483 
484  /// Examines the specified instruction and returns TRUE if the instruction
485  /// represents the start or end of an interesting lifetime. The slot or slots
486  /// starting or ending are added to the vector "slots" and "isStart" is set
487  /// accordingly.
488  /// \returns True if inst contains a lifetime start or end
489  bool isLifetimeStartOrEnd(const MachineInstr &MI,
491  bool &isStart);
492 
493  /// Construct the LiveIntervals for the slots.
494  void calculateLiveIntervals(unsigned NumSlots);
495 
496  /// Go over the machine function and change instructions which use stack
497  /// slots to use the joint slots.
498  void remapInstructions(DenseMap<int, int> &SlotRemap);
499 
500  /// The input program may contain instructions which are not inside lifetime
501  /// markers. This can happen due to a bug in the compiler or due to a bug in
502  /// user code (for example, returning a reference to a local variable).
503  /// This procedure checks all of the instructions in the function and
504  /// invalidates lifetime ranges which do not contain all of the instructions
505  /// which access that frame slot.
506  void removeInvalidSlotRanges();
507 
508  /// Map entries which point to other entries to their destination.
509  /// A->B->C becomes A->C.
510  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
511 };
512 
513 } // end anonymous namespace
514 
515 char StackColoring::ID = 0;
516 
518 
519 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
520  "Merge disjoint stack slots", false, false)
523  "Merge disjoint stack slots", false, false)
524 
525 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
526  AU.addRequired<SlotIndexes>();
528 }
529 
530 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
531 LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
532  const BitVector &BV) const {
533  dbgs() << tag << " : { ";
534  for (unsigned I = 0, E = BV.size(); I != E; ++I)
535  dbgs() << BV.test(I) << " ";
536  dbgs() << "}\n";
537 }
538 
539 LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
540  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
541  assert(BI != BlockLiveness.end() && "Block not found");
542  const BlockLifetimeInfo &BlockInfo = BI->second;
543 
544  dumpBV("BEGIN", BlockInfo.Begin);
545  dumpBV("END", BlockInfo.End);
546  dumpBV("LIVE_IN", BlockInfo.LiveIn);
547  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
548 }
549 
551  for (MachineBasicBlock *MBB : depth_first(MF)) {
552  dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
553  << MBB->getName() << "]\n";
554  dumpBB(MBB);
555  }
556 }
557 
558 LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
559  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
560  dbgs() << "Interval[" << I << "]:\n";
561  Intervals[I]->dump();
562  }
563 }
564 #endif
565 
566 static inline int getStartOrEndSlot(const MachineInstr &MI)
567 {
570  "Expected LIFETIME_START or LIFETIME_END op");
571  const MachineOperand &MO = MI.getOperand(0);
572  int Slot = MO.getIndex();
573  if (Slot >= 0)
574  return Slot;
575  return -1;
576 }
577 
578 // At the moment the only way to end a variable lifetime is with
579 // a VARIABLE_LIFETIME op (which can't contain a start). If things
580 // change and the IR allows for a single inst that both begins
581 // and ends lifetime(s), this interface will need to be reworked.
582 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
584  bool &isStart) {
587  int Slot = getStartOrEndSlot(MI);
588  if (Slot < 0)
589  return false;
590  if (!InterestingSlots.test(Slot))
591  return false;
592  slots.push_back(Slot);
594  isStart = false;
595  return true;
596  }
597  if (!applyFirstUse(Slot)) {
598  isStart = true;
599  return true;
600  }
602  if (!MI.isDebugInstr()) {
603  bool found = false;
604  for (const MachineOperand &MO : MI.operands()) {
605  if (!MO.isFI())
606  continue;
607  int Slot = MO.getIndex();
608  if (Slot<0)
609  continue;
610  if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
611  slots.push_back(Slot);
612  found = true;
613  }
614  }
615  if (found) {
616  isStart = true;
617  return true;
618  }
619  }
620  }
621  return false;
622 }
623 
624 unsigned StackColoring::collectMarkers(unsigned NumSlot) {
625  unsigned MarkersFound = 0;
626  BlockBitVecMap SeenStartMap;
627  InterestingSlots.clear();
628  InterestingSlots.resize(NumSlot);
629  ConservativeSlots.clear();
630  ConservativeSlots.resize(NumSlot);
631 
632  // number of start and end lifetime ops for each slot
633  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
634  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
635 
636  // Step 1: collect markers and populate the "InterestingSlots"
637  // and "ConservativeSlots" sets.
638  for (MachineBasicBlock *MBB : depth_first(MF)) {
639  // Compute the set of slots for which we've seen a START marker but have
640  // not yet seen an END marker at this point in the walk (e.g. on entry
641  // to this bb).
642  BitVector BetweenStartEnd;
643  BetweenStartEnd.resize(NumSlot);
645  PE = MBB->pred_end(); PI != PE; ++PI) {
646  BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
647  if (I != SeenStartMap.end()) {
648  BetweenStartEnd |= I->second;
649  }
650  }
651 
652  // Walk the instructions in the block to look for start/end ops.
653  for (MachineInstr &MI : *MBB) {
656  int Slot = getStartOrEndSlot(MI);
657  if (Slot < 0)
658  continue;
659  InterestingSlots.set(Slot);
661  BetweenStartEnd.set(Slot);
662  NumStartLifetimes[Slot] += 1;
663  } else {
664  BetweenStartEnd.reset(Slot);
665  NumEndLifetimes[Slot] += 1;
666  }
667  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
668  if (Allocation) {
669  LLVM_DEBUG(dbgs() << "Found a lifetime ");
671  ? "start"
672  : "end"));
673  LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
674  LLVM_DEBUG(dbgs()
675  << " with allocation: " << Allocation->getName() << "\n");
676  }
677  Markers.push_back(&MI);
678  MarkersFound += 1;
679  } else {
680  for (const MachineOperand &MO : MI.operands()) {
681  if (!MO.isFI())
682  continue;
683  int Slot = MO.getIndex();
684  if (Slot < 0)
685  continue;
686  if (! BetweenStartEnd.test(Slot)) {
687  ConservativeSlots.set(Slot);
688  }
689  }
690  }
691  }
692  BitVector &SeenStart = SeenStartMap[MBB];
693  SeenStart |= BetweenStartEnd;
694  }
695  if (!MarkersFound) {
696  return 0;
697  }
698 
699  // PR27903: slots with multiple start or end lifetime ops are not
700  // safe to enable for "lifetime-start-on-first-use".
701  for (unsigned slot = 0; slot < NumSlot; ++slot)
702  if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
703  ConservativeSlots.set(slot);
704  LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
705 
706  // Step 2: compute begin/end sets for each block
707 
708  // NOTE: We use a depth-first iteration to ensure that we obtain a
709  // deterministic numbering.
710  for (MachineBasicBlock *MBB : depth_first(MF)) {
711  // Assign a serial number to this basic block.
712  BasicBlocks[MBB] = BasicBlockNumbering.size();
713  BasicBlockNumbering.push_back(MBB);
714 
715  // Keep a reference to avoid repeated lookups.
716  BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
717 
718  BlockInfo.Begin.resize(NumSlot);
719  BlockInfo.End.resize(NumSlot);
720 
722  for (MachineInstr &MI : *MBB) {
723  bool isStart = false;
724  slots.clear();
725  if (isLifetimeStartOrEnd(MI, slots, isStart)) {
726  if (!isStart) {
727  assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
728  int Slot = slots[0];
729  if (BlockInfo.Begin.test(Slot)) {
730  BlockInfo.Begin.reset(Slot);
731  }
732  BlockInfo.End.set(Slot);
733  } else {
734  for (auto Slot : slots) {
735  LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
736  LLVM_DEBUG(dbgs()
737  << " at " << printMBBReference(*MBB) << " index ");
738  LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
739  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
740  if (Allocation) {
741  LLVM_DEBUG(dbgs()
742  << " with allocation: " << Allocation->getName());
743  }
744  LLVM_DEBUG(dbgs() << "\n");
745  if (BlockInfo.End.test(Slot)) {
746  BlockInfo.End.reset(Slot);
747  }
748  BlockInfo.Begin.set(Slot);
749  }
750  }
751  }
752  }
753  }
754 
755  // Update statistics.
756  NumMarkerSeen += MarkersFound;
757  return MarkersFound;
758 }
759 
760 void StackColoring::calculateLocalLiveness() {
761  unsigned NumIters = 0;
762  bool changed = true;
763  while (changed) {
764  changed = false;
765  ++NumIters;
766 
767  for (const MachineBasicBlock *BB : BasicBlockNumbering) {
768  // Use an iterator to avoid repeated lookups.
769  LivenessMap::iterator BI = BlockLiveness.find(BB);
770  assert(BI != BlockLiveness.end() && "Block not found");
771  BlockLifetimeInfo &BlockInfo = BI->second;
772 
773  // Compute LiveIn by unioning together the LiveOut sets of all preds.
774  BitVector LocalLiveIn;
775  for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
776  PE = BB->pred_end(); PI != PE; ++PI) {
777  LivenessMap::const_iterator I = BlockLiveness.find(*PI);
778  // PR37130: transformations prior to stack coloring can
779  // sometimes leave behind statically unreachable blocks; these
780  // can be safely skipped here.
781  if (I != BlockLiveness.end())
782  LocalLiveIn |= I->second.LiveOut;
783  }
784 
785  // Compute LiveOut by subtracting out lifetimes that end in this
786  // block, then adding in lifetimes that begin in this block. If
787  // we have both BEGIN and END markers in the same basic block
788  // then we know that the BEGIN marker comes after the END,
789  // because we already handle the case where the BEGIN comes
790  // before the END when collecting the markers (and building the
791  // BEGIN/END vectors).
792  BitVector LocalLiveOut = LocalLiveIn;
793  LocalLiveOut.reset(BlockInfo.End);
794  LocalLiveOut |= BlockInfo.Begin;
795 
796  // Update block LiveIn set, noting whether it has changed.
797  if (LocalLiveIn.test(BlockInfo.LiveIn)) {
798  changed = true;
799  BlockInfo.LiveIn |= LocalLiveIn;
800  }
801 
802  // Update block LiveOut set, noting whether it has changed.
803  if (LocalLiveOut.test(BlockInfo.LiveOut)) {
804  changed = true;
805  BlockInfo.LiveOut |= LocalLiveOut;
806  }
807  }
808  } // while changed.
809 
810  NumIterations = NumIters;
811 }
812 
813 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
815  SmallVector<bool, 16> DefinitelyInUse;
816 
817  // For each block, find which slots are active within this block
818  // and update the live intervals.
819  for (const MachineBasicBlock &MBB : *MF) {
820  Starts.clear();
821  Starts.resize(NumSlots);
822  DefinitelyInUse.clear();
823  DefinitelyInUse.resize(NumSlots);
824 
825  // Start the interval of the slots that we previously found to be 'in-use'.
826  BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
827  for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
828  pos = MBBLiveness.LiveIn.find_next(pos)) {
829  Starts[pos] = Indexes->getMBBStartIdx(&MBB);
830  }
831 
832  // Create the interval for the basic blocks containing lifetime begin/end.
833  for (const MachineInstr &MI : MBB) {
835  bool IsStart = false;
836  if (!isLifetimeStartOrEnd(MI, slots, IsStart))
837  continue;
838  SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
839  for (auto Slot : slots) {
840  if (IsStart) {
841  // If a slot is already definitely in use, we don't have to emit
842  // a new start marker because there is already a pre-existing
843  // one.
844  if (!DefinitelyInUse[Slot]) {
845  LiveStarts[Slot].push_back(ThisIndex);
846  DefinitelyInUse[Slot] = true;
847  }
848  if (!Starts[Slot].isValid())
849  Starts[Slot] = ThisIndex;
850  } else {
851  if (Starts[Slot].isValid()) {
852  VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
853  Intervals[Slot]->addSegment(
854  LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
855  Starts[Slot] = SlotIndex(); // Invalidate the start index
856  DefinitelyInUse[Slot] = false;
857  }
858  }
859  }
860  }
861 
862  // Finish up started segments
863  for (unsigned i = 0; i < NumSlots; ++i) {
864  if (!Starts[i].isValid())
865  continue;
866 
867  SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
868  VNInfo *VNI = Intervals[i]->getValNumInfo(0);
869  Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
870  }
871  }
872 }
873 
874 bool StackColoring::removeAllMarkers() {
875  unsigned Count = 0;
876  for (MachineInstr *MI : Markers) {
877  MI->eraseFromParent();
878  Count++;
879  }
880  Markers.clear();
881 
882  LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
883  return Count;
884 }
885 
886 void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
887  unsigned FixedInstr = 0;
888  unsigned FixedMemOp = 0;
889  unsigned FixedDbg = 0;
890 
891  // Remap debug information that refers to stack slots.
892  for (auto &VI : MF->getVariableDbgInfo()) {
893  if (!VI.Var)
894  continue;
895  if (SlotRemap.count(VI.Slot)) {
896  LLVM_DEBUG(dbgs() << "Remapping debug info for ["
897  << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
898  VI.Slot = SlotRemap[VI.Slot];
899  FixedDbg++;
900  }
901  }
902 
903  // Keep a list of *allocas* which need to be remapped.
905 
906  // Keep a list of allocas which has been affected by the remap.
908 
909  for (const std::pair<int, int> &SI : SlotRemap) {
910  const AllocaInst *From = MFI->getObjectAllocation(SI.first);
911  const AllocaInst *To = MFI->getObjectAllocation(SI.second);
912  assert(To && From && "Invalid allocation object");
913  Allocas[From] = To;
914 
915  // AA might be used later for instruction scheduling, and we need it to be
916  // able to deduce the correct aliasing releationships between pointers
917  // derived from the alloca being remapped and the target of that remapping.
918  // The only safe way, without directly informing AA about the remapping
919  // somehow, is to directly update the IR to reflect the change being made
920  // here.
921  Instruction *Inst = const_cast<AllocaInst *>(To);
922  if (From->getType() != To->getType()) {
923  BitCastInst *Cast = new BitCastInst(Inst, From->getType());
924  Cast->insertAfter(Inst);
925  Inst = Cast;
926  }
927 
928  // We keep both slots to maintain AliasAnalysis metadata later.
929  MergedAllocas.insert(From);
930  MergedAllocas.insert(To);
931 
932  // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
933  // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
934  // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
936  = MFI->getObjectSSPLayout(SI.first);
938  if (FromKind != MachineFrameInfo::SSPLK_None &&
939  (ToKind == MachineFrameInfo::SSPLK_None ||
941  FromKind != MachineFrameInfo::SSPLK_AddrOf)))
942  MFI->setObjectSSPLayout(SI.second, FromKind);
943 
944  // The new alloca might not be valid in a llvm.dbg.declare for this
945  // variable, so undef out the use to make the verifier happy.
946  AllocaInst *FromAI = const_cast<AllocaInst *>(From);
947  if (FromAI->isUsedByMetadata())
949  for (auto &Use : FromAI->uses()) {
950  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
951  if (BCI->isUsedByMetadata())
952  ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
953  }
954 
955  // Note that this will not replace uses in MMOs (which we'll update below),
956  // or anywhere else (which is why we won't delete the original
957  // instruction).
958  FromAI->replaceAllUsesWith(Inst);
959  }
960 
961  // Remap all instructions to the new stack slots.
962  for (MachineBasicBlock &BB : *MF)
963  for (MachineInstr &I : BB) {
964  // Skip lifetime markers. We'll remove them soon.
965  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
966  I.getOpcode() == TargetOpcode::LIFETIME_END)
967  continue;
968 
969  // Update the MachineMemOperand to use the new alloca.
970  for (MachineMemOperand *MMO : I.memoperands()) {
971  // We've replaced IR-level uses of the remapped allocas, so we only
972  // need to replace direct uses here.
973  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
974  if (!AI)
975  continue;
976 
977  if (!Allocas.count(AI))
978  continue;
979 
980  MMO->setValue(Allocas[AI]);
981  FixedMemOp++;
982  }
983 
984  // Update all of the machine instruction operands.
985  for (MachineOperand &MO : I.operands()) {
986  if (!MO.isFI())
987  continue;
988  int FromSlot = MO.getIndex();
989 
990  // Don't touch arguments.
991  if (FromSlot<0)
992  continue;
993 
994  // Only look at mapped slots.
995  if (!SlotRemap.count(FromSlot))
996  continue;
997 
998  // In a debug build, check that the instruction that we are modifying is
999  // inside the expected live range. If the instruction is not inside
1000  // the calculated range then it means that the alloca usage moved
1001  // outside of the lifetime markers, or that the user has a bug.
1002  // NOTE: Alloca address calculations which happen outside the lifetime
1003  // zone are okay, despite the fact that we don't have a good way
1004  // for validating all of the usages of the calculation.
1005 #ifndef NDEBUG
1006  bool TouchesMemory = I.mayLoad() || I.mayStore();
1007  // If we *don't* protect the user from escaped allocas, don't bother
1008  // validating the instructions.
1009  if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1010  SlotIndex Index = Indexes->getInstructionIndex(I);
1011  const LiveInterval *Interval = &*Intervals[FromSlot];
1012  assert(Interval->find(Index) != Interval->end() &&
1013  "Found instruction usage outside of live range.");
1014  }
1015 #endif
1016 
1017  // Fix the machine instructions.
1018  int ToSlot = SlotRemap[FromSlot];
1019  MO.setIndex(ToSlot);
1020  FixedInstr++;
1021  }
1022 
1023  // We adjust AliasAnalysis information for merged stack slots.
1025  bool ReplaceMemOps = false;
1026  for (MachineMemOperand *MMO : I.memoperands()) {
1027  // If this memory location can be a slot remapped here,
1028  // we remove AA information.
1029  bool MayHaveConflictingAAMD = false;
1030  if (MMO->getAAInfo()) {
1031  if (const Value *MMOV = MMO->getValue()) {
1033  getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1034 
1035  if (Objs.empty())
1036  MayHaveConflictingAAMD = true;
1037  else
1038  for (Value *V : Objs) {
1039  // If this memory location comes from a known stack slot
1040  // that is not remapped, we continue checking.
1041  // Otherwise, we need to invalidate AA infomation.
1042  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1043  if (AI && MergedAllocas.count(AI)) {
1044  MayHaveConflictingAAMD = true;
1045  break;
1046  }
1047  }
1048  }
1049  }
1050  if (MayHaveConflictingAAMD) {
1051  NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1052  ReplaceMemOps = true;
1053  } else {
1054  NewMMOs.push_back(MMO);
1055  }
1056  }
1057 
1058  // If any memory operand is updated, set memory references of
1059  // this instruction.
1060  if (ReplaceMemOps)
1061  I.setMemRefs(*MF, NewMMOs);
1062  }
1063 
1064  // Update the location of C++ catch objects for the MSVC personality routine.
1065  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1066  for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1067  for (WinEHHandlerType &H : TBME.HandlerArray)
1069  SlotRemap.count(H.CatchObj.FrameIndex))
1070  H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1071 
1072  LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1073  LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1074  LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1075 }
1076 
1077 void StackColoring::removeInvalidSlotRanges() {
1078  for (MachineBasicBlock &BB : *MF)
1079  for (MachineInstr &I : BB) {
1080  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1081  I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1082  continue;
1083 
1084  // Some intervals are suspicious! In some cases we find address
1085  // calculations outside of the lifetime zone, but not actual memory
1086  // read or write. Memory accesses outside of the lifetime zone are a clear
1087  // violation, but address calculations are okay. This can happen when
1088  // GEPs are hoisted outside of the lifetime zone.
1089  // So, in here we only check instructions which can read or write memory.
1090  if (!I.mayLoad() && !I.mayStore())
1091  continue;
1092 
1093  // Check all of the machine operands.
1094  for (const MachineOperand &MO : I.operands()) {
1095  if (!MO.isFI())
1096  continue;
1097 
1098  int Slot = MO.getIndex();
1099 
1100  if (Slot<0)
1101  continue;
1102 
1103  if (Intervals[Slot]->empty())
1104  continue;
1105 
1106  // Check that the used slot is inside the calculated lifetime range.
1107  // If it is not, warn about it and invalidate the range.
1108  LiveInterval *Interval = &*Intervals[Slot];
1109  SlotIndex Index = Indexes->getInstructionIndex(I);
1110  if (Interval->find(Index) == Interval->end()) {
1111  Interval->clear();
1112  LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1113  EscapedAllocas++;
1114  }
1115  }
1116  }
1117 }
1118 
1119 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1120  unsigned NumSlots) {
1121  // Expunge slot remap map.
1122  for (unsigned i=0; i < NumSlots; ++i) {
1123  // If we are remapping i
1124  if (SlotRemap.count(i)) {
1125  int Target = SlotRemap[i];
1126  // As long as our target is mapped to something else, follow it.
1127  while (SlotRemap.count(Target)) {
1128  Target = SlotRemap[Target];
1129  SlotRemap[i] = Target;
1130  }
1131  }
1132  }
1133 }
1134 
1135 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1136  LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1137  << "********** Function: " << Func.getName() << '\n');
1138  MF = &Func;
1139  MFI = &MF->getFrameInfo();
1140  Indexes = &getAnalysis<SlotIndexes>();
1141  BlockLiveness.clear();
1142  BasicBlocks.clear();
1143  BasicBlockNumbering.clear();
1144  Markers.clear();
1145  Intervals.clear();
1146  LiveStarts.clear();
1147  VNInfoAllocator.Reset();
1148 
1149  unsigned NumSlots = MFI->getObjectIndexEnd();
1150 
1151  // If there are no stack slots then there are no markers to remove.
1152  if (!NumSlots)
1153  return false;
1154 
1155  SmallVector<int, 8> SortedSlots;
1156  SortedSlots.reserve(NumSlots);
1157  Intervals.reserve(NumSlots);
1158  LiveStarts.resize(NumSlots);
1159 
1160  unsigned NumMarkers = collectMarkers(NumSlots);
1161 
1162  unsigned TotalSize = 0;
1163  LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1164  << " slots\n");
1165  LLVM_DEBUG(dbgs() << "Slot structure:\n");
1166 
1167  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1168  LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1169  << " bytes.\n");
1170  TotalSize += MFI->getObjectSize(i);
1171  }
1172 
1173  LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1174 
1175  // Don't continue because there are not enough lifetime markers, or the
1176  // stack is too small, or we are told not to optimize the slots.
1177  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1178  skipFunction(Func.getFunction())) {
1179  LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1180  return removeAllMarkers();
1181  }
1182 
1183  for (unsigned i=0; i < NumSlots; ++i) {
1184  std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1185  LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1186  Intervals.push_back(std::move(LI));
1187  SortedSlots.push_back(i);
1188  }
1189 
1190  // Calculate the liveness of each block.
1191  calculateLocalLiveness();
1192  LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1193  LLVM_DEBUG(dump());
1194 
1195  // Propagate the liveness information.
1196  calculateLiveIntervals(NumSlots);
1197  LLVM_DEBUG(dumpIntervals());
1198 
1199  // Search for allocas which are used outside of the declared lifetime
1200  // markers.
1202  removeInvalidSlotRanges();
1203 
1204  // Maps old slots to new slots.
1205  DenseMap<int, int> SlotRemap;
1206  unsigned RemovedSlots = 0;
1207  unsigned ReducedSize = 0;
1208 
1209  // Do not bother looking at empty intervals.
1210  for (unsigned I = 0; I < NumSlots; ++I) {
1211  if (Intervals[SortedSlots[I]]->empty())
1212  SortedSlots[I] = -1;
1213  }
1214 
1215  // This is a simple greedy algorithm for merging allocas. First, sort the
1216  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1217  // for disjoint slots. When you find disjoint slots, merge the samller one
1218  // into the bigger one and update the live interval. Remove the small alloca
1219  // and continue.
1220 
1221  // Sort the slots according to their size. Place unused slots at the end.
1222  // Use stable sort to guarantee deterministic code generation.
1223  llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
1224  // We use -1 to denote a uninteresting slot. Place these slots at the end.
1225  if (LHS == -1)
1226  return false;
1227  if (RHS == -1)
1228  return true;
1229  // Sort according to size.
1230  return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1231  });
1232 
1233  for (auto &s : LiveStarts)
1234  llvm::sort(s);
1235 
1236  bool Changed = true;
1237  while (Changed) {
1238  Changed = false;
1239  for (unsigned I = 0; I < NumSlots; ++I) {
1240  if (SortedSlots[I] == -1)
1241  continue;
1242 
1243  for (unsigned J=I+1; J < NumSlots; ++J) {
1244  if (SortedSlots[J] == -1)
1245  continue;
1246 
1247  int FirstSlot = SortedSlots[I];
1248  int SecondSlot = SortedSlots[J];
1249  LiveInterval *First = &*Intervals[FirstSlot];
1250  LiveInterval *Second = &*Intervals[SecondSlot];
1251  auto &FirstS = LiveStarts[FirstSlot];
1252  auto &SecondS = LiveStarts[SecondSlot];
1253  assert(!First->empty() && !Second->empty() && "Found an empty range");
1254 
1255  // Merge disjoint slots. This is a little bit tricky - see the
1256  // Implementation Notes section for an explanation.
1257  if (!First->isLiveAtIndexes(SecondS) &&
1258  !Second->isLiveAtIndexes(FirstS)) {
1259  Changed = true;
1260  First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1261 
1262  int OldSize = FirstS.size();
1263  FirstS.append(SecondS.begin(), SecondS.end());
1264  auto Mid = FirstS.begin() + OldSize;
1265  std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1266 
1267  SlotRemap[SecondSlot] = FirstSlot;
1268  SortedSlots[J] = -1;
1269  LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1270  << SecondSlot << " together.\n");
1271  unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1272  MFI->getObjectAlignment(SecondSlot));
1273 
1274  assert(MFI->getObjectSize(FirstSlot) >=
1275  MFI->getObjectSize(SecondSlot) &&
1276  "Merging a small object into a larger one");
1277 
1278  RemovedSlots+=1;
1279  ReducedSize += MFI->getObjectSize(SecondSlot);
1280  MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1281  MFI->RemoveStackObject(SecondSlot);
1282  }
1283  }
1284  }
1285  }// While changed.
1286 
1287  // Record statistics.
1288  StackSpaceSaved += ReducedSize;
1289  StackSlotMerged += RemovedSlots;
1290  LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1291  << ReducedSize << " bytes\n");
1292 
1293  // Scan the entire function and update all machine operands that use frame
1294  // indices to use the remapped frame index.
1295  expungeSlotMap(SlotRemap, NumSlots);
1296  remapInstructions(SlotRemap);
1297 
1298  return removeAllMarkers();
1299 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:371
bool empty() const
Definition: LiveInterval.h:373
BitVector & set()
Definition: BitVector.h:397
iterator_range< use_iterator > uses()
Definition: Value.h:375
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static int getStartOrEndSlot(const MachineInstr &MI)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:484
Did not trigger a stack protector.
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:36
void push_back(const T &Elt)
Definition: SmallVector.h:211
Merge disjoint stack slots
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:679
This file contains the declarations for metadata subclasses.
bool test(unsigned Idx) const
Definition: BitVector.h:501
bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:322
STATISTIC(NumFunctions, "Total number of functions")
std::vector< MachineBasicBlock * >::const_iterator const_pred_iterator
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:369
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
VariableDbgInfoMapTy & getVariableDbgInfo()
Value * get() const
Definition: Use.h:107
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
A description of a memory reference used in the backend.
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
Definition: BitVector.h:366
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void Reset()
Deallocate all but the current slab and reset the current pointer to the beginning of it...
Definition: Allocator.h:195
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:96
char & StackColoringID
StackSlotColoring - This pass performs stack coloring and merging.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
iterator end()
Definition: LiveInterval.h:215
The address of this allocation is exposed and triggered protection.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
SlotIndex getInstructionIndex(const MachineInstr &MI) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:390
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
void MergeSegmentsInAsValue(const LiveRange &RHS, VNInfo *LHSValNo)
Merge all of the live segments of a specific val# in RHS into this live range as the specified value ...
static void handleRAUW(Value *From, Value *To)
Definition: Metadata.cpp:391
SlotIndexes pass.
Definition: SlotIndexes.h:314
This class represents a no-op cast from one type to another.
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) INITIALIZE_PASS_END(StackColoring
int getObjectIndexEnd() const
Return one past the maximum frame object index.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
Array or nested array >= SSP-buffer-size.
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:878
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:141
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:469
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:438
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:373
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
#define DEBUG_TYPE
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:479
R600 Clause Merge
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
size_t size() const
Definition: SmallVector.h:52
bool isDebugInstr() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:197
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
union llvm::WinEHHandlerType::@203 CatchObj
The CatchObj starts out life as an LLVM alloca and is eventually turned frame index.
BlockVerifier::State From
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:389
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
void initializeStackColoringPass(PassRegistry &)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
SSPLayoutKind
Stack Smashing Protection (SSP) rules require that vulnerable stack allocations are located close the...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition: Value.h:509
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Target - Wrapper for Target specific information.
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:308
Representation of each machine instruction.
Definition: MachineInstr.h:64
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:79
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:169
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
LLVM Value Representation.
Definition: Value.h:74
IRTranslator LLVM IR MI
void print(raw_ostream &os) const
Print this index to the given raw_ostream.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
an instruction to allocate memory on the stack
Definition: Instructions.h:59
void resize(size_type N)
Definition: SmallVector.h:344