LLVM  14.0.0git
StackColoring.cpp
Go to the documentation of this file.
1 //===- StackColoring.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements the stack-coloring optimization that looks for
10 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
11 // which represent the possible lifetime of stack slots. It attempts to
12 // merge disjoint stack slots and reduce the used stack space.
13 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14 //
15 // TODO: In the future we plan to improve stack coloring in the following ways:
16 // 1. Allow merging multiple small slots into a single larger slot at different
17 // offsets.
18 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19 // spill slots.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
38 #include "llvm/CodeGen/Passes.h"
43 #include "llvm/Config/llvm-config.h"
44 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/Metadata.h"
49 #include "llvm/IR/Use.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/InitializePasses.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <limits>
61 #include <memory>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "stack-coloring"
67 
68 static cl::opt<bool>
69 DisableColoring("no-stack-coloring",
70  cl::init(false), cl::Hidden,
71  cl::desc("Disable stack coloring"));
72 
73 /// The user may write code that uses allocas outside of the declared lifetime
74 /// zone. This can happen when the user returns a reference to a local
75 /// data-structure. We can detect these cases and decide not to optimize the
76 /// code. If this flag is enabled, we try to save the user. This option
77 /// is treated as overriding LifetimeStartOnFirstUse below.
78 static cl::opt<bool>
79 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80  cl::init(false), cl::Hidden,
81  cl::desc("Do not optimize lifetime zones that "
82  "are broken"));
83 
84 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
85 /// use of stack slot as start of slot lifetime, as opposed to looking
86 /// for LIFETIME_START marker). See "Implementation notes" below for
87 /// more info.
88 static cl::opt<bool>
89 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90  cl::init(true), cl::Hidden,
91  cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92 
93 
94 STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98 
99 //===----------------------------------------------------------------------===//
100 // StackColoring Pass
101 //===----------------------------------------------------------------------===//
102 //
103 // Stack Coloring reduces stack usage by merging stack slots when they
104 // can't be used together. For example, consider the following C program:
105 //
106 // void bar(char *, int);
107 // void foo(bool var) {
108 // A: {
109 // char z[4096];
110 // bar(z, 0);
111 // }
112 //
113 // char *p;
114 // char x[4096];
115 // char y[4096];
116 // if (var) {
117 // p = x;
118 // } else {
119 // bar(y, 1);
120 // p = y + 1024;
121 // }
122 // B:
123 // bar(p, 2);
124 // }
125 //
126 // Naively-compiled, this program would use 12k of stack space. However, the
127 // stack slot corresponding to `z` is always destroyed before either of the
128 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
129 // is true, while `y` is only used if `var` is false. So in no time are 2
130 // of the stack slots used together, and therefore we can merge them,
131 // compiling the function using only a single 4k alloca:
132 //
133 // void foo(bool var) { // equivalent
134 // char x[4096];
135 // char *p;
136 // bar(x, 0);
137 // if (var) {
138 // p = x;
139 // } else {
140 // bar(x, 1);
141 // p = x + 1024;
142 // }
143 // bar(p, 2);
144 // }
145 //
146 // This is an important optimization if we want stack space to be under
147 // control in large functions, both open-coded ones and ones created by
148 // inlining.
149 //
150 // Implementation Notes:
151 // ---------------------
152 //
153 // An important part of the above reasoning is that `z` can't be accessed
154 // while the latter 2 calls to `bar` are running. This is justified because
155 // `z`'s lifetime is over after we exit from block `A:`, so any further
156 // accesses to it would be UB. The way we represent this information
157 // in LLVM is by having frontends delimit blocks with `lifetime.start`
158 // and `lifetime.end` intrinsics.
159 //
160 // The effect of these intrinsics seems to be as follows (maybe I should
161 // specify this in the reference?):
162 //
163 // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164 // lifetime intrinsic refers to that stack slot, in which case
165 // it is marked as *in-scope*.
166 // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167 // the stack slot is overwritten with `undef`.
168 // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169 // L4) on function exit, all stack slots are marked as *out-of-scope*.
170 // L5) `lifetime.end` is a no-op when called on a slot that is already
171 // *out-of-scope*.
172 // L6) memory accesses to *out-of-scope* stack slots are UB.
173 // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174 // are invalidated, unless the slot is "degenerate". This is used to
175 // justify not marking slots as in-use until the pointer to them is
176 // used, but feels a bit hacky in the presence of things like LICM. See
177 // the "Degenerate Slots" section for more details.
178 //
179 // Now, let's ground stack coloring on these rules. We'll define a slot
180 // as *in-use* at a (dynamic) point in execution if it either can be
181 // written to at that point, or if it has a live and non-undef content
182 // at that point.
183 //
184 // Obviously, slots that are never *in-use* together can be merged, and
185 // in our example `foo`, the slots for `x`, `y` and `z` are never
186 // in-use together (of course, sometimes slots that *are* in-use together
187 // might still be mergable, but we don't care about that here).
188 //
189 // In this implementation, we successively merge pairs of slots that are
190 // not *in-use* together. We could be smarter - for example, we could merge
191 // a single large slot with 2 small slots, or we could construct the
192 // interference graph and run a "smart" graph coloring algorithm, but with
193 // that aside, how do we find out whether a pair of slots might be *in-use*
194 // together?
195 //
196 // From our rules, we see that *out-of-scope* slots are never *in-use*,
197 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198 // until their address is taken. Therefore, we can approximate slot activity
199 // using dataflow.
200 //
201 // A subtle point: naively, we might try to figure out which pairs of
202 // stack-slots interfere by propagating `S in-use` through the CFG for every
203 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204 // which they are both *in-use*.
205 //
206 // That is sound, but overly conservative in some cases: in our (artificial)
207 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
208 // as `x` is only in use if we came in from the `var` edge and `y` only
209 // if we came from the `!var` edge, they still can't be in use together.
210 // See PR32488 for an important real-life case.
211 //
212 // If we wanted to find all points of interference precisely, we could
213 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214 // would be precise, but requires propagating `O(n^2)` dataflow facts.
215 //
216 // However, we aren't interested in the *set* of points of interference
217 // between 2 stack slots, only *whether* there *is* such a point. So we
218 // can rely on a little trick: for `S` and `T` to be in-use together,
219 // one of them needs to become in-use while the other is in-use (or
220 // they might both become in use simultaneously). We can check this
221 // by also keeping track of the points at which a stack slot might *start*
222 // being in-use.
223 //
224 // Exact first use:
225 // ----------------
226 //
227 // Consider the following motivating example:
228 //
229 // int foo() {
230 // char b1[1024], b2[1024];
231 // if (...) {
232 // char b3[1024];
233 // <uses of b1, b3>;
234 // return x;
235 // } else {
236 // char b4[1024], b5[1024];
237 // <uses of b2, b4, b5>;
238 // return y;
239 // }
240 // }
241 //
242 // In the code above, "b3" and "b4" are declared in distinct lexical
243 // scopes, meaning that it is easy to prove that they can share the
244 // same stack slot. Variables "b1" and "b2" are declared in the same
245 // scope, meaning that from a lexical point of view, their lifetimes
246 // overlap. From a control flow pointer of view, however, the two
247 // variables are accessed in disjoint regions of the CFG, thus it
248 // should be possible for them to share the same stack slot. An ideal
249 // stack allocation for the function above would look like:
250 //
251 // slot 0: b1, b2
252 // slot 1: b3, b4
253 // slot 2: b5
254 //
255 // Achieving this allocation is tricky, however, due to the way
256 // lifetime markers are inserted. Here is a simplified view of the
257 // control flow graph for the code above:
258 //
259 // +------ block 0 -------+
260 // 0| LIFETIME_START b1, b2 |
261 // 1| <test 'if' condition> |
262 // +-----------------------+
263 // ./ \.
264 // +------ block 1 -------+ +------ block 2 -------+
265 // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266 // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267 // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268 // +-----------------------+ +-----------------------+
269 // \. /.
270 // +------ block 3 -------+
271 // 8| <cleanupcode> |
272 // 9| LIFETIME_END b1, b2 |
273 // 10| return |
274 // +-----------------------+
275 //
276 // If we create live intervals for the variables above strictly based
277 // on the lifetime markers, we'll get the set of intervals on the
278 // left. If we ignore the lifetime start markers and instead treat a
279 // variable's lifetime as beginning with the first reference to the
280 // var, then we get the intervals on the right.
281 //
282 // LIFETIME_START First Use
283 // b1: [0,9] [3,4] [8,9]
284 // b2: [0,9] [6,9]
285 // b3: [2,4] [3,4]
286 // b4: [5,7] [6,7]
287 // b5: [5,7] [6,7]
288 //
289 // For the intervals on the left, the best we can do is overlap two
290 // variables (b3 and b4, for example); this gives us a stack size of
291 // 4*1024 bytes, not ideal. When treating first-use as the start of a
292 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293 // byte stack (better).
294 //
295 // Degenerate Slots:
296 // -----------------
297 //
298 // Relying entirely on first-use of stack slots is problematic,
299 // however, due to the fact that optimizations can sometimes migrate
300 // uses of a variable outside of its lifetime start/end region. Here
301 // is an example:
302 //
303 // int bar() {
304 // char b1[1024], b2[1024];
305 // if (...) {
306 // <uses of b2>
307 // return y;
308 // } else {
309 // <uses of b1>
310 // while (...) {
311 // char b3[1024];
312 // <uses of b3>
313 // }
314 // }
315 // }
316 //
317 // Before optimization, the control flow graph for the code above
318 // might look like the following:
319 //
320 // +------ block 0 -------+
321 // 0| LIFETIME_START b1, b2 |
322 // 1| <test 'if' condition> |
323 // +-----------------------+
324 // ./ \.
325 // +------ block 1 -------+ +------- block 2 -------+
326 // 2| <uses of b2> | 3| <uses of b1> |
327 // +-----------------------+ +-----------------------+
328 // | |
329 // | +------- block 3 -------+ <-\.
330 // | 4| <while condition> | |
331 // | +-----------------------+ |
332 // | / | |
333 // | / +------- block 4 -------+
334 // \ / 5| LIFETIME_START b3 | |
335 // \ / 6| <uses of b3> | |
336 // \ / 7| LIFETIME_END b3 | |
337 // \ | +------------------------+ |
338 // \ | \ /
339 // +------ block 5 -----+ \---------------
340 // 8| <cleanupcode> |
341 // 9| LIFETIME_END b1, b2 |
342 // 10| return |
343 // +---------------------+
344 //
345 // During optimization, however, it can happen that an instruction
346 // computing an address in "b3" (for example, a loop-invariant GEP) is
347 // hoisted up out of the loop from block 4 to block 2. [Note that
348 // this is not an actual load from the stack, only an instruction that
349 // computes the address to be loaded]. If this happens, there is now a
350 // path leading from the first use of b3 to the return instruction
351 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352 // now larger than if we were computing live intervals strictly based
353 // on lifetime markers. In the example above, this lengthened lifetime
354 // would mean that it would appear illegal to overlap b3 with b2.
355 //
356 // To deal with this such cases, the code in ::collectMarkers() below
357 // tries to identify "degenerate" slots -- those slots where on a single
358 // forward pass through the CFG we encounter a first reference to slot
359 // K before we hit the slot K lifetime start marker. For such slots,
360 // we fall back on using the lifetime start marker as the beginning of
361 // the variable's lifetime. NB: with this implementation, slots can
362 // appear degenerate in cases where there is unstructured control flow:
363 //
364 // if (q) goto mid;
365 // if (x > 9) {
366 // int b[100];
367 // memcpy(&b[0], ...);
368 // mid: b[k] = ...;
369 // abc(&b);
370 // }
371 //
372 // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373 // before visiting the memcpy block (which will contain the lifetime start
374 // for "b" then it will appear that 'b' has a degenerate lifetime.
375 //
376 // Handle Windows Exception with LifetimeStartOnFirstUse:
377 // -----------------
378 //
379 // There was a bug for using LifetimeStartOnFirstUse in win32.
380 // class Type1 {
381 // ...
382 // ~Type1(){ write memory;}
383 // }
384 // ...
385 // try{
386 // Type1 V
387 // ...
388 // } catch (Type2 X){
389 // ...
390 // }
391 // For variable X in catch(X), we put point pX=&(&X) into ConservativeSlots
392 // to prevent using LifetimeStartOnFirstUse. Because pX may merged with
393 // object V which may call destructor after implicitly writing pX. All these
394 // are done in C++ EH runtime libs (through CxxThrowException), and can't
395 // obviously check it in IR level.
396 //
397 // The loader of pX, without obvious writing IR, is usually the first LOAD MI
398 // in EHPad, Some like:
399 // bb.x.catch.i (landing-pad, ehfunclet-entry):
400 // ; predecessors: %bb...
401 // successors: %bb...
402 // %n:gr32 = MOV32rm %stack.pX ...
403 // ...
404 // The Type2** %stack.pX will only be written in EH runtime libs, so we
405 // check the StoreSlots to screen it out.
406 
407 namespace {
408 
409 /// StackColoring - A machine pass for merging disjoint stack allocations,
410 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
411 class StackColoring : public MachineFunctionPass {
412  MachineFrameInfo *MFI;
413  MachineFunction *MF;
414 
415  /// A class representing liveness information for a single basic block.
416  /// Each bit in the BitVector represents the liveness property
417  /// for a different stack slot.
418  struct BlockLifetimeInfo {
419  /// Which slots BEGINs in each basic block.
420  BitVector Begin;
421 
422  /// Which slots ENDs in each basic block.
423  BitVector End;
424 
425  /// Which slots are marked as LIVE_IN, coming into each basic block.
426  BitVector LiveIn;
427 
428  /// Which slots are marked as LIVE_OUT, coming out of each basic block.
429  BitVector LiveOut;
430  };
431 
432  /// Maps active slots (per bit) for each basic block.
434  LivenessMap BlockLiveness;
435 
436  /// Maps serial numbers to basic blocks.
438 
439  /// Maps basic blocks to a serial number.
440  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
441 
442  /// Maps slots to their use interval. Outside of this interval, slots
443  /// values are either dead or `undef` and they will not be written to.
445 
446  /// Maps slots to the points where they can become in-use.
448 
449  /// VNInfo is used for the construction of LiveIntervals.
450  VNInfo::Allocator VNInfoAllocator;
451 
452  /// SlotIndex analysis object.
453  SlotIndexes *Indexes;
454 
455  /// The list of lifetime markers found. These markers are to be removed
456  /// once the coloring is done.
458 
459  /// Record the FI slots for which we have seen some sort of
460  /// lifetime marker (either start or end).
461  BitVector InterestingSlots;
462 
463  /// FI slots that need to be handled conservatively (for these
464  /// slots lifetime-start-on-first-use is disabled).
465  BitVector ConservativeSlots;
466 
467  /// Record the FI slots referenced by a 'may write to memory'.
468  BitVector StoreSlots;
469 
470  /// Number of iterations taken during data flow analysis.
471  unsigned NumIterations;
472 
473 public:
474  static char ID;
475 
476  StackColoring() : MachineFunctionPass(ID) {
478  }
479 
480  void getAnalysisUsage(AnalysisUsage &AU) const override;
481  bool runOnMachineFunction(MachineFunction &Func) override;
482 
483 private:
484  /// Used in collectMarkers
485  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
486 
487  /// Debug.
488  void dump() const;
489  void dumpIntervals() const;
490  void dumpBB(MachineBasicBlock *MBB) const;
491  void dumpBV(const char *tag, const BitVector &BV) const;
492 
493  /// Removes all of the lifetime marker instructions from the function.
494  /// \returns true if any markers were removed.
495  bool removeAllMarkers();
496 
497  /// Scan the machine function and find all of the lifetime markers.
498  /// Record the findings in the BEGIN and END vectors.
499  /// \returns the number of markers found.
500  unsigned collectMarkers(unsigned NumSlot);
501 
502  /// Perform the dataflow calculation and calculate the lifetime for each of
503  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
504  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
505  /// in and out blocks.
506  void calculateLocalLiveness();
507 
508  /// Returns TRUE if we're using the first-use-begins-lifetime method for
509  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
510  bool applyFirstUse(int Slot) {
512  return false;
513  if (ConservativeSlots.test(Slot))
514  return false;
515  return true;
516  }
517 
518  /// Examines the specified instruction and returns TRUE if the instruction
519  /// represents the start or end of an interesting lifetime. The slot or slots
520  /// starting or ending are added to the vector "slots" and "isStart" is set
521  /// accordingly.
522  /// \returns True if inst contains a lifetime start or end
523  bool isLifetimeStartOrEnd(const MachineInstr &MI,
525  bool &isStart);
526 
527  /// Construct the LiveIntervals for the slots.
528  void calculateLiveIntervals(unsigned NumSlots);
529 
530  /// Go over the machine function and change instructions which use stack
531  /// slots to use the joint slots.
532  void remapInstructions(DenseMap<int, int> &SlotRemap);
533 
534  /// The input program may contain instructions which are not inside lifetime
535  /// markers. This can happen due to a bug in the compiler or due to a bug in
536  /// user code (for example, returning a reference to a local variable).
537  /// This procedure checks all of the instructions in the function and
538  /// invalidates lifetime ranges which do not contain all of the instructions
539  /// which access that frame slot.
540  void removeInvalidSlotRanges();
541 
542  /// Map entries which point to other entries to their destination.
543  /// A->B->C becomes A->C.
544  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
545 };
546 
547 } // end anonymous namespace
548 
549 char StackColoring::ID = 0;
550 
552 
553 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
554  "Merge disjoint stack slots", false, false)
557  "Merge disjoint stack slots", false, false)
558 
559 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
560  AU.addRequired<SlotIndexes>();
562 }
563 
564 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
565 LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
566  const BitVector &BV) const {
567  dbgs() << tag << " : { ";
568  for (unsigned I = 0, E = BV.size(); I != E; ++I)
569  dbgs() << BV.test(I) << " ";
570  dbgs() << "}\n";
571 }
572 
573 LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
574  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
575  assert(BI != BlockLiveness.end() && "Block not found");
576  const BlockLifetimeInfo &BlockInfo = BI->second;
577 
578  dumpBV("BEGIN", BlockInfo.Begin);
579  dumpBV("END", BlockInfo.End);
580  dumpBV("LIVE_IN", BlockInfo.LiveIn);
581  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
582 }
583 
585  for (MachineBasicBlock *MBB : depth_first(MF)) {
586  dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
587  << MBB->getName() << "]\n";
588  dumpBB(MBB);
589  }
590 }
591 
592 LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
593  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
594  dbgs() << "Interval[" << I << "]:\n";
595  Intervals[I]->dump();
596  }
597 }
598 #endif
599 
600 static inline int getStartOrEndSlot(const MachineInstr &MI)
601 {
602  assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
603  MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
604  "Expected LIFETIME_START or LIFETIME_END op");
605  const MachineOperand &MO = MI.getOperand(0);
606  int Slot = MO.getIndex();
607  if (Slot >= 0)
608  return Slot;
609  return -1;
610 }
611 
612 // At the moment the only way to end a variable lifetime is with
613 // a VARIABLE_LIFETIME op (which can't contain a start). If things
614 // change and the IR allows for a single inst that both begins
615 // and ends lifetime(s), this interface will need to be reworked.
616 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
618  bool &isStart) {
619  if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
620  MI.getOpcode() == TargetOpcode::LIFETIME_END) {
621  int Slot = getStartOrEndSlot(MI);
622  if (Slot < 0)
623  return false;
624  if (!InterestingSlots.test(Slot))
625  return false;
626  slots.push_back(Slot);
627  if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
628  isStart = false;
629  return true;
630  }
631  if (!applyFirstUse(Slot)) {
632  isStart = true;
633  return true;
634  }
636  if (!MI.isDebugInstr()) {
637  bool found = false;
638  for (const MachineOperand &MO : MI.operands()) {
639  if (!MO.isFI())
640  continue;
641  int Slot = MO.getIndex();
642  if (Slot<0)
643  continue;
644  if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
645  slots.push_back(Slot);
646  found = true;
647  }
648  }
649  if (found) {
650  isStart = true;
651  return true;
652  }
653  }
654  }
655  return false;
656 }
657 
658 unsigned StackColoring::collectMarkers(unsigned NumSlot) {
659  unsigned MarkersFound = 0;
660  BlockBitVecMap SeenStartMap;
661  InterestingSlots.clear();
662  InterestingSlots.resize(NumSlot);
663  ConservativeSlots.clear();
664  ConservativeSlots.resize(NumSlot);
665  StoreSlots.clear();
666  StoreSlots.resize(NumSlot);
667 
668  // number of start and end lifetime ops for each slot
669  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
670  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
671  SmallVector<int, 8> NumLoadInCatchPad(NumSlot, 0);
672 
673  // Step 1: collect markers and populate the "InterestingSlots"
674  // and "ConservativeSlots" sets.
675  for (MachineBasicBlock *MBB : depth_first(MF)) {
676  // Compute the set of slots for which we've seen a START marker but have
677  // not yet seen an END marker at this point in the walk (e.g. on entry
678  // to this bb).
679  BitVector BetweenStartEnd;
680  BetweenStartEnd.resize(NumSlot);
681  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
682  BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
683  if (I != SeenStartMap.end()) {
684  BetweenStartEnd |= I->second;
685  }
686  }
687 
688  // Walk the instructions in the block to look for start/end ops.
689  for (MachineInstr &MI : *MBB) {
690  if (MI.isDebugInstr())
691  continue;
692  if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
693  MI.getOpcode() == TargetOpcode::LIFETIME_END) {
694  int Slot = getStartOrEndSlot(MI);
695  if (Slot < 0)
696  continue;
697  InterestingSlots.set(Slot);
698  if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
699  BetweenStartEnd.set(Slot);
700  NumStartLifetimes[Slot] += 1;
701  } else {
702  BetweenStartEnd.reset(Slot);
703  NumEndLifetimes[Slot] += 1;
704  }
705  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
706  if (Allocation) {
707  LLVM_DEBUG(dbgs() << "Found a lifetime ");
708  LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
709  ? "start"
710  : "end"));
711  LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
712  LLVM_DEBUG(dbgs()
713  << " with allocation: " << Allocation->getName() << "\n");
714  }
715  Markers.push_back(&MI);
716  MarkersFound += 1;
717  } else {
718  for (const MachineOperand &MO : MI.operands()) {
719  if (!MO.isFI())
720  continue;
721  int Slot = MO.getIndex();
722  if (Slot < 0)
723  continue;
724  if (! BetweenStartEnd.test(Slot)) {
725  ConservativeSlots.set(Slot);
726  }
727  // Here we check the StoreSlots to screen catch point out. For more
728  // information, please refer "Handle Windows Exception with
729  // LifetimeStartOnFirstUse" at the head of this file.
730  if (MI.mayStore())
731  StoreSlots.set(Slot);
732  if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad())
733  NumLoadInCatchPad[Slot] += 1;
734  }
735  }
736  }
737  BitVector &SeenStart = SeenStartMap[MBB];
738  SeenStart |= BetweenStartEnd;
739  }
740  if (!MarkersFound) {
741  return 0;
742  }
743 
744  // 1) PR27903: slots with multiple start or end lifetime ops are not
745  // safe to enable for "lifetime-start-on-first-use".
746  // 2) And also not safe for variable X in catch(X) in windows.
747  for (unsigned slot = 0; slot < NumSlot; ++slot) {
748  if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 ||
749  (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot)))
750  ConservativeSlots.set(slot);
751  }
752  LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
753 
754  // Step 2: compute begin/end sets for each block
755 
756  // NOTE: We use a depth-first iteration to ensure that we obtain a
757  // deterministic numbering.
758  for (MachineBasicBlock *MBB : depth_first(MF)) {
759  // Assign a serial number to this basic block.
760  BasicBlocks[MBB] = BasicBlockNumbering.size();
761  BasicBlockNumbering.push_back(MBB);
762 
763  // Keep a reference to avoid repeated lookups.
764  BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
765 
766  BlockInfo.Begin.resize(NumSlot);
767  BlockInfo.End.resize(NumSlot);
768 
770  for (MachineInstr &MI : *MBB) {
771  bool isStart = false;
772  slots.clear();
773  if (isLifetimeStartOrEnd(MI, slots, isStart)) {
774  if (!isStart) {
775  assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
776  int Slot = slots[0];
777  if (BlockInfo.Begin.test(Slot)) {
778  BlockInfo.Begin.reset(Slot);
779  }
780  BlockInfo.End.set(Slot);
781  } else {
782  for (auto Slot : slots) {
783  LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
784  LLVM_DEBUG(dbgs()
785  << " at " << printMBBReference(*MBB) << " index ");
787  const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
788  if (Allocation) {
789  LLVM_DEBUG(dbgs()
790  << " with allocation: " << Allocation->getName());
791  }
792  LLVM_DEBUG(dbgs() << "\n");
793  if (BlockInfo.End.test(Slot)) {
794  BlockInfo.End.reset(Slot);
795  }
796  BlockInfo.Begin.set(Slot);
797  }
798  }
799  }
800  }
801  }
802 
803  // Update statistics.
804  NumMarkerSeen += MarkersFound;
805  return MarkersFound;
806 }
807 
808 void StackColoring::calculateLocalLiveness() {
809  unsigned NumIters = 0;
810  bool changed = true;
811  while (changed) {
812  changed = false;
813  ++NumIters;
814 
815  for (const MachineBasicBlock *BB : BasicBlockNumbering) {
816  // Use an iterator to avoid repeated lookups.
817  LivenessMap::iterator BI = BlockLiveness.find(BB);
818  assert(BI != BlockLiveness.end() && "Block not found");
819  BlockLifetimeInfo &BlockInfo = BI->second;
820 
821  // Compute LiveIn by unioning together the LiveOut sets of all preds.
822  BitVector LocalLiveIn;
823  for (MachineBasicBlock *Pred : BB->predecessors()) {
824  LivenessMap::const_iterator I = BlockLiveness.find(Pred);
825  // PR37130: transformations prior to stack coloring can
826  // sometimes leave behind statically unreachable blocks; these
827  // can be safely skipped here.
828  if (I != BlockLiveness.end())
829  LocalLiveIn |= I->second.LiveOut;
830  }
831 
832  // Compute LiveOut by subtracting out lifetimes that end in this
833  // block, then adding in lifetimes that begin in this block. If
834  // we have both BEGIN and END markers in the same basic block
835  // then we know that the BEGIN marker comes after the END,
836  // because we already handle the case where the BEGIN comes
837  // before the END when collecting the markers (and building the
838  // BEGIN/END vectors).
839  BitVector LocalLiveOut = LocalLiveIn;
840  LocalLiveOut.reset(BlockInfo.End);
841  LocalLiveOut |= BlockInfo.Begin;
842 
843  // Update block LiveIn set, noting whether it has changed.
844  if (LocalLiveIn.test(BlockInfo.LiveIn)) {
845  changed = true;
846  BlockInfo.LiveIn |= LocalLiveIn;
847  }
848 
849  // Update block LiveOut set, noting whether it has changed.
850  if (LocalLiveOut.test(BlockInfo.LiveOut)) {
851  changed = true;
852  BlockInfo.LiveOut |= LocalLiveOut;
853  }
854  }
855  } // while changed.
856 
857  NumIterations = NumIters;
858 }
859 
860 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
862  SmallVector<bool, 16> DefinitelyInUse;
863 
864  // For each block, find which slots are active within this block
865  // and update the live intervals.
866  for (const MachineBasicBlock &MBB : *MF) {
867  Starts.clear();
868  Starts.resize(NumSlots);
869  DefinitelyInUse.clear();
870  DefinitelyInUse.resize(NumSlots);
871 
872  // Start the interval of the slots that we previously found to be 'in-use'.
873  BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
874  for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
875  pos = MBBLiveness.LiveIn.find_next(pos)) {
876  Starts[pos] = Indexes->getMBBStartIdx(&MBB);
877  }
878 
879  // Create the interval for the basic blocks containing lifetime begin/end.
880  for (const MachineInstr &MI : MBB) {
882  bool IsStart = false;
883  if (!isLifetimeStartOrEnd(MI, slots, IsStart))
884  continue;
885  SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
886  for (auto Slot : slots) {
887  if (IsStart) {
888  // If a slot is already definitely in use, we don't have to emit
889  // a new start marker because there is already a pre-existing
890  // one.
891  if (!DefinitelyInUse[Slot]) {
892  LiveStarts[Slot].push_back(ThisIndex);
893  DefinitelyInUse[Slot] = true;
894  }
895  if (!Starts[Slot].isValid())
896  Starts[Slot] = ThisIndex;
897  } else {
898  if (Starts[Slot].isValid()) {
899  VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
900  Intervals[Slot]->addSegment(
901  LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
902  Starts[Slot] = SlotIndex(); // Invalidate the start index
903  DefinitelyInUse[Slot] = false;
904  }
905  }
906  }
907  }
908 
909  // Finish up started segments
910  for (unsigned i = 0; i < NumSlots; ++i) {
911  if (!Starts[i].isValid())
912  continue;
913 
914  SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
915  VNInfo *VNI = Intervals[i]->getValNumInfo(0);
916  Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
917  }
918  }
919 }
920 
921 bool StackColoring::removeAllMarkers() {
922  unsigned Count = 0;
923  for (MachineInstr *MI : Markers) {
924  MI->eraseFromParent();
925  Count++;
926  }
927  Markers.clear();
928 
929  LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
930  return Count;
931 }
932 
933 void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
934  unsigned FixedInstr = 0;
935  unsigned FixedMemOp = 0;
936  unsigned FixedDbg = 0;
937 
938  // Remap debug information that refers to stack slots.
939  for (auto &VI : MF->getVariableDbgInfo()) {
940  if (!VI.Var)
941  continue;
942  if (SlotRemap.count(VI.Slot)) {
943  LLVM_DEBUG(dbgs() << "Remapping debug info for ["
944  << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
945  VI.Slot = SlotRemap[VI.Slot];
946  FixedDbg++;
947  }
948  }
949 
950  // Keep a list of *allocas* which need to be remapped.
952 
953  // Keep a list of allocas which has been affected by the remap.
955 
956  for (const std::pair<int, int> &SI : SlotRemap) {
957  const AllocaInst *From = MFI->getObjectAllocation(SI.first);
958  const AllocaInst *To = MFI->getObjectAllocation(SI.second);
959  assert(To && From && "Invalid allocation object");
960  Allocas[From] = To;
961 
962  // If From is before wo, its possible that there is a use of From between
963  // them.
964  if (From->comesBefore(To))
965  const_cast<AllocaInst*>(To)->moveBefore(const_cast<AllocaInst*>(From));
966 
967  // AA might be used later for instruction scheduling, and we need it to be
968  // able to deduce the correct aliasing releationships between pointers
969  // derived from the alloca being remapped and the target of that remapping.
970  // The only safe way, without directly informing AA about the remapping
971  // somehow, is to directly update the IR to reflect the change being made
972  // here.
973  Instruction *Inst = const_cast<AllocaInst *>(To);
974  if (From->getType() != To->getType()) {
975  BitCastInst *Cast = new BitCastInst(Inst, From->getType());
976  Cast->insertAfter(Inst);
977  Inst = Cast;
978  }
979 
980  // We keep both slots to maintain AliasAnalysis metadata later.
981  MergedAllocas.insert(From);
982  MergedAllocas.insert(To);
983 
984  // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
985  // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
986  // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
988  = MFI->getObjectSSPLayout(SI.first);
990  if (FromKind != MachineFrameInfo::SSPLK_None &&
991  (ToKind == MachineFrameInfo::SSPLK_None ||
993  FromKind != MachineFrameInfo::SSPLK_AddrOf)))
994  MFI->setObjectSSPLayout(SI.second, FromKind);
995 
996  // The new alloca might not be valid in a llvm.dbg.declare for this
997  // variable, so undef out the use to make the verifier happy.
998  AllocaInst *FromAI = const_cast<AllocaInst *>(From);
999  if (FromAI->isUsedByMetadata())
1001  for (auto &Use : FromAI->uses()) {
1002  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
1003  if (BCI->isUsedByMetadata())
1004  ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
1005  }
1006 
1007  // Note that this will not replace uses in MMOs (which we'll update below),
1008  // or anywhere else (which is why we won't delete the original
1009  // instruction).
1010  FromAI->replaceAllUsesWith(Inst);
1011  }
1012 
1013  // Remap all instructions to the new stack slots.
1014  std::vector<std::vector<MachineMemOperand *>> SSRefs(
1015  MFI->getObjectIndexEnd());
1016  for (MachineBasicBlock &BB : *MF)
1017  for (MachineInstr &I : BB) {
1018  // Skip lifetime markers. We'll remove them soon.
1019  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1020  I.getOpcode() == TargetOpcode::LIFETIME_END)
1021  continue;
1022 
1023  // Update the MachineMemOperand to use the new alloca.
1024  for (MachineMemOperand *MMO : I.memoperands()) {
1025  // We've replaced IR-level uses of the remapped allocas, so we only
1026  // need to replace direct uses here.
1027  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
1028  if (!AI)
1029  continue;
1030 
1031  if (!Allocas.count(AI))
1032  continue;
1033 
1034  MMO->setValue(Allocas[AI]);
1035  FixedMemOp++;
1036  }
1037 
1038  // Update all of the machine instruction operands.
1039  for (MachineOperand &MO : I.operands()) {
1040  if (!MO.isFI())
1041  continue;
1042  int FromSlot = MO.getIndex();
1043 
1044  // Don't touch arguments.
1045  if (FromSlot<0)
1046  continue;
1047 
1048  // Only look at mapped slots.
1049  if (!SlotRemap.count(FromSlot))
1050  continue;
1051 
1052  // In a debug build, check that the instruction that we are modifying is
1053  // inside the expected live range. If the instruction is not inside
1054  // the calculated range then it means that the alloca usage moved
1055  // outside of the lifetime markers, or that the user has a bug.
1056  // NOTE: Alloca address calculations which happen outside the lifetime
1057  // zone are okay, despite the fact that we don't have a good way
1058  // for validating all of the usages of the calculation.
1059 #ifndef NDEBUG
1060  bool TouchesMemory = I.mayLoadOrStore();
1061  // If we *don't* protect the user from escaped allocas, don't bother
1062  // validating the instructions.
1063  if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1064  SlotIndex Index = Indexes->getInstructionIndex(I);
1065  const LiveInterval *Interval = &*Intervals[FromSlot];
1066  assert(Interval->find(Index) != Interval->end() &&
1067  "Found instruction usage outside of live range.");
1068  }
1069 #endif
1070 
1071  // Fix the machine instructions.
1072  int ToSlot = SlotRemap[FromSlot];
1073  MO.setIndex(ToSlot);
1074  FixedInstr++;
1075  }
1076 
1077  // We adjust AliasAnalysis information for merged stack slots.
1079  bool ReplaceMemOps = false;
1080  for (MachineMemOperand *MMO : I.memoperands()) {
1081  // Collect MachineMemOperands which reference
1082  // FixedStackPseudoSourceValues with old frame indices.
1083  if (const auto *FSV = dyn_cast_or_null<FixedStackPseudoSourceValue>(
1084  MMO->getPseudoValue())) {
1085  int FI = FSV->getFrameIndex();
1086  auto To = SlotRemap.find(FI);
1087  if (To != SlotRemap.end())
1088  SSRefs[FI].push_back(MMO);
1089  }
1090 
1091  // If this memory location can be a slot remapped here,
1092  // we remove AA information.
1093  bool MayHaveConflictingAAMD = false;
1094  if (MMO->getAAInfo()) {
1095  if (const Value *MMOV = MMO->getValue()) {
1097  getUnderlyingObjectsForCodeGen(MMOV, Objs);
1098 
1099  if (Objs.empty())
1100  MayHaveConflictingAAMD = true;
1101  else
1102  for (Value *V : Objs) {
1103  // If this memory location comes from a known stack slot
1104  // that is not remapped, we continue checking.
1105  // Otherwise, we need to invalidate AA infomation.
1106  const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1107  if (AI && MergedAllocas.count(AI)) {
1108  MayHaveConflictingAAMD = true;
1109  break;
1110  }
1111  }
1112  }
1113  }
1114  if (MayHaveConflictingAAMD) {
1115  NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1116  ReplaceMemOps = true;
1117  } else {
1118  NewMMOs.push_back(MMO);
1119  }
1120  }
1121 
1122  // If any memory operand is updated, set memory references of
1123  // this instruction.
1124  if (ReplaceMemOps)
1125  I.setMemRefs(*MF, NewMMOs);
1126  }
1127 
1128  // Rewrite MachineMemOperands that reference old frame indices.
1129  for (auto E : enumerate(SSRefs))
1130  if (!E.value().empty()) {
1131  const PseudoSourceValue *NewSV =
1132  MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
1133  for (MachineMemOperand *Ref : E.value())
1134  Ref->setValue(NewSV);
1135  }
1136 
1137  // Update the location of C++ catch objects for the MSVC personality routine.
1138  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1139  for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1140  for (WinEHHandlerType &H : TBME.HandlerArray)
1141  if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
1142  SlotRemap.count(H.CatchObj.FrameIndex))
1143  H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1144 
1145  LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1146  LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1147  LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1148 }
1149 
1150 void StackColoring::removeInvalidSlotRanges() {
1151  for (MachineBasicBlock &BB : *MF)
1152  for (MachineInstr &I : BB) {
1153  if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1154  I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1155  continue;
1156 
1157  // Some intervals are suspicious! In some cases we find address
1158  // calculations outside of the lifetime zone, but not actual memory
1159  // read or write. Memory accesses outside of the lifetime zone are a clear
1160  // violation, but address calculations are okay. This can happen when
1161  // GEPs are hoisted outside of the lifetime zone.
1162  // So, in here we only check instructions which can read or write memory.
1163  if (!I.mayLoad() && !I.mayStore())
1164  continue;
1165 
1166  // Check all of the machine operands.
1167  for (const MachineOperand &MO : I.operands()) {
1168  if (!MO.isFI())
1169  continue;
1170 
1171  int Slot = MO.getIndex();
1172 
1173  if (Slot<0)
1174  continue;
1175 
1176  if (Intervals[Slot]->empty())
1177  continue;
1178 
1179  // Check that the used slot is inside the calculated lifetime range.
1180  // If it is not, warn about it and invalidate the range.
1181  LiveInterval *Interval = &*Intervals[Slot];
1182  SlotIndex Index = Indexes->getInstructionIndex(I);
1183  if (Interval->find(Index) == Interval->end()) {
1184  Interval->clear();
1185  LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1186  EscapedAllocas++;
1187  }
1188  }
1189  }
1190 }
1191 
1192 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1193  unsigned NumSlots) {
1194  // Expunge slot remap map.
1195  for (unsigned i=0; i < NumSlots; ++i) {
1196  // If we are remapping i
1197  if (SlotRemap.count(i)) {
1198  int Target = SlotRemap[i];
1199  // As long as our target is mapped to something else, follow it.
1200  while (SlotRemap.count(Target)) {
1201  Target = SlotRemap[Target];
1202  SlotRemap[i] = Target;
1203  }
1204  }
1205  }
1206 }
1207 
1208 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1209  LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1210  << "********** Function: " << Func.getName() << '\n');
1211  MF = &Func;
1212  MFI = &MF->getFrameInfo();
1213  Indexes = &getAnalysis<SlotIndexes>();
1214  BlockLiveness.clear();
1215  BasicBlocks.clear();
1216  BasicBlockNumbering.clear();
1217  Markers.clear();
1218  Intervals.clear();
1219  LiveStarts.clear();
1220  VNInfoAllocator.Reset();
1221 
1222  unsigned NumSlots = MFI->getObjectIndexEnd();
1223 
1224  // If there are no stack slots then there are no markers to remove.
1225  if (!NumSlots)
1226  return false;
1227 
1228  SmallVector<int, 8> SortedSlots;
1229  SortedSlots.reserve(NumSlots);
1230  Intervals.reserve(NumSlots);
1231  LiveStarts.resize(NumSlots);
1232 
1233  unsigned NumMarkers = collectMarkers(NumSlots);
1234 
1235  unsigned TotalSize = 0;
1236  LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1237  << " slots\n");
1238  LLVM_DEBUG(dbgs() << "Slot structure:\n");
1239 
1240  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1241  LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1242  << " bytes.\n");
1243  TotalSize += MFI->getObjectSize(i);
1244  }
1245 
1246  LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1247 
1248  // Don't continue because there are not enough lifetime markers, or the
1249  // stack is too small, or we are told not to optimize the slots.
1250  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1251  skipFunction(Func.getFunction())) {
1252  LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1253  return removeAllMarkers();
1254  }
1255 
1256  for (unsigned i=0; i < NumSlots; ++i) {
1257  std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1258  LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1259  Intervals.push_back(std::move(LI));
1260  SortedSlots.push_back(i);
1261  }
1262 
1263  // Calculate the liveness of each block.
1264  calculateLocalLiveness();
1265  LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1266  LLVM_DEBUG(dump());
1267 
1268  // Propagate the liveness information.
1269  calculateLiveIntervals(NumSlots);
1270  LLVM_DEBUG(dumpIntervals());
1271 
1272  // Search for allocas which are used outside of the declared lifetime
1273  // markers.
1275  removeInvalidSlotRanges();
1276 
1277  // Maps old slots to new slots.
1278  DenseMap<int, int> SlotRemap;
1279  unsigned RemovedSlots = 0;
1280  unsigned ReducedSize = 0;
1281 
1282  // Do not bother looking at empty intervals.
1283  for (unsigned I = 0; I < NumSlots; ++I) {
1284  if (Intervals[SortedSlots[I]]->empty())
1285  SortedSlots[I] = -1;
1286  }
1287 
1288  // This is a simple greedy algorithm for merging allocas. First, sort the
1289  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1290  // for disjoint slots. When you find disjoint slots, merge the smaller one
1291  // into the bigger one and update the live interval. Remove the small alloca
1292  // and continue.
1293 
1294  // Sort the slots according to their size. Place unused slots at the end.
1295  // Use stable sort to guarantee deterministic code generation.
1296  llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
1297  // We use -1 to denote a uninteresting slot. Place these slots at the end.
1298  if (LHS == -1)
1299  return false;
1300  if (RHS == -1)
1301  return true;
1302  // Sort according to size.
1303  return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1304  });
1305 
1306  for (auto &s : LiveStarts)
1307  llvm::sort(s);
1308 
1309  bool Changed = true;
1310  while (Changed) {
1311  Changed = false;
1312  for (unsigned I = 0; I < NumSlots; ++I) {
1313  if (SortedSlots[I] == -1)
1314  continue;
1315 
1316  for (unsigned J=I+1; J < NumSlots; ++J) {
1317  if (SortedSlots[J] == -1)
1318  continue;
1319 
1320  int FirstSlot = SortedSlots[I];
1321  int SecondSlot = SortedSlots[J];
1322  LiveInterval *First = &*Intervals[FirstSlot];
1323  LiveInterval *Second = &*Intervals[SecondSlot];
1324  auto &FirstS = LiveStarts[FirstSlot];
1325  auto &SecondS = LiveStarts[SecondSlot];
1326  assert(!First->empty() && !Second->empty() && "Found an empty range");
1327 
1328  // Merge disjoint slots. This is a little bit tricky - see the
1329  // Implementation Notes section for an explanation.
1330  if (!First->isLiveAtIndexes(SecondS) &&
1331  !Second->isLiveAtIndexes(FirstS)) {
1332  Changed = true;
1333  First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1334 
1335  int OldSize = FirstS.size();
1336  FirstS.append(SecondS.begin(), SecondS.end());
1337  auto Mid = FirstS.begin() + OldSize;
1338  std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1339 
1340  SlotRemap[SecondSlot] = FirstSlot;
1341  SortedSlots[J] = -1;
1342  LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1343  << SecondSlot << " together.\n");
1344  Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
1345  MFI->getObjectAlign(SecondSlot));
1346 
1347  assert(MFI->getObjectSize(FirstSlot) >=
1348  MFI->getObjectSize(SecondSlot) &&
1349  "Merging a small object into a larger one");
1350 
1351  RemovedSlots+=1;
1352  ReducedSize += MFI->getObjectSize(SecondSlot);
1353  MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1354  MFI->RemoveStackObject(SecondSlot);
1355  }
1356  }
1357  }
1358  }// While changed.
1359 
1360  // Record statistics.
1361  StackSpaceSaved += ReducedSize;
1362  StackSlotMerged += RemovedSlots;
1363  LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1364  << ReducedSize << " bytes\n");
1365 
1366  // Scan the entire function and update all machine operands that use frame
1367  // indices to use the remapped frame index.
1368  expungeSlotMap(SlotRemap, NumSlots);
1369  remapInstructions(SlotRemap);
1370 
1371  return removeAllMarkers();
1372 }
ProtectFromEscapedAllocas
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
i
i
Definition: README.txt:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:506
Merge
R600 Clause Merge
Definition: R600ClauseMergePass.cpp:69
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::LiveRange::empty
bool empty() const
Definition: LiveInterval.h:374
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) INITIALIZE_PASS_END(StackColoring
llvm::BumpPtrAllocatorImpl::Reset
void Reset()
Deallocate all but the current slab and reset the current pointer to the beginning of it,...
Definition: Allocator.h:125
Metadata.h
llvm::ISD::LIFETIME_END
@ LIFETIME_END
Definition: ISDOpcodes.h:1178
DEBUG_TYPE
#define DEBUG_TYPE
Definition: StackColoring.cpp:66
DebugInfoMetadata.h
llvm::ISD::LIFETIME_START
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1177
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:327
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5198
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::AllocaInst::getType
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:104
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:119
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1981
llvm::Use::get
Value * get() const
Definition: Use.h:67
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::WinEHHandlerType
Definition: WinEHFuncInfo.h:60
ValueTracking.h
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:651
DenseMap.h
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:333
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::MachineFrameInfo::SSPLK_LargeArray
@ SSPLK_LargeArray
Array or nested array >= SSP-buffer-size.
Definition: MachineFrameInfo.h:114
DisableColoring
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
slot
Since we know that Vector is byte aligned and we know the element offset of we should change the load into a lve *x instead of doing a load store lve *x sequence Implement passing vectors by value into calls and receiving them as arguments GCC apparently tries to then a load and vperm of Variable We need a way to teach tblgen that some operands of an intrinsic are required to be constants The verifier should enforce this constraint We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a byte aligned stack slot
Definition: README_ALTIVEC.txt:57
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:742
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::SlotIndexes::getMBBEndIdx
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:476
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::MachineFrameInfo::RemoveStackObject
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Definition: MachineFrameInfo.h:744
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:393
Use.h
llvm::MachineFrameInfo::setObjectAlignment
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
Definition: MachineFrameInfo.h:474
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
llvm::LiveRange::isLiveAtIndexes
bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
Definition: LiveInterval.cpp:813
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::Instruction::insertAfter
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Definition: Instruction.cpp:90
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
SelectionDAGNodes.h
llvm::ModRefInfo::Ref
@ Ref
The access may reference the value stored in memory.
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFrameInfo::setObjectSSPLayout
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
Definition: MachineFrameInfo.h:542
llvm::BitVector::size
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:151
getStartOrEndSlot
static int getStartOrEndSlot(const MachineInstr &MI)
Definition: StackColoring.cpp:600
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
false
Definition: StackSlotColoring.cpp:142
TargetOpcodes.h
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::Instruction
Definition: Instruction.h:45
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::SlotIndexes::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:385
llvm::MachineFrameInfo::SSPLK_None
@ SSPLK_None
Did not trigger a stack protector.
Definition: MachineFrameInfo.h:112
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1796
BitVector.h
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:314
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:74
llvm::PseudoSourceValue
Special value supplied for machine level alias analysis.
Definition: PseudoSourceValue.h:35
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:680
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
Passes.h
llvm::cl::opt< bool >
llvm::WinEHTryBlockMapEntry
Definition: WinEHFuncInfo.h:72
VI
@ VI
Definition: SIInstrInfo.cpp:7685
llvm::SlotIndexes::getMBBStartIdx
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:466
llvm::StackColoringID
char & StackColoringID
StackSlotColoring - This pass performs stack coloring and merging.
Definition: StackColoring.cpp:551
llvm::WinEHFuncInfo
Definition: WinEHFuncInfo.h:90
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:453
llvm::Interval
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:36
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::BumpPtrAllocatorImpl
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:67
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
llvm::WinEHTryBlockMapEntry::HandlerArray
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:467
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::getUnderlyingObjectsForCodeGen
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
Definition: ValueTracking.cpp:4492
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:349
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1056
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:526
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::SlotIndex::print
void print(raw_ostream &os) const
Print this index to the given raw_ostream.
Definition: SlotIndexes.cpp:268
Compiler.h
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::SlotIndexes::getZeroIndex
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:368
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:447
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1686
llvm::MachineFrameInfo::getObjectSSPLayout
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
Definition: MachineFrameInfo.h:536
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
H
#define H(x, y, z)
Definition: MD5.cpp:58
MachineFrameInfo.h
llvm::MachineFunction::getWinEHFuncInfo
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
Definition: MachineFunction.h:674
llvm::MachineOperand::getIndex
int getIndex() const
Definition: MachineOperand.h:557
Casting.h
llvm::pdb::PDB_LocType::Slot
@ Slot
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1492
slots
Merge disjoint stack slots
Definition: StackColoring.cpp:557
llvm::VNInfo
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:384
WinEHFuncInfo.h
llvm::ValueAsMetadata::handleRAUW
static void handleRAUW(Value *From, Value *To)
Definition: Metadata.cpp:411
LiveInterval.h
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:217
Instructions.h
SmallVector.h
llvm::Value::isUsedByMetadata
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition: Value.h:558
llvm::MachineFrameInfo::SSPLK_AddrOf
@ SSPLK_AddrOf
The address of this allocation is exposed and triggered protection.
Definition: MachineFrameInfo.h:118
llvm::MachineFrameInfo::SSPLayoutKind
SSPLayoutKind
Stack Smashing Protection (SSP) rules require that vulnerable stack allocations are located close the...
Definition: MachineFrameInfo.h:111
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
stack
S is passed via registers r2 But gcc stores them to the stack
Definition: README.txt:189
MachineMemOperand.h
MachineOperand.h
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::initializeStackColoringPass
void initializeStackColoringPass(PassRegistry &)
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
SlotIndexes.h
llvm::cl::desc
Definition: CommandLine.h:412
LifetimeStartOnFirstUse
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
raw_ostream.h
MachineFunction.h
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:314
Debug.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::MachineFrameInfo::getObjectAllocation
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
Definition: MachineFrameInfo.h:486