Line data Source code
1 : //===- StackColoring.cpp --------------------------------------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This pass implements the stack-coloring optimization that looks for
11 : // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
12 : // which represent the possible lifetime of stack slots. It attempts to
13 : // merge disjoint stack slots and reduce the used stack space.
14 : // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15 : //
16 : // TODO: In the future we plan to improve stack coloring in the following ways:
17 : // 1. Allow merging multiple small slots into a single larger slot at different
18 : // offsets.
19 : // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
20 : // spill slots.
21 : //
22 : //===----------------------------------------------------------------------===//
23 :
24 : #include "llvm/ADT/BitVector.h"
25 : #include "llvm/ADT/DenseMap.h"
26 : #include "llvm/ADT/DepthFirstIterator.h"
27 : #include "llvm/ADT/SmallPtrSet.h"
28 : #include "llvm/ADT/SmallVector.h"
29 : #include "llvm/ADT/Statistic.h"
30 : #include "llvm/Analysis/ValueTracking.h"
31 : #include "llvm/CodeGen/LiveInterval.h"
32 : #include "llvm/CodeGen/MachineBasicBlock.h"
33 : #include "llvm/CodeGen/MachineFrameInfo.h"
34 : #include "llvm/CodeGen/MachineFunction.h"
35 : #include "llvm/CodeGen/MachineFunctionPass.h"
36 : #include "llvm/CodeGen/MachineInstr.h"
37 : #include "llvm/CodeGen/MachineMemOperand.h"
38 : #include "llvm/CodeGen/MachineOperand.h"
39 : #include "llvm/CodeGen/Passes.h"
40 : #include "llvm/CodeGen/SelectionDAGNodes.h"
41 : #include "llvm/CodeGen/SlotIndexes.h"
42 : #include "llvm/CodeGen/TargetOpcodes.h"
43 : #include "llvm/CodeGen/WinEHFuncInfo.h"
44 : #include "llvm/Config/llvm-config.h"
45 : #include "llvm/IR/Constants.h"
46 : #include "llvm/IR/DebugInfoMetadata.h"
47 : #include "llvm/IR/Function.h"
48 : #include "llvm/IR/Instructions.h"
49 : #include "llvm/IR/Metadata.h"
50 : #include "llvm/IR/Use.h"
51 : #include "llvm/IR/Value.h"
52 : #include "llvm/Pass.h"
53 : #include "llvm/Support/Casting.h"
54 : #include "llvm/Support/CommandLine.h"
55 : #include "llvm/Support/Compiler.h"
56 : #include "llvm/Support/Debug.h"
57 : #include "llvm/Support/raw_ostream.h"
58 : #include <algorithm>
59 : #include <cassert>
60 : #include <limits>
61 : #include <memory>
62 : #include <utility>
63 :
64 : using namespace llvm;
65 :
66 : #define DEBUG_TYPE "stack-coloring"
67 :
68 : static cl::opt<bool>
69 : DisableColoring("no-stack-coloring",
70 : cl::init(false), cl::Hidden,
71 : cl::desc("Disable stack coloring"));
72 :
73 : /// The user may write code that uses allocas outside of the declared lifetime
74 : /// zone. This can happen when the user returns a reference to a local
75 : /// data-structure. We can detect these cases and decide not to optimize the
76 : /// code. If this flag is enabled, we try to save the user. This option
77 : /// is treated as overriding LifetimeStartOnFirstUse below.
78 : static cl::opt<bool>
79 : ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80 : cl::init(false), cl::Hidden,
81 : cl::desc("Do not optimize lifetime zones that "
82 : "are broken"));
83 :
84 : /// Enable enhanced dataflow scheme for lifetime analysis (treat first
85 : /// use of stack slot as start of slot lifetime, as opposed to looking
86 : /// for LIFETIME_START marker). See "Implementation notes" below for
87 : /// more info.
88 : static cl::opt<bool>
89 : LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90 : cl::init(true), cl::Hidden,
91 : cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92 :
93 :
94 : STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95 : STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96 : STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97 : STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98 :
99 : //===----------------------------------------------------------------------===//
100 : // StackColoring Pass
101 : //===----------------------------------------------------------------------===//
102 : //
103 : // Stack Coloring reduces stack usage by merging stack slots when they
104 : // can't be used together. For example, consider the following C program:
105 : //
106 : // void bar(char *, int);
107 : // void foo(bool var) {
108 : // A: {
109 : // char z[4096];
110 : // bar(z, 0);
111 : // }
112 : //
113 : // char *p;
114 : // char x[4096];
115 : // char y[4096];
116 : // if (var) {
117 : // p = x;
118 : // } else {
119 : // bar(y, 1);
120 : // p = y + 1024;
121 : // }
122 : // B:
123 : // bar(p, 2);
124 : // }
125 : //
126 : // Naively-compiled, this program would use 12k of stack space. However, the
127 : // stack slot corresponding to `z` is always destroyed before either of the
128 : // stack slots for `x` or `y` are used, and then `x` is only used if `var`
129 : // is true, while `y` is only used if `var` is false. So in no time are 2
130 : // of the stack slots used together, and therefore we can merge them,
131 : // compiling the function using only a single 4k alloca:
132 : //
133 : // void foo(bool var) { // equivalent
134 : // char x[4096];
135 : // char *p;
136 : // bar(x, 0);
137 : // if (var) {
138 : // p = x;
139 : // } else {
140 : // bar(x, 1);
141 : // p = x + 1024;
142 : // }
143 : // bar(p, 2);
144 : // }
145 : //
146 : // This is an important optimization if we want stack space to be under
147 : // control in large functions, both open-coded ones and ones created by
148 : // inlining.
149 : //
150 : // Implementation Notes:
151 : // ---------------------
152 : //
153 : // An important part of the above reasoning is that `z` can't be accessed
154 : // while the latter 2 calls to `bar` are running. This is justified because
155 : // `z`'s lifetime is over after we exit from block `A:`, so any further
156 : // accesses to it would be UB. The way we represent this information
157 : // in LLVM is by having frontends delimit blocks with `lifetime.start`
158 : // and `lifetime.end` intrinsics.
159 : //
160 : // The effect of these intrinsics seems to be as follows (maybe I should
161 : // specify this in the reference?):
162 : //
163 : // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164 : // lifetime intrinsic refers to that stack slot, in which case
165 : // it is marked as *in-scope*.
166 : // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167 : // the stack slot is overwritten with `undef`.
168 : // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169 : // L4) on function exit, all stack slots are marked as *out-of-scope*.
170 : // L5) `lifetime.end` is a no-op when called on a slot that is already
171 : // *out-of-scope*.
172 : // L6) memory accesses to *out-of-scope* stack slots are UB.
173 : // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174 : // are invalidated, unless the slot is "degenerate". This is used to
175 : // justify not marking slots as in-use until the pointer to them is
176 : // used, but feels a bit hacky in the presence of things like LICM. See
177 : // the "Degenerate Slots" section for more details.
178 : //
179 : // Now, let's ground stack coloring on these rules. We'll define a slot
180 : // as *in-use* at a (dynamic) point in execution if it either can be
181 : // written to at that point, or if it has a live and non-undef content
182 : // at that point.
183 : //
184 : // Obviously, slots that are never *in-use* together can be merged, and
185 : // in our example `foo`, the slots for `x`, `y` and `z` are never
186 : // in-use together (of course, sometimes slots that *are* in-use together
187 : // might still be mergable, but we don't care about that here).
188 : //
189 : // In this implementation, we successively merge pairs of slots that are
190 : // not *in-use* together. We could be smarter - for example, we could merge
191 : // a single large slot with 2 small slots, or we could construct the
192 : // interference graph and run a "smart" graph coloring algorithm, but with
193 : // that aside, how do we find out whether a pair of slots might be *in-use*
194 : // together?
195 : //
196 : // From our rules, we see that *out-of-scope* slots are never *in-use*,
197 : // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198 : // until their address is taken. Therefore, we can approximate slot activity
199 : // using dataflow.
200 : //
201 : // A subtle point: naively, we might try to figure out which pairs of
202 : // stack-slots interfere by propagating `S in-use` through the CFG for every
203 : // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204 : // which they are both *in-use*.
205 : //
206 : // That is sound, but overly conservative in some cases: in our (artificial)
207 : // example `foo`, either `x` or `y` might be in use at the label `B:`, but
208 : // as `x` is only in use if we came in from the `var` edge and `y` only
209 : // if we came from the `!var` edge, they still can't be in use together.
210 : // See PR32488 for an important real-life case.
211 : //
212 : // If we wanted to find all points of interference precisely, we could
213 : // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214 : // would be precise, but requires propagating `O(n^2)` dataflow facts.
215 : //
216 : // However, we aren't interested in the *set* of points of interference
217 : // between 2 stack slots, only *whether* there *is* such a point. So we
218 : // can rely on a little trick: for `S` and `T` to be in-use together,
219 : // one of them needs to become in-use while the other is in-use (or
220 : // they might both become in use simultaneously). We can check this
221 : // by also keeping track of the points at which a stack slot might *start*
222 : // being in-use.
223 : //
224 : // Exact first use:
225 : // ----------------
226 : //
227 : // Consider the following motivating example:
228 : //
229 : // int foo() {
230 : // char b1[1024], b2[1024];
231 : // if (...) {
232 : // char b3[1024];
233 : // <uses of b1, b3>;
234 : // return x;
235 : // } else {
236 : // char b4[1024], b5[1024];
237 : // <uses of b2, b4, b5>;
238 : // return y;
239 : // }
240 : // }
241 : //
242 : // In the code above, "b3" and "b4" are declared in distinct lexical
243 : // scopes, meaning that it is easy to prove that they can share the
244 : // same stack slot. Variables "b1" and "b2" are declared in the same
245 : // scope, meaning that from a lexical point of view, their lifetimes
246 : // overlap. From a control flow pointer of view, however, the two
247 : // variables are accessed in disjoint regions of the CFG, thus it
248 : // should be possible for them to share the same stack slot. An ideal
249 : // stack allocation for the function above would look like:
250 : //
251 : // slot 0: b1, b2
252 : // slot 1: b3, b4
253 : // slot 2: b5
254 : //
255 : // Achieving this allocation is tricky, however, due to the way
256 : // lifetime markers are inserted. Here is a simplified view of the
257 : // control flow graph for the code above:
258 : //
259 : // +------ block 0 -------+
260 : // 0| LIFETIME_START b1, b2 |
261 : // 1| <test 'if' condition> |
262 : // +-----------------------+
263 : // ./ \.
264 : // +------ block 1 -------+ +------ block 2 -------+
265 : // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266 : // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267 : // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268 : // +-----------------------+ +-----------------------+
269 : // \. /.
270 : // +------ block 3 -------+
271 : // 8| <cleanupcode> |
272 : // 9| LIFETIME_END b1, b2 |
273 : // 10| return |
274 : // +-----------------------+
275 : //
276 : // If we create live intervals for the variables above strictly based
277 : // on the lifetime markers, we'll get the set of intervals on the
278 : // left. If we ignore the lifetime start markers and instead treat a
279 : // variable's lifetime as beginning with the first reference to the
280 : // var, then we get the intervals on the right.
281 : //
282 : // LIFETIME_START First Use
283 : // b1: [0,9] [3,4] [8,9]
284 : // b2: [0,9] [6,9]
285 : // b3: [2,4] [3,4]
286 : // b4: [5,7] [6,7]
287 : // b5: [5,7] [6,7]
288 : //
289 : // For the intervals on the left, the best we can do is overlap two
290 : // variables (b3 and b4, for example); this gives us a stack size of
291 : // 4*1024 bytes, not ideal. When treating first-use as the start of a
292 : // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293 : // byte stack (better).
294 : //
295 : // Degenerate Slots:
296 : // -----------------
297 : //
298 : // Relying entirely on first-use of stack slots is problematic,
299 : // however, due to the fact that optimizations can sometimes migrate
300 : // uses of a variable outside of its lifetime start/end region. Here
301 : // is an example:
302 : //
303 : // int bar() {
304 : // char b1[1024], b2[1024];
305 : // if (...) {
306 : // <uses of b2>
307 : // return y;
308 : // } else {
309 : // <uses of b1>
310 : // while (...) {
311 : // char b3[1024];
312 : // <uses of b3>
313 : // }
314 : // }
315 : // }
316 : //
317 : // Before optimization, the control flow graph for the code above
318 : // might look like the following:
319 : //
320 : // +------ block 0 -------+
321 : // 0| LIFETIME_START b1, b2 |
322 : // 1| <test 'if' condition> |
323 : // +-----------------------+
324 : // ./ \.
325 : // +------ block 1 -------+ +------- block 2 -------+
326 : // 2| <uses of b2> | 3| <uses of b1> |
327 : // +-----------------------+ +-----------------------+
328 : // | |
329 : // | +------- block 3 -------+ <-\.
330 : // | 4| <while condition> | |
331 : // | +-----------------------+ |
332 : // | / | |
333 : // | / +------- block 4 -------+
334 : // \ / 5| LIFETIME_START b3 | |
335 : // \ / 6| <uses of b3> | |
336 : // \ / 7| LIFETIME_END b3 | |
337 : // \ | +------------------------+ |
338 : // \ | \ /
339 : // +------ block 5 -----+ \---------------
340 : // 8| <cleanupcode> |
341 : // 9| LIFETIME_END b1, b2 |
342 : // 10| return |
343 : // +---------------------+
344 : //
345 : // During optimization, however, it can happen that an instruction
346 : // computing an address in "b3" (for example, a loop-invariant GEP) is
347 : // hoisted up out of the loop from block 4 to block 2. [Note that
348 : // this is not an actual load from the stack, only an instruction that
349 : // computes the address to be loaded]. If this happens, there is now a
350 : // path leading from the first use of b3 to the return instruction
351 : // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352 : // now larger than if we were computing live intervals strictly based
353 : // on lifetime markers. In the example above, this lengthened lifetime
354 : // would mean that it would appear illegal to overlap b3 with b2.
355 : //
356 : // To deal with this such cases, the code in ::collectMarkers() below
357 : // tries to identify "degenerate" slots -- those slots where on a single
358 : // forward pass through the CFG we encounter a first reference to slot
359 : // K before we hit the slot K lifetime start marker. For such slots,
360 : // we fall back on using the lifetime start marker as the beginning of
361 : // the variable's lifetime. NB: with this implementation, slots can
362 : // appear degenerate in cases where there is unstructured control flow:
363 : //
364 : // if (q) goto mid;
365 : // if (x > 9) {
366 : // int b[100];
367 : // memcpy(&b[0], ...);
368 : // mid: b[k] = ...;
369 : // abc(&b);
370 : // }
371 : //
372 : // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373 : // before visiting the memcpy block (which will contain the lifetime start
374 : // for "b" then it will appear that 'b' has a degenerate lifetime.
375 : //
376 :
377 : namespace {
378 :
379 : /// StackColoring - A machine pass for merging disjoint stack allocations,
380 : /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
381 : class StackColoring : public MachineFunctionPass {
382 : MachineFrameInfo *MFI;
383 : MachineFunction *MF;
384 :
385 : /// A class representing liveness information for a single basic block.
386 : /// Each bit in the BitVector represents the liveness property
387 : /// for a different stack slot.
388 55567 : struct BlockLifetimeInfo {
389 : /// Which slots BEGINs in each basic block.
390 : BitVector Begin;
391 :
392 : /// Which slots ENDs in each basic block.
393 : BitVector End;
394 :
395 : /// Which slots are marked as LIVE_IN, coming into each basic block.
396 : BitVector LiveIn;
397 :
398 : /// Which slots are marked as LIVE_OUT, coming out of each basic block.
399 : BitVector LiveOut;
400 : };
401 :
402 : /// Maps active slots (per bit) for each basic block.
403 : using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
404 : LivenessMap BlockLiveness;
405 :
406 : /// Maps serial numbers to basic blocks.
407 : DenseMap<const MachineBasicBlock *, int> BasicBlocks;
408 :
409 : /// Maps basic blocks to a serial number.
410 : SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
411 :
412 : /// Maps slots to their use interval. Outside of this interval, slots
413 : /// values are either dead or `undef` and they will not be written to.
414 : SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
415 :
416 : /// Maps slots to the points where they can become in-use.
417 : SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
418 :
419 : /// VNInfo is used for the construction of LiveIntervals.
420 : VNInfo::Allocator VNInfoAllocator;
421 :
422 : /// SlotIndex analysis object.
423 : SlotIndexes *Indexes;
424 :
425 : /// The list of lifetime markers found. These markers are to be removed
426 : /// once the coloring is done.
427 : SmallVector<MachineInstr*, 8> Markers;
428 :
429 : /// Record the FI slots for which we have seen some sort of
430 : /// lifetime marker (either start or end).
431 : BitVector InterestingSlots;
432 :
433 : /// FI slots that need to be handled conservatively (for these
434 : /// slots lifetime-start-on-first-use is disabled).
435 : BitVector ConservativeSlots;
436 :
437 : /// Number of iterations taken during data flow analysis.
438 : unsigned NumIterations;
439 :
440 : public:
441 : static char ID;
442 :
443 20207 : StackColoring() : MachineFunctionPass(ID) {
444 20207 : initializeStackColoringPass(*PassRegistry::getPassRegistry());
445 20207 : }
446 :
447 : void getAnalysisUsage(AnalysisUsage &AU) const override;
448 : bool runOnMachineFunction(MachineFunction &Func) override;
449 :
450 : private:
451 : /// Used in collectMarkers
452 : using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
453 :
454 : /// Debug.
455 : void dump() const;
456 : void dumpIntervals() const;
457 : void dumpBB(MachineBasicBlock *MBB) const;
458 : void dumpBV(const char *tag, const BitVector &BV) const;
459 :
460 : /// Removes all of the lifetime marker instructions from the function.
461 : /// \returns true if any markers were removed.
462 : bool removeAllMarkers();
463 :
464 : /// Scan the machine function and find all of the lifetime markers.
465 : /// Record the findings in the BEGIN and END vectors.
466 : /// \returns the number of markers found.
467 : unsigned collectMarkers(unsigned NumSlot);
468 :
469 : /// Perform the dataflow calculation and calculate the lifetime for each of
470 : /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
471 : /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
472 : /// in and out blocks.
473 : void calculateLocalLiveness();
474 :
475 : /// Returns TRUE if we're using the first-use-begins-lifetime method for
476 : /// this slot (if FALSE, then the start marker is treated as start of lifetime).
477 : bool applyFirstUse(int Slot) {
478 246434 : if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
479 : return false;
480 286134 : if (ConservativeSlots.test(Slot))
481 : return false;
482 : return true;
483 : }
484 :
485 : /// Examines the specified instruction and returns TRUE if the instruction
486 : /// represents the start or end of an interesting lifetime. The slot or slots
487 : /// starting or ending are added to the vector "slots" and "isStart" is set
488 : /// accordingly.
489 : /// \returns True if inst contains a lifetime start or end
490 : bool isLifetimeStartOrEnd(const MachineInstr &MI,
491 : SmallVector<int, 4> &slots,
492 : bool &isStart);
493 :
494 : /// Construct the LiveIntervals for the slots.
495 : void calculateLiveIntervals(unsigned NumSlots);
496 :
497 : /// Go over the machine function and change instructions which use stack
498 : /// slots to use the joint slots.
499 : void remapInstructions(DenseMap<int, int> &SlotRemap);
500 :
501 : /// The input program may contain instructions which are not inside lifetime
502 : /// markers. This can happen due to a bug in the compiler or due to a bug in
503 : /// user code (for example, returning a reference to a local variable).
504 : /// This procedure checks all of the instructions in the function and
505 : /// invalidates lifetime ranges which do not contain all of the instructions
506 : /// which access that frame slot.
507 : void removeInvalidSlotRanges();
508 :
509 : /// Map entries which point to other entries to their destination.
510 : /// A->B->C becomes A->C.
511 : void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
512 : };
513 :
514 : } // end anonymous namespace
515 :
516 : char StackColoring::ID = 0;
517 :
518 : char &llvm::StackColoringID = StackColoring::ID;
519 :
520 31780 : INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
521 : "Merge disjoint stack slots", false, false)
522 31780 : INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
523 105354 : INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
524 : "Merge disjoint stack slots", false, false)
525 :
526 20044 : void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
527 : AU.addRequired<SlotIndexes>();
528 20044 : MachineFunctionPass::getAnalysisUsage(AU);
529 20044 : }
530 :
531 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
532 : LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
533 : const BitVector &BV) const {
534 : dbgs() << tag << " : { ";
535 : for (unsigned I = 0, E = BV.size(); I != E; ++I)
536 : dbgs() << BV.test(I) << " ";
537 : dbgs() << "}\n";
538 : }
539 :
540 : LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
541 : LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
542 : assert(BI != BlockLiveness.end() && "Block not found");
543 : const BlockLifetimeInfo &BlockInfo = BI->second;
544 :
545 : dumpBV("BEGIN", BlockInfo.Begin);
546 : dumpBV("END", BlockInfo.End);
547 : dumpBV("LIVE_IN", BlockInfo.LiveIn);
548 : dumpBV("LIVE_OUT", BlockInfo.LiveOut);
549 : }
550 :
551 : LLVM_DUMP_METHOD void StackColoring::dump() const {
552 : for (MachineBasicBlock *MBB : depth_first(MF)) {
553 : dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
554 : << MBB->getName() << "]\n";
555 : dumpBB(MBB);
556 : }
557 : }
558 :
559 : LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
560 : for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
561 : dbgs() << "Interval[" << I << "]:\n";
562 : Intervals[I]->dump();
563 : }
564 : }
565 : #endif
566 :
567 : static inline int getStartOrEndSlot(const MachineInstr &MI)
568 : {
569 : assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
570 : MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
571 : "Expected LIFETIME_START or LIFETIME_END op");
572 169458 : const MachineOperand &MO = MI.getOperand(0);
573 169458 : int Slot = MO.getIndex();
574 169458 : if (Slot >= 0)
575 : return Slot;
576 : return -1;
577 : }
578 :
579 : // At the moment the only way to end a variable lifetime is with
580 : // a VARIABLE_LIFETIME op (which can't contain a start). If things
581 : // change and the IR allows for a single inst that both begins
582 : // and ends lifetime(s), this interface will need to be reworked.
583 3876977 : bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
584 : SmallVector<int, 4> &slots,
585 : bool &isStart) {
586 7753954 : if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
587 : MI.getOpcode() == TargetOpcode::LIFETIME_END) {
588 112431 : int Slot = getStartOrEndSlot(MI);
589 112431 : if (Slot < 0)
590 104997 : return false;
591 224860 : if (!InterestingSlots.test(Slot))
592 : return false;
593 112426 : slots.push_back(Slot);
594 224852 : if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
595 72506 : isStart = false;
596 72506 : return true;
597 : }
598 39920 : if (!applyFirstUse(Slot)) {
599 32486 : isStart = true;
600 32486 : return true;
601 : }
602 3764546 : } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
603 : if (!MI.isDebugInstr()) {
604 : bool found = false;
605 21044729 : for (const MachineOperand &MO : MI.operands()) {
606 17384160 : if (!MO.isFI())
607 17172094 : continue;
608 215869 : int Slot = MO.getIndex();
609 215869 : if (Slot<0)
610 : continue;
611 424132 : if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
612 23146 : slots.push_back(Slot);
613 : found = true;
614 : }
615 : }
616 3660569 : if (found) {
617 23144 : isStart = true;
618 23144 : return true;
619 : }
620 : }
621 : }
622 : return false;
623 : }
624 :
625 11235 : unsigned StackColoring::collectMarkers(unsigned NumSlot) {
626 : unsigned MarkersFound = 0;
627 : BlockBitVecMap SeenStartMap;
628 : InterestingSlots.clear();
629 11235 : InterestingSlots.resize(NumSlot);
630 : ConservativeSlots.clear();
631 11235 : ConservativeSlots.resize(NumSlot);
632 :
633 : // number of start and end lifetime ops for each slot
634 11235 : SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
635 11235 : SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
636 :
637 : // Step 1: collect markers and populate the "InterestingSlots"
638 : // and "ConservativeSlots" sets.
639 196744 : for (MachineBasicBlock *MBB : depth_first(MF)) {
640 : // Compute the set of slots for which we've seen a START marker but have
641 : // not yet seen an END marker at this point in the walk (e.g. on entry
642 : // to this bb).
643 : BitVector BetweenStartEnd;
644 174274 : BetweenStartEnd.resize(NumSlot);
645 : for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
646 396336 : PE = MBB->pred_end(); PI != PE; ++PI) {
647 222062 : BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
648 222062 : if (I != SeenStartMap.end()) {
649 177097 : BetweenStartEnd |= I->second;
650 : }
651 : }
652 :
653 : // Walk the instructions in the block to look for start/end ops.
654 2386426 : for (MachineInstr &MI : *MBB) {
655 4424304 : if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
656 : MI.getOpcode() == TargetOpcode::LIFETIME_END) {
657 : int Slot = getStartOrEndSlot(MI);
658 : if (Slot < 0)
659 : continue;
660 57026 : InterestingSlots.set(Slot);
661 57026 : if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
662 : BetweenStartEnd.set(Slot);
663 40568 : NumStartLifetimes[Slot] += 1;
664 : } else {
665 : BetweenStartEnd.reset(Slot);
666 73484 : NumEndLifetimes[Slot] += 1;
667 : }
668 : const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
669 : if (Allocation) {
670 : LLVM_DEBUG(dbgs() << "Found a lifetime ");
671 : LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
672 : ? "start"
673 : : "end"));
674 : LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
675 : LLVM_DEBUG(dbgs()
676 : << " with allocation: " << Allocation->getName() << "\n");
677 : }
678 57026 : Markers.push_back(&MI);
679 57026 : MarkersFound += 1;
680 : } else {
681 12471996 : for (const MachineOperand &MO : MI.operands()) {
682 10316871 : if (!MO.isFI())
683 : continue;
684 159370 : int Slot = MO.getIndex();
685 159370 : if (Slot < 0)
686 : continue;
687 302782 : if (! BetweenStartEnd.test(Slot)) {
688 : ConservativeSlots.set(Slot);
689 : }
690 : }
691 : }
692 : }
693 174274 : BitVector &SeenStart = SeenStartMap[MBB];
694 174274 : SeenStart |= BetweenStartEnd;
695 : }
696 11235 : if (!MarkersFound) {
697 : return 0;
698 : }
699 :
700 : // PR27903: slots with multiple start or end lifetime ops are not
701 : // safe to enable for "lifetime-start-on-first-use".
702 24132 : for (unsigned slot = 0; slot < NumSlot; ++slot)
703 42886 : if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
704 : ConservativeSlots.set(slot);
705 : LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
706 :
707 : // Step 2: compute begin/end sets for each block
708 :
709 : // NOTE: We use a depth-first iteration to ensure that we obtain a
710 : // deterministic numbering.
711 164308 : for (MachineBasicBlock *MBB : depth_first(MF)) {
712 : // Assign a serial number to this basic block.
713 158930 : BasicBlocks[MBB] = BasicBlockNumbering.size();
714 158930 : BasicBlockNumbering.push_back(MBB);
715 :
716 : // Keep a reference to avoid repeated lookups.
717 158930 : BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
718 :
719 158930 : BlockInfo.Begin.resize(NumSlot);
720 158930 : BlockInfo.End.resize(NumSlot);
721 :
722 : SmallVector<int, 4> slots;
723 2134190 : for (MachineInstr &MI : *MBB) {
724 1975260 : bool isStart = false;
725 : slots.clear();
726 1975260 : if (isLifetimeStartOrEnd(MI, slots, isStart)) {
727 65038 : if (!isStart) {
728 : assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
729 36742 : int Slot = slots[0];
730 73484 : if (BlockInfo.Begin.test(Slot)) {
731 : BlockInfo.Begin.reset(Slot);
732 : }
733 : BlockInfo.End.set(Slot);
734 : } else {
735 56594 : for (auto Slot : slots) {
736 : LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
737 : LLVM_DEBUG(dbgs()
738 : << " at " << printMBBReference(*MBB) << " index ");
739 : LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
740 : const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
741 : if (Allocation) {
742 : LLVM_DEBUG(dbgs()
743 : << " with allocation: " << Allocation->getName());
744 : }
745 : LLVM_DEBUG(dbgs() << "\n");
746 28298 : if (BlockInfo.End.test(Slot)) {
747 : BlockInfo.End.reset(Slot);
748 : }
749 : BlockInfo.Begin.set(Slot);
750 : }
751 : }
752 : }
753 : }
754 : }
755 :
756 : // Update statistics.
757 : NumMarkerSeen += MarkersFound;
758 2689 : return MarkersFound;
759 : }
760 :
761 2171 : void StackColoring::calculateLocalLiveness() {
762 : unsigned NumIters = 0;
763 : bool changed = true;
764 6750 : while (changed) {
765 : changed = false;
766 4579 : ++NumIters;
767 :
768 385537 : for (const MachineBasicBlock *BB : BasicBlockNumbering) {
769 : // Use an iterator to avoid repeated lookups.
770 380958 : LivenessMap::iterator BI = BlockLiveness.find(BB);
771 : assert(BI != BlockLiveness.end() && "Block not found");
772 380958 : BlockLifetimeInfo &BlockInfo = BI->second;
773 :
774 : // Compute LiveIn by unioning together the LiveOut sets of all preds.
775 : BitVector LocalLiveIn;
776 : for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
777 894337 : PE = BB->pred_end(); PI != PE; ++PI) {
778 513379 : LivenessMap::const_iterator I = BlockLiveness.find(*PI);
779 : // PR37130: transformations prior to stack coloring can
780 : // sometimes leave behind statically unreachable blocks; these
781 : // can be safely skipped here.
782 513379 : if (I != BlockLiveness.end())
783 513378 : LocalLiveIn |= I->second.LiveOut;
784 : }
785 :
786 : // Compute LiveOut by subtracting out lifetimes that end in this
787 : // block, then adding in lifetimes that begin in this block. If
788 : // we have both BEGIN and END markers in the same basic block
789 : // then we know that the BEGIN marker comes after the END,
790 : // because we already handle the case where the BEGIN comes
791 : // before the END when collecting the markers (and building the
792 : // BEGIN/END vectors).
793 380958 : BitVector LocalLiveOut = LocalLiveIn;
794 380958 : LocalLiveOut.reset(BlockInfo.End);
795 380958 : LocalLiveOut |= BlockInfo.Begin;
796 :
797 : // Update block LiveIn set, noting whether it has changed.
798 380958 : if (LocalLiveIn.test(BlockInfo.LiveIn)) {
799 : changed = true;
800 144561 : BlockInfo.LiveIn |= LocalLiveIn;
801 : }
802 :
803 : // Update block LiveOut set, noting whether it has changed.
804 380958 : if (LocalLiveOut.test(BlockInfo.LiveOut)) {
805 : changed = true;
806 142619 : BlockInfo.LiveOut |= LocalLiveOut;
807 : }
808 : }
809 : } // while changed.
810 :
811 2171 : NumIterations = NumIters;
812 2171 : }
813 :
814 2171 : void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
815 : SmallVector<SlotIndex, 16> Starts;
816 : SmallVector<bool, 16> DefinitelyInUse;
817 :
818 : // For each block, find which slots are active within this block
819 : // and update the live intervals.
820 155848 : for (const MachineBasicBlock &MBB : *MF) {
821 : Starts.clear();
822 153677 : Starts.resize(NumSlots);
823 : DefinitelyInUse.clear();
824 153677 : DefinitelyInUse.resize(NumSlots);
825 :
826 : // Start the interval of the slots that we previously found to be 'in-use'.
827 153677 : BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
828 858196 : for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
829 704519 : pos = MBBLiveness.LiveIn.find_next(pos)) {
830 1409038 : Starts[pos] = Indexes->getMBBStartIdx(&MBB);
831 : }
832 :
833 : // Create the interval for the basic blocks containing lifetime begin/end.
834 2055394 : for (const MachineInstr &MI : MBB) {
835 : SmallVector<int, 4> slots;
836 1901717 : bool IsStart = false;
837 1901717 : if (!isLifetimeStartOrEnd(MI, slots, IsStart))
838 : continue;
839 63098 : SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
840 126196 : for (auto Slot : slots) {
841 63098 : if (IsStart) {
842 : // If a slot is already definitely in use, we don't have to emit
843 : // a new start marker because there is already a pre-existing
844 : // one.
845 54668 : if (!DefinitelyInUse[Slot]) {
846 24813 : LiveStarts[Slot].push_back(ThisIndex);
847 24813 : DefinitelyInUse[Slot] = true;
848 : }
849 27334 : if (!Starts[Slot].isValid())
850 19600 : Starts[Slot] = ThisIndex;
851 : } else {
852 71528 : if (Starts[Slot].isValid()) {
853 : VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
854 71498 : Intervals[Slot]->addSegment(
855 : LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
856 35749 : Starts[Slot] = SlotIndex(); // Invalidate the start index
857 35749 : DefinitelyInUse[Slot] = false;
858 : }
859 : }
860 : }
861 : }
862 :
863 : // Finish up started segments
864 7552086 : for (unsigned i = 0; i < NumSlots; ++i) {
865 14796818 : if (!Starts[i].isValid())
866 : continue;
867 :
868 688370 : SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
869 : VNInfo *VNI = Intervals[i]->getValNumInfo(0);
870 1376740 : Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
871 : }
872 : }
873 2171 : }
874 :
875 11235 : bool StackColoring::removeAllMarkers() {
876 : unsigned Count = 0;
877 68261 : for (MachineInstr *MI : Markers) {
878 57026 : MI->eraseFromParent();
879 57026 : Count++;
880 : }
881 : Markers.clear();
882 :
883 : LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
884 11235 : return Count;
885 : }
886 :
887 0 : void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
888 : unsigned FixedInstr = 0;
889 : unsigned FixedMemOp = 0;
890 : unsigned FixedDbg = 0;
891 :
892 : // Remap debug information that refers to stack slots.
893 0 : for (auto &VI : MF->getVariableDbgInfo()) {
894 0 : if (!VI.Var)
895 0 : continue;
896 0 : if (SlotRemap.count(VI.Slot)) {
897 : LLVM_DEBUG(dbgs() << "Remapping debug info for ["
898 : << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
899 0 : VI.Slot = SlotRemap[VI.Slot];
900 : FixedDbg++;
901 : }
902 : }
903 :
904 : // Keep a list of *allocas* which need to be remapped.
905 : DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
906 :
907 : // Keep a list of allocas which has been affected by the remap.
908 : SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
909 :
910 0 : for (const std::pair<int, int> &SI : SlotRemap) {
911 0 : const AllocaInst *From = MFI->getObjectAllocation(SI.first);
912 0 : const AllocaInst *To = MFI->getObjectAllocation(SI.second);
913 : assert(To && From && "Invalid allocation object");
914 0 : Allocas[From] = To;
915 :
916 : // AA might be used later for instruction scheduling, and we need it to be
917 : // able to deduce the correct aliasing releationships between pointers
918 : // derived from the alloca being remapped and the target of that remapping.
919 : // The only safe way, without directly informing AA about the remapping
920 : // somehow, is to directly update the IR to reflect the change being made
921 : // here.
922 : Instruction *Inst = const_cast<AllocaInst *>(To);
923 0 : if (From->getType() != To->getType()) {
924 0 : BitCastInst *Cast = new BitCastInst(Inst, From->getType());
925 0 : Cast->insertAfter(Inst);
926 : Inst = Cast;
927 : }
928 :
929 : // We keep both slots to maintain AliasAnalysis metadata later.
930 0 : MergedAllocas.insert(From);
931 0 : MergedAllocas.insert(To);
932 :
933 : // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
934 : // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
935 : // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
936 : MachineFrameInfo::SSPLayoutKind FromKind
937 0 : = MFI->getObjectSSPLayout(SI.first);
938 0 : MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second);
939 0 : if (FromKind != MachineFrameInfo::SSPLK_None &&
940 0 : (ToKind == MachineFrameInfo::SSPLK_None ||
941 0 : (ToKind != MachineFrameInfo::SSPLK_LargeArray &&
942 0 : FromKind != MachineFrameInfo::SSPLK_AddrOf)))
943 : MFI->setObjectSSPLayout(SI.second, FromKind);
944 :
945 : // The new alloca might not be valid in a llvm.dbg.declare for this
946 : // variable, so undef out the use to make the verifier happy.
947 0 : AllocaInst *FromAI = const_cast<AllocaInst *>(From);
948 0 : if (FromAI->isUsedByMetadata())
949 0 : ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
950 0 : for (auto &Use : FromAI->uses()) {
951 0 : if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
952 0 : if (BCI->isUsedByMetadata())
953 0 : ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
954 : }
955 :
956 : // Note that this will not replace uses in MMOs (which we'll update below),
957 : // or anywhere else (which is why we won't delete the original
958 : // instruction).
959 0 : FromAI->replaceAllUsesWith(Inst);
960 : }
961 :
962 : // Remap all instructions to the new stack slots.
963 0 : for (MachineBasicBlock &BB : *MF)
964 0 : for (MachineInstr &I : BB) {
965 : // Skip lifetime markers. We'll remove them soon.
966 0 : if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
967 : I.getOpcode() == TargetOpcode::LIFETIME_END)
968 0 : continue;
969 :
970 : // Update the MachineMemOperand to use the new alloca.
971 0 : for (MachineMemOperand *MMO : I.memoperands()) {
972 : // We've replaced IR-level uses of the remapped allocas, so we only
973 : // need to replace direct uses here.
974 0 : const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
975 0 : if (!AI)
976 0 : continue;
977 :
978 0 : if (!Allocas.count(AI))
979 0 : continue;
980 :
981 0 : MMO->setValue(Allocas[AI]);
982 : FixedMemOp++;
983 : }
984 :
985 : // Update all of the machine instruction operands.
986 0 : for (MachineOperand &MO : I.operands()) {
987 0 : if (!MO.isFI())
988 0 : continue;
989 0 : int FromSlot = MO.getIndex();
990 :
991 : // Don't touch arguments.
992 0 : if (FromSlot<0)
993 0 : continue;
994 :
995 : // Only look at mapped slots.
996 0 : if (!SlotRemap.count(FromSlot))
997 0 : continue;
998 :
999 : // In a debug build, check that the instruction that we are modifying is
1000 : // inside the expected live range. If the instruction is not inside
1001 : // the calculated range then it means that the alloca usage moved
1002 : // outside of the lifetime markers, or that the user has a bug.
1003 : // NOTE: Alloca address calculations which happen outside the lifetime
1004 : // zone are okay, despite the fact that we don't have a good way
1005 : // for validating all of the usages of the calculation.
1006 : #ifndef NDEBUG
1007 : bool TouchesMemory = I.mayLoad() || I.mayStore();
1008 : // If we *don't* protect the user from escaped allocas, don't bother
1009 : // validating the instructions.
1010 : if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1011 : SlotIndex Index = Indexes->getInstructionIndex(I);
1012 : const LiveInterval *Interval = &*Intervals[FromSlot];
1013 : assert(Interval->find(Index) != Interval->end() &&
1014 : "Found instruction usage outside of live range.");
1015 : }
1016 : #endif
1017 :
1018 : // Fix the machine instructions.
1019 0 : int ToSlot = SlotRemap[FromSlot];
1020 : MO.setIndex(ToSlot);
1021 : FixedInstr++;
1022 : }
1023 :
1024 : // We adjust AliasAnalysis information for merged stack slots.
1025 : SmallVector<MachineMemOperand *, 2> NewMMOs;
1026 : bool ReplaceMemOps = false;
1027 0 : for (MachineMemOperand *MMO : I.memoperands()) {
1028 : // If this memory location can be a slot remapped here,
1029 : // we remove AA information.
1030 : bool MayHaveConflictingAAMD = false;
1031 : if (MMO->getAAInfo()) {
1032 0 : if (const Value *MMOV = MMO->getValue()) {
1033 : SmallVector<Value *, 4> Objs;
1034 0 : getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1035 :
1036 0 : if (Objs.empty())
1037 : MayHaveConflictingAAMD = true;
1038 : else
1039 0 : for (Value *V : Objs) {
1040 : // If this memory location comes from a known stack slot
1041 : // that is not remapped, we continue checking.
1042 : // Otherwise, we need to invalidate AA infomation.
1043 : const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1044 0 : if (AI && MergedAllocas.count(AI)) {
1045 : MayHaveConflictingAAMD = true;
1046 : break;
1047 : }
1048 : }
1049 : }
1050 : }
1051 0 : if (MayHaveConflictingAAMD) {
1052 0 : NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1053 : ReplaceMemOps = true;
1054 : } else {
1055 0 : NewMMOs.push_back(MMO);
1056 : }
1057 : }
1058 :
1059 : // If any memory operand is updated, set memory references of
1060 : // this instruction.
1061 0 : if (ReplaceMemOps)
1062 0 : I.setMemRefs(*MF, NewMMOs);
1063 : }
1064 :
1065 : // Update the location of C++ catch objects for the MSVC personality routine.
1066 0 : if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1067 0 : for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1068 0 : for (WinEHHandlerType &H : TBME.HandlerArray)
1069 0 : if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
1070 0 : SlotRemap.count(H.CatchObj.FrameIndex))
1071 0 : H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1072 :
1073 : LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1074 : LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1075 : LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1076 0 : }
1077 :
1078 0 : void StackColoring::removeInvalidSlotRanges() {
1079 0 : for (MachineBasicBlock &BB : *MF)
1080 0 : for (MachineInstr &I : BB) {
1081 0 : if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1082 0 : I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1083 : continue;
1084 :
1085 : // Some intervals are suspicious! In some cases we find address
1086 : // calculations outside of the lifetime zone, but not actual memory
1087 : // read or write. Memory accesses outside of the lifetime zone are a clear
1088 : // violation, but address calculations are okay. This can happen when
1089 : // GEPs are hoisted outside of the lifetime zone.
1090 : // So, in here we only check instructions which can read or write memory.
1091 0 : if (!I.mayLoad() && !I.mayStore())
1092 : continue;
1093 :
1094 : // Check all of the machine operands.
1095 0 : for (const MachineOperand &MO : I.operands()) {
1096 0 : if (!MO.isFI())
1097 0 : continue;
1098 :
1099 0 : int Slot = MO.getIndex();
1100 :
1101 0 : if (Slot<0)
1102 : continue;
1103 :
1104 0 : if (Intervals[Slot]->empty())
1105 : continue;
1106 :
1107 : // Check that the used slot is inside the calculated lifetime range.
1108 : // If it is not, warn about it and invalidate the range.
1109 : LiveInterval *Interval = &*Intervals[Slot];
1110 0 : SlotIndex Index = Indexes->getInstructionIndex(I);
1111 0 : if (Interval->find(Index) == Interval->end()) {
1112 : Interval->clear();
1113 : LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1114 : EscapedAllocas++;
1115 : }
1116 : }
1117 : }
1118 0 : }
1119 :
1120 0 : void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1121 : unsigned NumSlots) {
1122 : // Expunge slot remap map.
1123 0 : for (unsigned i=0; i < NumSlots; ++i) {
1124 : // If we are remapping i
1125 0 : if (SlotRemap.count(i)) {
1126 0 : int Target = SlotRemap[i];
1127 : // As long as our target is mapped to something else, follow it.
1128 0 : while (SlotRemap.count(Target)) {
1129 0 : Target = SlotRemap[Target];
1130 0 : SlotRemap[i] = Target;
1131 : }
1132 : }
1133 : }
1134 0 : }
1135 :
1136 197756 : bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1137 : LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1138 : << "********** Function: " << Func.getName() << '\n');
1139 197756 : MF = &Func;
1140 197756 : MFI = &MF->getFrameInfo();
1141 197756 : Indexes = &getAnalysis<SlotIndexes>();
1142 197756 : BlockLiveness.clear();
1143 197756 : BasicBlocks.clear();
1144 : BasicBlockNumbering.clear();
1145 : Markers.clear();
1146 197755 : Intervals.clear();
1147 197756 : LiveStarts.clear();
1148 197756 : VNInfoAllocator.Reset();
1149 :
1150 197756 : unsigned NumSlots = MFI->getObjectIndexEnd();
1151 :
1152 : // If there are no stack slots then there are no markers to remove.
1153 197756 : if (!NumSlots)
1154 : return false;
1155 :
1156 : SmallVector<int, 8> SortedSlots;
1157 11235 : SortedSlots.reserve(NumSlots);
1158 : Intervals.reserve(NumSlots);
1159 11235 : LiveStarts.resize(NumSlots);
1160 :
1161 11235 : unsigned NumMarkers = collectMarkers(NumSlots);
1162 :
1163 : unsigned TotalSize = 0;
1164 : LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1165 : << " slots\n");
1166 : LLVM_DEBUG(dbgs() << "Slot structure:\n");
1167 :
1168 100324 : for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1169 : LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1170 : << " bytes.\n");
1171 38927 : TotalSize += MFI->getObjectSize(i);
1172 : }
1173 :
1174 : LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1175 :
1176 : // Don't continue because there are not enough lifetime markers, or the
1177 : // stack is too small, or we are told not to optimize the slots.
1178 13406 : if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1179 2171 : skipFunction(Func.getFunction())) {
1180 : LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1181 9064 : return removeAllMarkers();
1182 : }
1183 :
1184 22904 : for (unsigned i=0; i < NumSlots; ++i) {
1185 20733 : std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1186 41466 : LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1187 20733 : Intervals.push_back(std::move(LI));
1188 20733 : SortedSlots.push_back(i);
1189 : }
1190 :
1191 : // Calculate the liveness of each block.
1192 2171 : calculateLocalLiveness();
1193 : LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1194 : LLVM_DEBUG(dump());
1195 :
1196 : // Propagate the liveness information.
1197 2171 : calculateLiveIntervals(NumSlots);
1198 : LLVM_DEBUG(dumpIntervals());
1199 :
1200 : // Search for allocas which are used outside of the declared lifetime
1201 : // markers.
1202 2171 : if (ProtectFromEscapedAllocas)
1203 0 : removeInvalidSlotRanges();
1204 :
1205 : // Maps old slots to new slots.
1206 : DenseMap<int, int> SlotRemap;
1207 : unsigned RemovedSlots = 0;
1208 : unsigned ReducedSize = 0;
1209 :
1210 : // Do not bother looking at empty intervals.
1211 22904 : for (unsigned I = 0; I < NumSlots; ++I) {
1212 62199 : if (Intervals[SortedSlots[I]]->empty())
1213 1190 : SortedSlots[I] = -1;
1214 : }
1215 :
1216 : // This is a simple greedy algorithm for merging allocas. First, sort the
1217 : // slots, placing the largest slots first. Next, perform an n^2 scan and look
1218 : // for disjoint slots. When you find disjoint slots, merge the samller one
1219 : // into the bigger one and update the live interval. Remove the small alloca
1220 : // and continue.
1221 :
1222 : // Sort the slots according to their size. Place unused slots at the end.
1223 : // Use stable sort to guarantee deterministic code generation.
1224 : std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
1225 73910 : [this](int LHS, int RHS) {
1226 : // We use -1 to denote a uninteresting slot. Place these slots at the end.
1227 77677 : if (LHS == -1) return false;
1228 76686 : if (RHS == -1) return true;
1229 : // Sort according to size.
1230 147820 : return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1231 : });
1232 :
1233 22904 : for (auto &s : LiveStarts)
1234 : llvm::sort(s);
1235 :
1236 : bool Changed = true;
1237 5172 : while (Changed) {
1238 : Changed = false;
1239 40786 : for (unsigned I = 0; I < NumSlots; ++I) {
1240 75570 : if (SortedSlots[I] == -1)
1241 : continue;
1242 :
1243 190777 : for (unsigned J=I+1; J < NumSlots; ++J) {
1244 360238 : if (SortedSlots[J] == -1)
1245 98474 : continue;
1246 :
1247 81645 : int FirstSlot = SortedSlots[I];
1248 81645 : int SecondSlot = SortedSlots[J];
1249 81645 : LiveInterval *First = &*Intervals[FirstSlot];
1250 81645 : LiveInterval *Second = &*Intervals[SecondSlot];
1251 : auto &FirstS = LiveStarts[FirstSlot];
1252 : auto &SecondS = LiveStarts[SecondSlot];
1253 : assert(!First->empty() && !Second->empty() && "Found an empty range");
1254 :
1255 : // Merge disjoint slots. This is a little bit tricky - see the
1256 : // Implementation Notes section for an explanation.
1257 185751 : if (!First->isLiveAtIndexes(SecondS) &&
1258 126567 : !Second->isLiveAtIndexes(FirstS)) {
1259 : Changed = true;
1260 12801 : First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1261 :
1262 12801 : int OldSize = FirstS.size();
1263 25602 : FirstS.append(SecondS.begin(), SecondS.end());
1264 12801 : auto Mid = FirstS.begin() + OldSize;
1265 : std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1266 :
1267 12801 : SlotRemap[SecondSlot] = FirstSlot;
1268 12801 : SortedSlots[J] = -1;
1269 : LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1270 : << SecondSlot << " together.\n");
1271 12801 : unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1272 25768 : MFI->getObjectAlignment(SecondSlot));
1273 :
1274 : assert(MFI->getObjectSize(FirstSlot) >=
1275 : MFI->getObjectSize(SecondSlot) &&
1276 : "Merging a small object into a larger one");
1277 :
1278 : RemovedSlots+=1;
1279 : ReducedSize += MFI->getObjectSize(SecondSlot);
1280 : MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1281 12801 : MFI->RemoveStackObject(SecondSlot);
1282 : }
1283 : }
1284 : }
1285 : }// While changed.
1286 :
1287 : // Record statistics.
1288 : StackSpaceSaved += ReducedSize;
1289 : StackSlotMerged += RemovedSlots;
1290 : LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1291 : << ReducedSize << " bytes\n");
1292 :
1293 : // Scan the entire function and update all machine operands that use frame
1294 : // indices to use the remapped frame index.
1295 2171 : expungeSlotMap(SlotRemap, NumSlots);
1296 2171 : remapInstructions(SlotRemap);
1297 :
1298 2171 : return removeAllMarkers();
1299 : }
|