LLVM 22.0.0git
X86FloatingPoint.cpp
Go to the documentation of this file.
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
39#include "llvm/CodeGen/Passes.h"
42#include "llvm/Config/llvm-config.h"
43#include "llvm/IR/Analysis.h"
44#include "llvm/IR/InlineAsm.h"
46#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <bitset>
52using namespace llvm;
53
54#define DEBUG_TYPE "x86-fp-stackifier"
55
56STATISTIC(NumFXCH, "Number of fxch instructions inserted");
57STATISTIC(NumFP, "Number of floating point instructions");
58
59namespace {
60const unsigned ScratchFPReg = 7;
61
62class FPS {
63public:
64 bool shouldRun(MachineFunction &MF);
66
67private:
68 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
69
70 // Two CFG edges are related if they leave the same block, or enter the same
71 // block. The transitive closure of an edge under this relation is a
72 // LiveBundle. It represents a set of CFG edges where the live FP stack
73 // registers must be allocated identically in the x87 stack.
74 //
75 // A LiveBundle is usually all the edges leaving a block, or all the edges
76 // entering a block, but it can contain more edges if critical edges are
77 // present.
78 //
79 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
80 // but the exact mapping of FP registers to stack slots is fixed later.
81 struct LiveBundle {
82 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
83 unsigned Mask = 0;
84
85 // Number of pre-assigned live registers in FixStack. This is 0 when the
86 // stack order has not yet been fixed.
87 unsigned FixCount = 0;
88
89 // Assigned stack order for live-in registers.
90 // FixStack[i] == getStackEntry(i) for all i < FixCount.
91 unsigned char FixStack[8];
92
93 LiveBundle() = default;
94
95 // Have the live registers been assigned a stack order yet?
96 bool isFixed() const { return !Mask || FixCount; }
97 };
98
99 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
100 // with no live FP registers.
101 SmallVector<LiveBundle, 8> LiveBundles;
102
103 // The edge bundle analysis provides indices into the LiveBundles vector.
104 EdgeBundles *Bundles = nullptr;
105
106 // Return a bitmask of FP registers in block's live-in list.
107 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
108 unsigned Mask = 0;
109 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
110 I != MBB->livein_end();) {
111 MCPhysReg Reg = I->PhysReg;
112 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
113 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
114 Mask |= 1 << (Reg - X86::FP0);
115 if (RemoveFPs) {
116 I = MBB->removeLiveIn(I);
117 continue;
118 }
119 }
120 ++I;
121 }
122 return Mask;
123 }
124
125 // Partition all the CFG edges into LiveBundles.
126 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
127
128 MachineBasicBlock *MBB = nullptr; // Current basic block
129
130 // The hardware keeps track of how many FP registers are live, so we have
131 // to model that exactly. Usually, each live register corresponds to an
132 // FP<n> register, but when dealing with calls, returns, and inline
133 // assembly, it is sometimes necessary to have live scratch registers.
134 unsigned Stack[8] = {}; // FP<n> Registers in each stack slot...
135 unsigned StackTop = 0; // The current top of the FP stack.
136
137 enum {
138 NumFPRegs = 8 // Including scratch pseudo-registers.
139 };
140
141 // For each live FP<n> register, point to its Stack[] entry.
142 // The first entries correspond to FP0-FP6, the rest are scratch registers
143 // used when we need slightly different live registers than what the
144 // register allocator thinks.
145 unsigned RegMap[NumFPRegs] = {};
146
147 // Set up our stack model to match the incoming registers to MBB.
148 void setupBlockStack();
149
150 // Shuffle live registers to match the expectations of successor blocks.
151 void finishBlockStack();
152
153#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
154 void dumpStack() const {
155 dbgs() << "Stack contents:";
156 for (unsigned i = 0; i != StackTop; ++i) {
157 dbgs() << " FP" << Stack[i];
158 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
159 }
160 }
161#endif
162
163 /// getSlot - Return the stack slot number a particular register number is
164 /// in.
165 unsigned getSlot(unsigned RegNo) const {
166 assert(RegNo < NumFPRegs && "Regno out of range!");
167 return RegMap[RegNo];
168 }
169
170 /// isLive - Is RegNo currently live in the stack?
171 bool isLive(unsigned RegNo) const {
172 unsigned Slot = getSlot(RegNo);
173 return Slot < StackTop && Stack[Slot] == RegNo;
174 }
175
176 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
177 unsigned getStackEntry(unsigned STi) const {
178 if (STi >= StackTop)
179 report_fatal_error("Access past stack top!");
180 return Stack[StackTop - 1 - STi];
181 }
182
183 /// getSTReg - Return the X86::ST(i) register which contains the specified
184 /// FP<RegNo> register.
185 unsigned getSTReg(unsigned RegNo) const {
186 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
187 }
188
189 // pushReg - Push the specified FP<n> register onto the stack.
190 void pushReg(unsigned Reg) {
191 assert(Reg < NumFPRegs && "Register number out of range!");
192 if (StackTop >= 8)
193 report_fatal_error("Stack overflow!");
194 Stack[StackTop] = Reg;
195 RegMap[Reg] = StackTop++;
196 }
197
198 // popReg - Pop a register from the stack.
199 void popReg() {
200 if (StackTop == 0)
201 report_fatal_error("Cannot pop empty stack!");
202 RegMap[Stack[--StackTop]] = ~0; // Update state
203 }
204
205 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop - 1; }
206 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
207 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
208 if (isAtTop(RegNo))
209 return;
210
211 unsigned STReg = getSTReg(RegNo);
212 unsigned RegOnTop = getStackEntry(0);
213
214 // Swap the slots the regs are in.
215 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
216
217 // Swap stack slot contents.
218 if (RegMap[RegOnTop] >= StackTop)
219 report_fatal_error("Access past stack top!");
220 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop - 1]);
221
222 // Emit an fxch to update the runtime processors version of the state.
223 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
224 ++NumFXCH;
225 }
226
227 void duplicateToTop(unsigned RegNo, unsigned AsReg,
229 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
230 unsigned STReg = getSTReg(RegNo);
231 pushReg(AsReg); // New register on top of stack
232
233 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
234 }
235
236 /// popStackAfter - Pop the current value off of the top of the FP stack
237 /// after the specified instruction.
238 void popStackAfter(MachineBasicBlock::iterator &I);
239
240 /// freeStackSlotAfter - Free the specified register from the register
241 /// stack, so that it is no longer in a register. If the register is
242 /// currently at the top of the stack, we just pop the current instruction,
243 /// otherwise we store the current top-of-stack into the specified slot,
244 /// then pop the top of stack.
245 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
246
247 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
248 /// instruction.
250 unsigned FPRegNo);
251
252 /// Adjust the live registers to be the set in Mask.
253 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
254
255 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
256 /// st(0), FP reg FixStack[1] is st(1) etc.
257 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
259
260 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
261
262 void handleCall(MachineBasicBlock::iterator &I);
263 void handleReturn(MachineBasicBlock::iterator &I);
264 void handleZeroArgFP(MachineBasicBlock::iterator &I);
265 void handleOneArgFP(MachineBasicBlock::iterator &I);
266 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
267 void handleTwoArgFP(MachineBasicBlock::iterator &I);
268 void handleCompareFP(MachineBasicBlock::iterator &I);
269 void handleCondMovFP(MachineBasicBlock::iterator &I);
270 void handleSpecialFP(MachineBasicBlock::iterator &I);
271
272 // Check if a COPY instruction is using FP registers.
273 static bool isFPCopy(MachineInstr &MI) {
274 Register DstReg = MI.getOperand(0).getReg();
275 Register SrcReg = MI.getOperand(1).getReg();
276
277 return X86::RFP80RegClass.contains(DstReg) ||
278 X86::RFP80RegClass.contains(SrcReg);
279 }
280
281 void setKillFlags(MachineBasicBlock &MBB) const;
282};
283
284class X86FPStackifierLegacy : public MachineFunctionPass {
285public:
286 X86FPStackifierLegacy() : MachineFunctionPass(ID) {}
287
288 static char ID;
289
290private:
291 void getAnalysisUsage(AnalysisUsage &AU) const override {
292 AU.setPreservesCFG();
293 AU.addRequired<EdgeBundlesWrapperLegacy>();
297 }
298
299 bool runOnMachineFunction(MachineFunction &MF) override;
300
301 MachineFunctionProperties getRequiredProperties() const override {
302 return MachineFunctionProperties().setNoVRegs();
303 }
304
305 StringRef getPassName() const override { return "X86 FP Stackifier"; }
306};
307} // namespace
308
309char X86FPStackifierLegacy::ID = 0;
310
311INITIALIZE_PASS_BEGIN(X86FPStackifierLegacy, DEBUG_TYPE, "X86 FP Stackifier",
312 false, false)
314INITIALIZE_PASS_END(X86FPStackifierLegacy, DEBUG_TYPE, "X86 FP Stackifier",
316
318 return new X86FPStackifierLegacy();
319}
320
321/// getFPReg - Return the X86::FPx register number for the specified operand.
322/// For example, this returns 3 for X86::FP3.
323static unsigned getFPReg(const MachineOperand &MO) {
324 assert(MO.isReg() && "Expected an FP register!");
325 Register Reg = MO.getReg();
326 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
327 return Reg - X86::FP0;
328}
329
330bool FPS::shouldRun(MachineFunction &MF) {
331 // We only need to run this pass if there are any FP registers used in this
332 // function. If it is all integer, there is nothing for us to do!
333 static_assert(X86::FP6 == X86::FP0 + 6,
334 "Register enums aren't sorted right!");
335 const MachineRegisterInfo &MRI = MF.getRegInfo();
336 for (unsigned I = 0; I <= 6; ++I)
337 if (!MRI.reg_nodbg_empty(X86::FP0 + I)) {
338 return true;
339 }
340
341 return false;
342}
343
344/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
345/// register references into FP stack references.
346///
347bool FPS::run(MachineFunction &MF, EdgeBundles *FunctionBundles) {
348 Bundles = FunctionBundles;
350
351 // Prepare cross-MBB liveness.
352 bundleCFGRecomputeKillFlags(MF);
353
354 StackTop = 0;
355
356 // Process the function in depth first order so that we process at least one
357 // of the predecessors for every reachable block in the function.
358 df_iterator_default_set<MachineBasicBlock *> Processed;
359 MachineBasicBlock *Entry = &MF.front();
360
361 LiveBundle &Bundle =
362 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
363
364 // In regcall convention, some FP registers may not be passed through
365 // the stack, so they will need to be assigned to the stack first
366 if ((Entry->getParent()->getFunction().getCallingConv() ==
367 CallingConv::X86_RegCall) &&
368 (Bundle.Mask && !Bundle.FixCount)) {
369 // In the register calling convention, up to one FP argument could be
370 // saved in the first FP register.
371 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
372 // that the FP registers contain arguments.
373 // The actual value is passed in FP0.
374 // Here we fix the stack and mark FP0 as pre-assigned register.
375 assert((Bundle.Mask & 0xFE) == 0 &&
376 "Only FP0 could be passed as an argument");
377 Bundle.FixCount = 1;
378 Bundle.FixStack[0] = 0;
379 }
380
381 bool Changed = false;
382 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
383 Changed |= processBasicBlock(MF, *BB);
384
385 // Process any unreachable blocks in arbitrary order now.
386 if (MF.size() != Processed.size())
387 for (MachineBasicBlock &BB : MF)
388 if (Processed.insert(&BB).second)
389 Changed |= processBasicBlock(MF, BB);
390
391 LiveBundles.clear();
392
393 return Changed;
394}
395
396/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
397/// live-out sets for the FP registers. Consistent means that the set of
398/// registers live-out from a block is identical to the live-in set of all
399/// successors. This is not enforced by the normal live-in lists since
400/// registers may be implicitly defined, or not used by all successors.
401void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
402 assert(LiveBundles.empty() && "Stale data in LiveBundles");
403 LiveBundles.resize(Bundles->getNumBundles());
404
405 // Gather the actual live-in masks for all MBBs.
406 for (MachineBasicBlock &MBB : MF) {
407 setKillFlags(MBB);
408
409 const unsigned Mask = calcLiveInMask(&MBB, false);
410 if (!Mask)
411 continue;
412 // Update MBB ingoing bundle mask.
413 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
414 }
415}
416
417/// processBasicBlock - Loop over all of the instructions in the basic block,
418/// transforming FP instructions into their stack form.
419///
420bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
421 bool Changed = false;
422 MBB = &BB;
423
424 setupBlockStack();
425
426 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
427 MachineInstr &MI = *I;
428 uint64_t Flags = MI.getDesc().TSFlags;
429
430 unsigned FPInstClass = Flags & X86II::FPTypeMask;
431 if (MI.isInlineAsm())
432 FPInstClass = X86II::SpecialFP;
433
434 if (MI.isCopy() && isFPCopy(MI))
435 FPInstClass = X86II::SpecialFP;
436
437 if (MI.isImplicitDef() &&
438 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
439 FPInstClass = X86II::SpecialFP;
440
441 if (MI.isCall())
442 FPInstClass = X86II::SpecialFP;
443
444 // A fake_use with a floating point pseudo register argument that is
445 // killed must behave like any other floating point operation and pop
446 // the floating point stack (this is done in handleSpecialFP()).
447 // Fake_use is, however, unusual, in that sometimes its operand is not
448 // killed because a later instruction (probably a return) will use it.
449 // It is this instruction that will pop the stack.
450 // In this scenario we can safely remove the fake_use's operand
451 // (it is live anyway).
452 if (MI.isFakeUse()) {
453 const MachineOperand &MO = MI.getOperand(0);
454 if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
455 if (MO.isKill())
456 FPInstClass = X86II::SpecialFP;
457 else
458 MI.removeOperand(0);
459 }
460 }
461
462 if (FPInstClass == X86II::NotFP)
463 continue; // Efficiently ignore non-fp insts!
464
465 MachineInstr *PrevMI = nullptr;
466 if (I != BB.begin())
467 PrevMI = &*std::prev(I);
468
469 ++NumFP; // Keep track of # of pseudo instrs
470 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
471
472 // Get dead variables list now because the MI pointer may be deleted as part
473 // of processing!
475 for (const MachineOperand &MO : MI.operands())
476 if (MO.isReg() && MO.isDead())
477 DeadRegs.push_back(MO.getReg());
478
479 switch (FPInstClass) {
480 case X86II::ZeroArgFP:
481 handleZeroArgFP(I);
482 break;
483 case X86II::OneArgFP:
484 handleOneArgFP(I);
485 break; // fstp ST(0)
487 handleOneArgFPRW(I);
488 break; // ST(0) = fsqrt(ST(0))
489 case X86II::TwoArgFP:
490 handleTwoArgFP(I);
491 break;
492 case X86II::CompareFP:
493 handleCompareFP(I);
494 break;
495 case X86II::CondMovFP:
496 handleCondMovFP(I);
497 break;
498 case X86II::SpecialFP:
499 handleSpecialFP(I);
500 break;
501 default:
502 llvm_unreachable("Unknown FP Type!");
503 }
504
505 // Check to see if any of the values defined by this instruction are dead
506 // after definition. If so, pop them.
507 for (Register Reg : DeadRegs) {
508 // Check if Reg is live on the stack. An inline-asm register operand that
509 // is in the clobber list and marked dead might not be live on the stack.
510 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
511 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg - X86::FP0)) {
512 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
513 freeStackSlotAfter(I, Reg - X86::FP0);
514 }
515 }
516
517 // Print out all of the instructions expanded to if -debug
518 LLVM_DEBUG({
519 MachineBasicBlock::iterator PrevI = PrevMI;
520 if (I == PrevI) {
521 dbgs() << "Just deleted pseudo instruction\n";
522 } else {
524 // Rewind to first instruction newly inserted.
525 while (Start != BB.begin() && std::prev(Start) != PrevI)
526 --Start;
527 dbgs() << "Inserted instructions:\n\t";
528 Start->print(dbgs());
529 while (++Start != std::next(I)) {
530 }
531 }
532 dumpStack();
533 });
534 (void)PrevMI;
535
536 Changed = true;
537 }
538
539 finishBlockStack();
540
541 return Changed;
542}
543
544/// setupBlockStack - Use the live bundles to set up our model of the stack
545/// to match predecessors' live out stack.
546void FPS::setupBlockStack() {
547 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
548 << " derived from " << MBB->getName() << ".\n");
549 StackTop = 0;
550 // Get the live-in bundle for MBB.
551 const LiveBundle &Bundle =
552 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
553
554 if (!Bundle.Mask) {
555 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
556 return;
557 }
558
559 // Depth-first iteration should ensure that we always have an assigned stack.
560 assert(Bundle.isFixed() && "Reached block before any predecessors");
561
562 // Push the fixed live-in registers.
563 for (unsigned i = Bundle.FixCount; i > 0; --i) {
564 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
565 << unsigned(Bundle.FixStack[i - 1]) << '\n');
566 pushReg(Bundle.FixStack[i - 1]);
567 }
568
569 // Kill off unwanted live-ins. This can happen with a critical edge.
570 // FIXME: We could keep these live registers around as zombies. They may need
571 // to be revived at the end of a short block. It might save a few instrs.
572 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
573 adjustLiveRegs(Mask, MBB->begin());
574 LLVM_DEBUG(MBB->dump());
575}
576
577/// finishBlockStack - Revive live-outs that are implicitly defined out of
578/// MBB. Shuffle live registers to match the expected fixed stack of any
579/// predecessors, and ensure that all predecessors are expecting the same
580/// stack.
581void FPS::finishBlockStack() {
582 // The RET handling below takes care of return blocks for us.
583 if (MBB->succ_empty())
584 return;
585
586 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
587 << " derived from " << MBB->getName() << ".\n");
588
589 // Get MBB's live-out bundle.
590 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
591 LiveBundle &Bundle = LiveBundles[BundleIdx];
592
593 // We may need to kill and define some registers to match successors.
594 // FIXME: This can probably be combined with the shuffle below.
596 adjustLiveRegs(Bundle.Mask, Term);
597
598 if (!Bundle.Mask) {
599 LLVM_DEBUG(dbgs() << "No live-outs.\n");
600 return;
601 }
602
603 // Has the stack order been fixed yet?
604 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
605 if (Bundle.isFixed()) {
606 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
607 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
608 } else {
609 // Not fixed yet, we get to choose.
610 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
611 Bundle.FixCount = StackTop;
612 for (unsigned i = 0; i < StackTop; ++i)
613 Bundle.FixStack[i] = getStackEntry(i);
614 }
615}
616
617//===----------------------------------------------------------------------===//
618// Efficient Lookup Table Support
619//===----------------------------------------------------------------------===//
620
621namespace {
622struct TableEntry {
623 uint16_t from;
624 uint16_t to;
625 bool operator<(const TableEntry &TE) const { return from < TE.from; }
626 friend bool operator<(const TableEntry &TE, unsigned V) {
627 return TE.from < V;
628 }
629 [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
630 return V < TE.from;
631 }
632};
633} // namespace
634
635static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
636 const TableEntry *I = llvm::lower_bound(Table, Opcode);
637 if (I != Table.end() && I->from == Opcode)
638 return I->to;
639 return -1;
640}
641
642#ifdef NDEBUG
643#define ASSERT_SORTED(TABLE)
644#else
645#define ASSERT_SORTED(TABLE) \
646 { \
647 static std::atomic<bool> TABLE##Checked(false); \
648 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
649 assert(is_sorted(TABLE) && \
650 "All lookup tables must be sorted for efficient access!"); \
651 TABLE##Checked.store(true, std::memory_order_relaxed); \
652 } \
653 }
654#endif
655
656//===----------------------------------------------------------------------===//
657// Register File -> Register Stack Mapping Methods
658//===----------------------------------------------------------------------===//
659
660// OpcodeTable - Sorted map of register instructions to their stack version.
661// The first element is an register file pseudo instruction, the second is the
662// concrete X86 instruction which uses the register stack.
663//
664static const TableEntry OpcodeTable[] = {
665 {X86::ABS_Fp32, X86::ABS_F},
666 {X86::ABS_Fp64, X86::ABS_F},
667 {X86::ABS_Fp80, X86::ABS_F},
668 {X86::ADD_Fp32m, X86::ADD_F32m},
669 {X86::ADD_Fp64m, X86::ADD_F64m},
670 {X86::ADD_Fp64m32, X86::ADD_F32m},
671 {X86::ADD_Fp80m32, X86::ADD_F32m},
672 {X86::ADD_Fp80m64, X86::ADD_F64m},
673 {X86::ADD_FpI16m32, X86::ADD_FI16m},
674 {X86::ADD_FpI16m64, X86::ADD_FI16m},
675 {X86::ADD_FpI16m80, X86::ADD_FI16m},
676 {X86::ADD_FpI32m32, X86::ADD_FI32m},
677 {X86::ADD_FpI32m64, X86::ADD_FI32m},
678 {X86::ADD_FpI32m80, X86::ADD_FI32m},
679 {X86::CHS_Fp32, X86::CHS_F},
680 {X86::CHS_Fp64, X86::CHS_F},
681 {X86::CHS_Fp80, X86::CHS_F},
682 {X86::CMOVBE_Fp32, X86::CMOVBE_F},
683 {X86::CMOVBE_Fp64, X86::CMOVBE_F},
684 {X86::CMOVBE_Fp80, X86::CMOVBE_F},
685 {X86::CMOVB_Fp32, X86::CMOVB_F},
686 {X86::CMOVB_Fp64, X86::CMOVB_F},
687 {X86::CMOVB_Fp80, X86::CMOVB_F},
688 {X86::CMOVE_Fp32, X86::CMOVE_F},
689 {X86::CMOVE_Fp64, X86::CMOVE_F},
690 {X86::CMOVE_Fp80, X86::CMOVE_F},
691 {X86::CMOVNBE_Fp32, X86::CMOVNBE_F},
692 {X86::CMOVNBE_Fp64, X86::CMOVNBE_F},
693 {X86::CMOVNBE_Fp80, X86::CMOVNBE_F},
694 {X86::CMOVNB_Fp32, X86::CMOVNB_F},
695 {X86::CMOVNB_Fp64, X86::CMOVNB_F},
696 {X86::CMOVNB_Fp80, X86::CMOVNB_F},
697 {X86::CMOVNE_Fp32, X86::CMOVNE_F},
698 {X86::CMOVNE_Fp64, X86::CMOVNE_F},
699 {X86::CMOVNE_Fp80, X86::CMOVNE_F},
700 {X86::CMOVNP_Fp32, X86::CMOVNP_F},
701 {X86::CMOVNP_Fp64, X86::CMOVNP_F},
702 {X86::CMOVNP_Fp80, X86::CMOVNP_F},
703 {X86::CMOVP_Fp32, X86::CMOVP_F},
704 {X86::CMOVP_Fp64, X86::CMOVP_F},
705 {X86::CMOVP_Fp80, X86::CMOVP_F},
706 {X86::COM_FpIr32, X86::COM_FIr},
707 {X86::COM_FpIr64, X86::COM_FIr},
708 {X86::COM_FpIr80, X86::COM_FIr},
709 {X86::COM_Fpr32, X86::COM_FST0r},
710 {X86::COM_Fpr64, X86::COM_FST0r},
711 {X86::COM_Fpr80, X86::COM_FST0r},
712 {X86::DIVR_Fp32m, X86::DIVR_F32m},
713 {X86::DIVR_Fp64m, X86::DIVR_F64m},
714 {X86::DIVR_Fp64m32, X86::DIVR_F32m},
715 {X86::DIVR_Fp80m32, X86::DIVR_F32m},
716 {X86::DIVR_Fp80m64, X86::DIVR_F64m},
717 {X86::DIVR_FpI16m32, X86::DIVR_FI16m},
718 {X86::DIVR_FpI16m64, X86::DIVR_FI16m},
719 {X86::DIVR_FpI16m80, X86::DIVR_FI16m},
720 {X86::DIVR_FpI32m32, X86::DIVR_FI32m},
721 {X86::DIVR_FpI32m64, X86::DIVR_FI32m},
722 {X86::DIVR_FpI32m80, X86::DIVR_FI32m},
723 {X86::DIV_Fp32m, X86::DIV_F32m},
724 {X86::DIV_Fp64m, X86::DIV_F64m},
725 {X86::DIV_Fp64m32, X86::DIV_F32m},
726 {X86::DIV_Fp80m32, X86::DIV_F32m},
727 {X86::DIV_Fp80m64, X86::DIV_F64m},
728 {X86::DIV_FpI16m32, X86::DIV_FI16m},
729 {X86::DIV_FpI16m64, X86::DIV_FI16m},
730 {X86::DIV_FpI16m80, X86::DIV_FI16m},
731 {X86::DIV_FpI32m32, X86::DIV_FI32m},
732 {X86::DIV_FpI32m64, X86::DIV_FI32m},
733 {X86::DIV_FpI32m80, X86::DIV_FI32m},
734 {X86::ILD_Fp16m32, X86::ILD_F16m},
735 {X86::ILD_Fp16m64, X86::ILD_F16m},
736 {X86::ILD_Fp16m80, X86::ILD_F16m},
737 {X86::ILD_Fp32m32, X86::ILD_F32m},
738 {X86::ILD_Fp32m64, X86::ILD_F32m},
739 {X86::ILD_Fp32m80, X86::ILD_F32m},
740 {X86::ILD_Fp64m32, X86::ILD_F64m},
741 {X86::ILD_Fp64m64, X86::ILD_F64m},
742 {X86::ILD_Fp64m80, X86::ILD_F64m},
743 {X86::ISTT_Fp16m32, X86::ISTT_FP16m},
744 {X86::ISTT_Fp16m64, X86::ISTT_FP16m},
745 {X86::ISTT_Fp16m80, X86::ISTT_FP16m},
746 {X86::ISTT_Fp32m32, X86::ISTT_FP32m},
747 {X86::ISTT_Fp32m64, X86::ISTT_FP32m},
748 {X86::ISTT_Fp32m80, X86::ISTT_FP32m},
749 {X86::ISTT_Fp64m32, X86::ISTT_FP64m},
750 {X86::ISTT_Fp64m64, X86::ISTT_FP64m},
751 {X86::ISTT_Fp64m80, X86::ISTT_FP64m},
752 {X86::IST_Fp16m32, X86::IST_F16m},
753 {X86::IST_Fp16m64, X86::IST_F16m},
754 {X86::IST_Fp16m80, X86::IST_F16m},
755 {X86::IST_Fp32m32, X86::IST_F32m},
756 {X86::IST_Fp32m64, X86::IST_F32m},
757 {X86::IST_Fp32m80, X86::IST_F32m},
758 {X86::IST_Fp64m32, X86::IST_FP64m},
759 {X86::IST_Fp64m64, X86::IST_FP64m},
760 {X86::IST_Fp64m80, X86::IST_FP64m},
761 {X86::LD_Fp032, X86::LD_F0},
762 {X86::LD_Fp064, X86::LD_F0},
763 {X86::LD_Fp080, X86::LD_F0},
764 {X86::LD_Fp132, X86::LD_F1},
765 {X86::LD_Fp164, X86::LD_F1},
766 {X86::LD_Fp180, X86::LD_F1},
767 {X86::LD_Fp32m, X86::LD_F32m},
768 {X86::LD_Fp32m64, X86::LD_F32m},
769 {X86::LD_Fp32m80, X86::LD_F32m},
770 {X86::LD_Fp64m, X86::LD_F64m},
771 {X86::LD_Fp64m80, X86::LD_F64m},
772 {X86::LD_Fp80m, X86::LD_F80m},
773 {X86::MUL_Fp32m, X86::MUL_F32m},
774 {X86::MUL_Fp64m, X86::MUL_F64m},
775 {X86::MUL_Fp64m32, X86::MUL_F32m},
776 {X86::MUL_Fp80m32, X86::MUL_F32m},
777 {X86::MUL_Fp80m64, X86::MUL_F64m},
778 {X86::MUL_FpI16m32, X86::MUL_FI16m},
779 {X86::MUL_FpI16m64, X86::MUL_FI16m},
780 {X86::MUL_FpI16m80, X86::MUL_FI16m},
781 {X86::MUL_FpI32m32, X86::MUL_FI32m},
782 {X86::MUL_FpI32m64, X86::MUL_FI32m},
783 {X86::MUL_FpI32m80, X86::MUL_FI32m},
784 {X86::SQRT_Fp32, X86::SQRT_F},
785 {X86::SQRT_Fp64, X86::SQRT_F},
786 {X86::SQRT_Fp80, X86::SQRT_F},
787 {X86::ST_Fp32m, X86::ST_F32m},
788 {X86::ST_Fp64m, X86::ST_F64m},
789 {X86::ST_Fp64m32, X86::ST_F32m},
790 {X86::ST_Fp80m32, X86::ST_F32m},
791 {X86::ST_Fp80m64, X86::ST_F64m},
792 {X86::ST_FpP80m, X86::ST_FP80m},
793 {X86::SUBR_Fp32m, X86::SUBR_F32m},
794 {X86::SUBR_Fp64m, X86::SUBR_F64m},
795 {X86::SUBR_Fp64m32, X86::SUBR_F32m},
796 {X86::SUBR_Fp80m32, X86::SUBR_F32m},
797 {X86::SUBR_Fp80m64, X86::SUBR_F64m},
798 {X86::SUBR_FpI16m32, X86::SUBR_FI16m},
799 {X86::SUBR_FpI16m64, X86::SUBR_FI16m},
800 {X86::SUBR_FpI16m80, X86::SUBR_FI16m},
801 {X86::SUBR_FpI32m32, X86::SUBR_FI32m},
802 {X86::SUBR_FpI32m64, X86::SUBR_FI32m},
803 {X86::SUBR_FpI32m80, X86::SUBR_FI32m},
804 {X86::SUB_Fp32m, X86::SUB_F32m},
805 {X86::SUB_Fp64m, X86::SUB_F64m},
806 {X86::SUB_Fp64m32, X86::SUB_F32m},
807 {X86::SUB_Fp80m32, X86::SUB_F32m},
808 {X86::SUB_Fp80m64, X86::SUB_F64m},
809 {X86::SUB_FpI16m32, X86::SUB_FI16m},
810 {X86::SUB_FpI16m64, X86::SUB_FI16m},
811 {X86::SUB_FpI16m80, X86::SUB_FI16m},
812 {X86::SUB_FpI32m32, X86::SUB_FI32m},
813 {X86::SUB_FpI32m64, X86::SUB_FI32m},
814 {X86::SUB_FpI32m80, X86::SUB_FI32m},
815 {X86::TST_Fp32, X86::TST_F},
816 {X86::TST_Fp64, X86::TST_F},
817 {X86::TST_Fp80, X86::TST_F},
818 {X86::UCOM_FpIr32, X86::UCOM_FIr},
819 {X86::UCOM_FpIr64, X86::UCOM_FIr},
820 {X86::UCOM_FpIr80, X86::UCOM_FIr},
821 {X86::UCOM_Fpr32, X86::UCOM_Fr},
822 {X86::UCOM_Fpr64, X86::UCOM_Fr},
823 {X86::UCOM_Fpr80, X86::UCOM_Fr},
824 {X86::XAM_Fp32, X86::XAM_F},
825 {X86::XAM_Fp64, X86::XAM_F},
826 {X86::XAM_Fp80, X86::XAM_F},
827};
828
829static unsigned getConcreteOpcode(unsigned Opcode) {
831 int Opc = Lookup(OpcodeTable, Opcode);
832 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
833 return Opc;
834}
835
836//===----------------------------------------------------------------------===//
837// Helper Methods
838//===----------------------------------------------------------------------===//
839
840// PopTable - Sorted map of instructions to their popping version. The first
841// element is an instruction, the second is the version which pops.
842//
843static const TableEntry PopTable[] = {
844 {X86::ADD_FrST0, X86::ADD_FPrST0},
845
846 {X86::COMP_FST0r, X86::FCOMPP}, {X86::COM_FIr, X86::COM_FIPr},
847 {X86::COM_FST0r, X86::COMP_FST0r},
848
849 {X86::DIVR_FrST0, X86::DIVR_FPrST0}, {X86::DIV_FrST0, X86::DIV_FPrST0},
850
851 {X86::IST_F16m, X86::IST_FP16m}, {X86::IST_F32m, X86::IST_FP32m},
852
853 {X86::MUL_FrST0, X86::MUL_FPrST0},
854
855 {X86::ST_F32m, X86::ST_FP32m}, {X86::ST_F64m, X86::ST_FP64m},
856 {X86::ST_Frr, X86::ST_FPrr},
857
858 {X86::SUBR_FrST0, X86::SUBR_FPrST0}, {X86::SUB_FrST0, X86::SUB_FPrST0},
859
860 {X86::UCOM_FIr, X86::UCOM_FIPr},
861
862 {X86::UCOM_FPr, X86::UCOM_FPPr}, {X86::UCOM_Fr, X86::UCOM_FPr},
863};
864
866 if (const MachineOperand *MO =
867 MI.findRegisterDefOperand(X86::FPSW, /*TRI=*/nullptr))
868 if (!MO->isDead())
869 return true;
870 return false;
871}
872
875 MachineBasicBlock &MBB = *I->getParent();
876 while (++I != MBB.end()) {
877 MachineInstr &MI = *I;
879 return I;
880 }
881 return MBB.end();
882}
883
884/// popStackAfter - Pop the current value off of the top of the FP stack after
885/// the specified instruction. This attempts to be sneaky and combine the pop
886/// into the instruction itself if possible. The iterator is left pointing to
887/// the last instruction, be it a new pop instruction inserted, or the old
888/// instruction if it was modified in place.
889///
890void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
891 MachineInstr &MI = *I;
892 const DebugLoc &dl = MI.getDebugLoc();
894
895 popReg();
896
897 // Check to see if there is a popping version of this instruction...
898 int Opcode = Lookup(PopTable, I->getOpcode());
899 if (Opcode != -1) {
900 I->setDesc(TII->get(Opcode));
901 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
902 I->removeOperand(0);
903 MI.dropDebugNumber();
904 } else { // Insert an explicit pop
905 // If this instruction sets FPSW, which is read in following instruction,
906 // insert pop after that reader.
908 MachineBasicBlock &MBB = *MI.getParent();
910 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, /*TRI=*/nullptr))
911 I = Next;
912 }
913 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
914 }
915}
916
917/// freeStackSlotAfter - Free the specified register from the register stack, so
918/// that it is no longer in a register. If the register is currently at the top
919/// of the stack, we just pop the current instruction, otherwise we store the
920/// current top-of-stack into the specified slot, then pop the top of stack.
921void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
922 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
923 popStackAfter(I);
924 return;
925 }
926
927 // Otherwise, store the top of stack into the dead slot, killing the operand
928 // without having to add in an explicit xchg then pop.
929 //
930 I = freeStackSlotBefore(++I, FPRegNo);
931}
932
933/// freeStackSlotBefore - Free the specified register without trying any
934/// folding.
936FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
937 unsigned STReg = getSTReg(FPRegNo);
938 unsigned OldSlot = getSlot(FPRegNo);
939 unsigned TopReg = Stack[StackTop - 1];
940 Stack[OldSlot] = TopReg;
941 RegMap[TopReg] = OldSlot;
942 RegMap[FPRegNo] = ~0;
943 Stack[--StackTop] = ~0;
944 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
945 .addReg(STReg)
946 .getInstr();
947}
948
949/// adjustLiveRegs - Kill and revive registers such that exactly the FP
950/// registers with a bit in Mask are live.
951void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
952 unsigned Defs = Mask;
953 unsigned Kills = 0;
954 for (unsigned i = 0; i < StackTop; ++i) {
955 unsigned RegNo = Stack[i];
956 if (!(Defs & (1 << RegNo)))
957 // This register is live, but we don't want it.
958 Kills |= (1 << RegNo);
959 else
960 // We don't need to imp-def this live register.
961 Defs &= ~(1 << RegNo);
962 }
963 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
964
965 // Produce implicit-defs for free by using killed registers.
966 while (Kills && Defs) {
967 unsigned KReg = llvm::countr_zero(Kills);
968 unsigned DReg = llvm::countr_zero(Defs);
969 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
970 << "\n");
971 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
972 std::swap(RegMap[KReg], RegMap[DReg]);
973 Kills &= ~(1 << KReg);
974 Defs &= ~(1 << DReg);
975 }
976
977 // Kill registers by popping.
978 if (Kills && I != MBB->begin()) {
979 MachineBasicBlock::iterator I2 = std::prev(I);
980 while (StackTop) {
981 unsigned KReg = getStackEntry(0);
982 if (!(Kills & (1 << KReg)))
983 break;
984 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
985 popStackAfter(I2);
986 Kills &= ~(1 << KReg);
987 }
988 }
989
990 // Manually kill the rest.
991 while (Kills) {
992 unsigned KReg = llvm::countr_zero(Kills);
993 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
994 freeStackSlotBefore(I, KReg);
995 Kills &= ~(1 << KReg);
996 }
997
998 // Load zeros for all the imp-defs.
999 while (Defs) {
1000 unsigned DReg = llvm::countr_zero(Defs);
1001 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
1002 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
1003 pushReg(DReg);
1004 Defs &= ~(1 << DReg);
1005 }
1006
1007 // Now we should have the correct registers live.
1008 LLVM_DEBUG(dumpStack());
1009 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
1010}
1011
1012/// shuffleStackTop - emit fxch instructions before I to shuffle the top
1013/// FixCount entries into the order given by FixStack.
1014/// FIXME: Is there a better algorithm than insertion sort?
1015void FPS::shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
1017 // Move items into place, starting from the desired stack bottom.
1018 while (FixCount--) {
1019 // Old register at position FixCount.
1020 unsigned OldReg = getStackEntry(FixCount);
1021 // Desired register at position FixCount.
1022 unsigned Reg = FixStack[FixCount];
1023 if (Reg == OldReg)
1024 continue;
1025 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
1026 moveToTop(Reg, I);
1027 if (FixCount > 0)
1028 moveToTop(OldReg, I);
1029 }
1030 LLVM_DEBUG(dumpStack());
1031}
1032
1033//===----------------------------------------------------------------------===//
1034// Instruction transformation implementation
1035//===----------------------------------------------------------------------===//
1036
1037void FPS::handleCall(MachineBasicBlock::iterator &I) {
1038 MachineInstr &MI = *I;
1039 unsigned STReturns = 0;
1040
1041 bool ClobbersFPStack = false;
1042 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1043 MachineOperand &Op = MI.getOperand(i);
1044 // Check if this call clobbers the FP stack.
1045 // is sufficient.
1046 if (Op.isRegMask()) {
1047 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1048#ifndef NDEBUG
1049 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1050 for (unsigned i = 1; i != 8; ++i)
1051 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1052 "Inconsistent FP register clobber");
1053#endif
1054
1055 if (ClobbersFP0)
1056 ClobbersFPStack = true;
1057 }
1058
1059 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1060 continue;
1061
1062 assert(Op.isImplicit() && "Expected implicit def/use");
1063
1064 if (Op.isDef())
1065 STReturns |= 1 << getFPReg(Op);
1066
1067 // Remove the operand so that later passes don't see it.
1068 MI.removeOperand(i);
1069 --i;
1070 --e;
1071 }
1072
1073 // Most calls should have a regmask that clobbers the FP registers. If it
1074 // isn't present then the register allocator didn't spill the FP registers
1075 // so they are still on the stack.
1076 assert((ClobbersFPStack || STReturns == 0) &&
1077 "ST returns without FP stack clobber");
1078 if (!ClobbersFPStack)
1079 return;
1080
1081 unsigned N = llvm::countr_one(STReturns);
1082
1083 // FP registers used for function return must be consecutive starting at
1084 // FP0
1085 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1086
1087 // Reset the FP Stack - It is required because of possible leftovers from
1088 // passed arguments. The caller should assume that the FP stack is
1089 // returned empty (unless the callee returns values on FP stack).
1090 while (StackTop > 0)
1091 popReg();
1092
1093 for (unsigned I = 0; I < N; ++I)
1094 pushReg(N - I - 1);
1095
1096 // If this call has been modified, drop all variable values defined by it.
1097 // We can't track them once they've been stackified.
1098 if (STReturns)
1099 I->dropDebugNumber();
1100}
1101
1102/// If RET has an FP register use operand, pass the first one in ST(0) and
1103/// the second one in ST(1).
1104void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1105 MachineInstr &MI = *I;
1106
1107 // Find the register operands.
1108 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1109 unsigned LiveMask = 0;
1110
1111 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1112 MachineOperand &Op = MI.getOperand(i);
1113 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1114 continue;
1115 // FP Register uses must be kills unless there are two uses of the same
1116 // register, in which case only one will be a kill.
1117 assert(Op.isUse() &&
1118 (Op.isKill() || // Marked kill.
1119 getFPReg(Op) == FirstFPRegOp || // Second instance.
1120 MI.killsRegister(Op.getReg(),
1121 /*TRI=*/nullptr)) && // Later use is marked kill.
1122 "Ret only defs operands, and values aren't live beyond it");
1123
1124 if (FirstFPRegOp == ~0U)
1125 FirstFPRegOp = getFPReg(Op);
1126 else {
1127 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1128 SecondFPRegOp = getFPReg(Op);
1129 }
1130 LiveMask |= (1 << getFPReg(Op));
1131
1132 // Remove the operand so that later passes don't see it.
1133 MI.removeOperand(i);
1134 --i;
1135 --e;
1136 }
1137
1138 // We may have been carrying spurious live-ins, so make sure only the
1139 // returned registers are left live.
1140 adjustLiveRegs(LiveMask, MI);
1141 if (!LiveMask)
1142 return; // Quick check to see if any are possible.
1143
1144 // There are only four possibilities here:
1145 // 1) we are returning a single FP value. In this case, it has to be in
1146 // ST(0) already, so just declare success by removing the value from the
1147 // FP Stack.
1148 if (SecondFPRegOp == ~0U) {
1149 // Assert that the top of stack contains the right FP register.
1150 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1151 "Top of stack not the right register for RET!");
1152
1153 // Ok, everything is good, mark the value as not being on the stack
1154 // anymore so that our assertion about the stack being empty at end of
1155 // block doesn't fire.
1156 StackTop = 0;
1157 return;
1158 }
1159
1160 // Otherwise, we are returning two values:
1161 // 2) If returning the same value for both, we only have one thing in the FP
1162 // stack. Consider: RET FP1, FP1
1163 if (StackTop == 1) {
1164 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0) &&
1165 "Stack misconfiguration for RET!");
1166
1167 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1168 // register to hold it.
1169 unsigned NewReg = ScratchFPReg;
1170 duplicateToTop(FirstFPRegOp, NewReg, MI);
1171 FirstFPRegOp = NewReg;
1172 }
1173
1174 /// Okay we know we have two different FPx operands now:
1175 assert(StackTop == 2 && "Must have two values live!");
1176
1177 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1178 /// in ST(1). In this case, emit an fxch.
1179 if (getStackEntry(0) == SecondFPRegOp) {
1180 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1181 moveToTop(FirstFPRegOp, MI);
1182 }
1183
1184 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1185 /// ST(1). Just remove both from our understanding of the stack and return.
1186 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1187 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1188 StackTop = 0;
1189}
1190
1191/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1192///
1193void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1194 MachineInstr &MI = *I;
1195 unsigned DestReg = getFPReg(MI.getOperand(0));
1196
1197 // Change from the pseudo instruction to the concrete instruction.
1198 MI.removeOperand(0); // Remove the explicit ST(0) operand
1199 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1200 MI.addOperand(
1201 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1202
1203 // Result gets pushed on the stack.
1204 pushReg(DestReg);
1205
1206 MI.dropDebugNumber();
1207}
1208
1209/// handleOneArgFP - fst <mem>, ST(0)
1210///
1211void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1212 MachineInstr &MI = *I;
1213 unsigned NumOps = MI.getDesc().getNumOperands();
1214 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1215 "Can only handle fst* & ftst instructions!");
1216
1217 // Is this the last use of the source register?
1218 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1219 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1220
1221 // FISTP64m is strange because there isn't a non-popping versions.
1222 // If we have one _and_ we don't want to pop the operand, duplicate the value
1223 // on the stack instead of moving it. This ensure that popping the value is
1224 // always ok.
1225 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1226 //
1227 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1228 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1229 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1230 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1231 MI.getOpcode() == X86::IST_Fp64m64 ||
1232 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1233 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1234 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1235 MI.getOpcode() == X86::IST_Fp64m80 ||
1236 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1237 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1238 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1239 MI.getOpcode() == X86::ST_FpP80m)) {
1240 duplicateToTop(Reg, ScratchFPReg, I);
1241 } else {
1242 moveToTop(Reg, I); // Move to the top of the stack...
1243 }
1244
1245 // Convert from the pseudo instruction to the concrete instruction.
1246 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1247 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1248 MI.addOperand(
1249 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1250
1251 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1252 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1253 MI.getOpcode() == X86::ST_FP80m) {
1254 if (StackTop == 0)
1255 report_fatal_error("Stack empty??");
1256 --StackTop;
1257 } else if (KillsSrc) { // Last use of operand?
1258 popStackAfter(I);
1259 }
1260
1261 MI.dropDebugNumber();
1262}
1263
1264/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1265/// replace the value with a newly computed value. These instructions may have
1266/// non-fp operands after their FP operands.
1267///
1268/// Examples:
1269/// R1 = fchs R2
1270/// R1 = fadd R2, [mem]
1271///
1272void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1273 MachineInstr &MI = *I;
1274#ifndef NDEBUG
1275 unsigned NumOps = MI.getDesc().getNumOperands();
1276 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1277#endif
1278
1279 // Is this the last use of the source register?
1280 unsigned Reg = getFPReg(MI.getOperand(1));
1281 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1282
1283 if (KillsSrc) {
1284 // If this is the last use of the source register, just make sure it's on
1285 // the top of the stack.
1286 moveToTop(Reg, I);
1287 if (StackTop == 0)
1288 report_fatal_error("Stack cannot be empty!");
1289 --StackTop;
1290 pushReg(getFPReg(MI.getOperand(0)));
1291 } else {
1292 // If this is not the last use of the source register, _copy_ it to the top
1293 // of the stack.
1294 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1295 }
1296
1297 // Change from the pseudo instruction to the concrete instruction.
1298 MI.removeOperand(1); // Drop the source operand.
1299 MI.removeOperand(0); // Drop the destination operand.
1300 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1301 MI.dropDebugNumber();
1302}
1303
1304//===----------------------------------------------------------------------===//
1305// Define tables of various ways to map pseudo instructions
1306//
1307
1308// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1309static const TableEntry ForwardST0Table[] = {
1310 {X86::ADD_Fp32, X86::ADD_FST0r}, {X86::ADD_Fp64, X86::ADD_FST0r},
1311 {X86::ADD_Fp80, X86::ADD_FST0r}, {X86::DIV_Fp32, X86::DIV_FST0r},
1312 {X86::DIV_Fp64, X86::DIV_FST0r}, {X86::DIV_Fp80, X86::DIV_FST0r},
1313 {X86::MUL_Fp32, X86::MUL_FST0r}, {X86::MUL_Fp64, X86::MUL_FST0r},
1314 {X86::MUL_Fp80, X86::MUL_FST0r}, {X86::SUB_Fp32, X86::SUB_FST0r},
1315 {X86::SUB_Fp64, X86::SUB_FST0r}, {X86::SUB_Fp80, X86::SUB_FST0r},
1316};
1317
1318// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1319static const TableEntry ReverseST0Table[] = {
1320 {X86::ADD_Fp32, X86::ADD_FST0r}, // commutative
1321 {X86::ADD_Fp64, X86::ADD_FST0r}, // commutative
1322 {X86::ADD_Fp80, X86::ADD_FST0r}, // commutative
1323 {X86::DIV_Fp32, X86::DIVR_FST0r},
1324 {X86::DIV_Fp64, X86::DIVR_FST0r},
1325 {X86::DIV_Fp80, X86::DIVR_FST0r},
1326 {X86::MUL_Fp32, X86::MUL_FST0r}, // commutative
1327 {X86::MUL_Fp64, X86::MUL_FST0r}, // commutative
1328 {X86::MUL_Fp80, X86::MUL_FST0r}, // commutative
1329 {X86::SUB_Fp32, X86::SUBR_FST0r},
1330 {X86::SUB_Fp64, X86::SUBR_FST0r},
1331 {X86::SUB_Fp80, X86::SUBR_FST0r},
1332};
1333
1334// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1335static const TableEntry ForwardSTiTable[] = {
1336 {X86::ADD_Fp32, X86::ADD_FrST0}, // commutative
1337 {X86::ADD_Fp64, X86::ADD_FrST0}, // commutative
1338 {X86::ADD_Fp80, X86::ADD_FrST0}, // commutative
1339 {X86::DIV_Fp32, X86::DIVR_FrST0},
1340 {X86::DIV_Fp64, X86::DIVR_FrST0},
1341 {X86::DIV_Fp80, X86::DIVR_FrST0},
1342 {X86::MUL_Fp32, X86::MUL_FrST0}, // commutative
1343 {X86::MUL_Fp64, X86::MUL_FrST0}, // commutative
1344 {X86::MUL_Fp80, X86::MUL_FrST0}, // commutative
1345 {X86::SUB_Fp32, X86::SUBR_FrST0},
1346 {X86::SUB_Fp64, X86::SUBR_FrST0},
1347 {X86::SUB_Fp80, X86::SUBR_FrST0},
1348};
1349
1350// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1351static const TableEntry ReverseSTiTable[] = {
1352 {X86::ADD_Fp32, X86::ADD_FrST0}, {X86::ADD_Fp64, X86::ADD_FrST0},
1353 {X86::ADD_Fp80, X86::ADD_FrST0}, {X86::DIV_Fp32, X86::DIV_FrST0},
1354 {X86::DIV_Fp64, X86::DIV_FrST0}, {X86::DIV_Fp80, X86::DIV_FrST0},
1355 {X86::MUL_Fp32, X86::MUL_FrST0}, {X86::MUL_Fp64, X86::MUL_FrST0},
1356 {X86::MUL_Fp80, X86::MUL_FrST0}, {X86::SUB_Fp32, X86::SUB_FrST0},
1357 {X86::SUB_Fp64, X86::SUB_FrST0}, {X86::SUB_Fp80, X86::SUB_FrST0},
1358};
1359
1360/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1361/// instructions which need to be simplified and possibly transformed.
1362///
1363/// Result: ST(0) = fsub ST(0), ST(i)
1364/// ST(i) = fsub ST(0), ST(i)
1365/// ST(0) = fsubr ST(0), ST(i)
1366/// ST(i) = fsubr ST(0), ST(i)
1367///
1368void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1373 MachineInstr &MI = *I;
1374
1375 unsigned NumOperands = MI.getDesc().getNumOperands();
1376 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1377 unsigned Dest = getFPReg(MI.getOperand(0));
1378 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1379 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1380 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1381 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1382 const DebugLoc &dl = MI.getDebugLoc();
1383
1384 unsigned TOS = getStackEntry(0);
1385
1386 // One of our operands must be on the top of the stack. If neither is yet, we
1387 // need to move one.
1388 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1389 // We can choose to move either operand to the top of the stack. If one of
1390 // the operands is killed by this instruction, we want that one so that we
1391 // can update right on top of the old version.
1392 if (KillsOp0) {
1393 moveToTop(Op0, I); // Move dead operand to TOS.
1394 TOS = Op0;
1395 } else if (KillsOp1) {
1396 moveToTop(Op1, I);
1397 TOS = Op1;
1398 } else {
1399 // All of the operands are live after this instruction executes, so we
1400 // cannot update on top of any operand. Because of this, we must
1401 // duplicate one of the stack elements to the top. It doesn't matter
1402 // which one we pick.
1403 //
1404 duplicateToTop(Op0, Dest, I);
1405 Op0 = TOS = Dest;
1406 KillsOp0 = true;
1407 }
1408 } else if (!KillsOp0 && !KillsOp1) {
1409 // If we DO have one of our operands at the top of the stack, but we don't
1410 // have a dead operand, we must duplicate one of the operands to a new slot
1411 // on the stack.
1412 duplicateToTop(Op0, Dest, I);
1413 Op0 = TOS = Dest;
1414 KillsOp0 = true;
1415 }
1416
1417 // Now we know that one of our operands is on the top of the stack, and at
1418 // least one of our operands is killed by this instruction.
1419 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1420 "Stack conditions not set up right!");
1421
1422 // We decide which form to use based on what is on the top of the stack, and
1423 // which operand is killed by this instruction.
1424 ArrayRef<TableEntry> InstTable;
1425 bool isForward = TOS == Op0;
1426 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1427 if (updateST0) {
1428 if (isForward)
1429 InstTable = ForwardST0Table;
1430 else
1431 InstTable = ReverseST0Table;
1432 } else {
1433 if (isForward)
1434 InstTable = ForwardSTiTable;
1435 else
1436 InstTable = ReverseSTiTable;
1437 }
1438
1439 int Opcode = Lookup(InstTable, MI.getOpcode());
1440 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1441
1442 // NotTOS - The register which is not on the top of stack...
1443 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1444
1445 // Replace the old instruction with a new instruction
1446 MBB->remove(&*I++);
1447 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1448
1449 if (!MI.mayRaiseFPException())
1450 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1451
1452 // If both operands are killed, pop one off of the stack in addition to
1453 // overwriting the other one.
1454 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1455 assert(!updateST0 && "Should have updated other operand!");
1456 popStackAfter(I); // Pop the top of stack
1457 }
1458
1459 // Update stack information so that we know the destination register is now on
1460 // the stack.
1461 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1462 assert(UpdatedSlot < StackTop && Dest < 7);
1463 Stack[UpdatedSlot] = Dest;
1464 RegMap[Dest] = UpdatedSlot;
1465 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1466}
1467
1468/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1469/// register arguments and no explicit destinations.
1470///
1471void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1472 MachineInstr &MI = *I;
1473
1474 unsigned NumOperands = MI.getDesc().getNumOperands();
1475 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1476 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1477 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1478 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1479 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1480
1481 // Make sure the first operand is on the top of stack, the other one can be
1482 // anywhere.
1483 moveToTop(Op0, I);
1484
1485 // Change from the pseudo instruction to the concrete instruction.
1486 MI.getOperand(0).setReg(getSTReg(Op1));
1487 MI.removeOperand(1);
1488 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1489 MI.dropDebugNumber();
1490
1491 // If any of the operands are killed by this instruction, free them.
1492 if (KillsOp0)
1493 freeStackSlotAfter(I, Op0);
1494 if (KillsOp1 && Op0 != Op1)
1495 freeStackSlotAfter(I, Op1);
1496}
1497
1498/// handleCondMovFP - Handle two address conditional move instructions. These
1499/// instructions move a st(i) register to st(0) iff a condition is true. These
1500/// instructions require that the first operand is at the top of the stack, but
1501/// otherwise don't modify the stack at all.
1502void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1503 MachineInstr &MI = *I;
1504
1505 unsigned Op0 = getFPReg(MI.getOperand(0));
1506 unsigned Op1 = getFPReg(MI.getOperand(2));
1507 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1508
1509 // The first operand *must* be on the top of the stack.
1510 moveToTop(Op0, I);
1511
1512 // Change the second operand to the stack register that the operand is in.
1513 // Change from the pseudo instruction to the concrete instruction.
1514 MI.removeOperand(0);
1515 MI.removeOperand(1);
1516 MI.getOperand(0).setReg(getSTReg(Op1));
1517 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1518 MI.dropDebugNumber();
1519
1520 // If we kill the second operand, make sure to pop it from the stack.
1521 if (Op0 != Op1 && KillsOp1) {
1522 // Get this value off of the register stack.
1523 freeStackSlotAfter(I, Op1);
1524 }
1525}
1526
1527/// handleSpecialFP - Handle special instructions which behave unlike other
1528/// floating point instructions. This is primarily intended for use by pseudo
1529/// instructions.
1530///
1531void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1532 MachineInstr &MI = *Inst;
1533
1534 if (MI.isCall()) {
1535 handleCall(Inst);
1536 return;
1537 }
1538
1539 if (MI.isReturn()) {
1540 handleReturn(Inst);
1541 return;
1542 }
1543
1544 switch (MI.getOpcode()) {
1545 default:
1546 llvm_unreachable("Unknown SpecialFP instruction!");
1547 case TargetOpcode::COPY: {
1548 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1549 const MachineOperand &MO1 = MI.getOperand(1);
1550 const MachineOperand &MO0 = MI.getOperand(0);
1551 bool KillsSrc = MI.killsRegister(MO1.getReg(), /*TRI=*/nullptr);
1552
1553 // FP <- FP copy.
1554 unsigned DstFP = getFPReg(MO0);
1555 unsigned SrcFP = getFPReg(MO1);
1556 assert(isLive(SrcFP) && "Cannot copy dead register");
1557 if (KillsSrc) {
1558 // If the input operand is killed, we can just change the owner of the
1559 // incoming stack slot into the result.
1560 unsigned Slot = getSlot(SrcFP);
1561 Stack[Slot] = DstFP;
1562 RegMap[DstFP] = Slot;
1563 } else {
1564 // For COPY we just duplicate the specified value to a new stack slot.
1565 // This could be made better, but would require substantial changes.
1566 duplicateToTop(SrcFP, DstFP, Inst);
1567 }
1568 break;
1569 }
1570
1571 case TargetOpcode::IMPLICIT_DEF: {
1572 // All FP registers must be explicitly defined, so load a 0 instead.
1573 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1574 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1575 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1576 pushReg(Reg);
1577 break;
1578 }
1579
1580 case TargetOpcode::INLINEASM:
1581 case TargetOpcode::INLINEASM_BR: {
1582 // The inline asm MachineInstr currently only *uses* FP registers for the
1583 // 'f' constraint. These should be turned into the current ST(x) register
1584 // in the machine instr.
1585 //
1586 // There are special rules for x87 inline assembly. The compiler must know
1587 // exactly how many registers are popped and pushed implicitly by the asm.
1588 // Otherwise it is not possible to restore the stack state after the inline
1589 // asm.
1590 //
1591 // There are 3 kinds of input operands:
1592 //
1593 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1594 // popped input operand must be in a fixed stack slot, and it is either
1595 // tied to an output operand, or in the clobber list. The MI has ST use
1596 // and def operands for these inputs.
1597 //
1598 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1599 // preserved by the inline asm. The fixed stack slots must be STn-STm
1600 // following the popped inputs. A fixed input operand cannot be tied to
1601 // an output or appear in the clobber list. The MI has ST use operands
1602 // and no defs for these inputs.
1603 //
1604 // 3. Preserved inputs. These inputs use the "f" constraint which is
1605 // represented as an FP register. The inline asm won't change these
1606 // stack slots.
1607 //
1608 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1609 // registers do not count as output operands. The inline asm changes the
1610 // stack as if it popped all the popped inputs and then pushed all the
1611 // output operands.
1612
1613 // Scan the assembly for ST registers used, defined and clobbered. We can
1614 // only tell clobbers from defs by looking at the asm descriptor.
1615 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1616 unsigned NumOps = 0;
1617 SmallSet<unsigned, 1> FRegIdx;
1618 unsigned RCID;
1619
1620 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1621 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1622 unsigned Flags = MI.getOperand(i).getImm();
1623 const InlineAsm::Flag F(Flags);
1624
1625 NumOps = F.getNumOperandRegisters();
1626 if (NumOps != 1)
1627 continue;
1628 const MachineOperand &MO = MI.getOperand(i + 1);
1629 if (!MO.isReg())
1630 continue;
1631 unsigned STReg = MO.getReg() - X86::FP0;
1632 if (STReg >= 8)
1633 continue;
1634
1635 // If the flag has a register class constraint, this must be an operand
1636 // with constraint "f". Record its index and continue.
1637 if (F.hasRegClassConstraint(RCID)) {
1638 FRegIdx.insert(i + 1);
1639 continue;
1640 }
1641
1642 switch (F.getKind()) {
1643 case InlineAsm::Kind::RegUse:
1644 STUses |= (1u << STReg);
1645 break;
1646 case InlineAsm::Kind::RegDef:
1647 case InlineAsm::Kind::RegDefEarlyClobber:
1648 STDefs |= (1u << STReg);
1649 break;
1650 case InlineAsm::Kind::Clobber:
1651 STClobbers |= (1u << STReg);
1652 break;
1653 default:
1654 break;
1655 }
1656 }
1657
1658 if (STUses && !isMask_32(STUses))
1659 MI.emitGenericError("fixed input regs must be last on the x87 stack");
1660 unsigned NumSTUses = llvm::countr_one(STUses);
1661
1662 // Defs must be contiguous from the stack top. ST0-STn.
1663 if (STDefs && !isMask_32(STDefs)) {
1664 MI.emitGenericError("output regs must be last on the x87 stack");
1665 STDefs = NextPowerOf2(STDefs) - 1;
1666 }
1667 unsigned NumSTDefs = llvm::countr_one(STDefs);
1668
1669 // So must the clobbered stack slots. ST0-STm, m >= n.
1670 if (STClobbers && !isMask_32(STDefs | STClobbers))
1671 MI.emitGenericError("clobbers must be last on the x87 stack");
1672
1673 // Popped inputs are the ones that are also clobbered or defined.
1674 unsigned STPopped = STUses & (STDefs | STClobbers);
1675 if (STPopped && !isMask_32(STPopped))
1676 MI.emitGenericError(
1677 "implicitly popped regs must be last on the x87 stack");
1678 unsigned NumSTPopped = llvm::countr_one(STPopped);
1679
1680 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1681 << NumSTPopped << ", and defines " << NumSTDefs
1682 << " regs.\n");
1683
1684#ifndef NDEBUG
1685 // If any input operand uses constraint "f", all output register
1686 // constraints must be early-clobber defs.
1687 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1688 if (FRegIdx.count(I)) {
1689 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1690 "Operands with constraint \"f\" cannot overlap with defs");
1691 }
1692#endif
1693
1694 // Collect all FP registers (register operands with constraints "t", "u",
1695 // and "f") to kill afer the instruction.
1696 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1697 for (const MachineOperand &Op : MI.operands()) {
1698 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1699 continue;
1700 unsigned FPReg = getFPReg(Op);
1701
1702 // If we kill this operand, make sure to pop it from the stack after the
1703 // asm. We just remember it for now, and pop them all off at the end in
1704 // a batch.
1705 if (Op.isUse() && Op.isKill())
1706 FPKills |= 1U << FPReg;
1707 }
1708
1709 // Do not include registers that are implicitly popped by defs/clobbers.
1710 FPKills &= ~(STDefs | STClobbers);
1711
1712 // Now we can rearrange the live registers to match what was requested.
1713 unsigned char STUsesArray[8];
1714
1715 for (unsigned I = 0; I < NumSTUses; ++I)
1716 STUsesArray[I] = I;
1717
1718 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1719 LLVM_DEBUG({
1720 dbgs() << "Before asm: ";
1721 dumpStack();
1722 });
1723
1724 // With the stack layout fixed, rewrite the FP registers.
1725 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1726 MachineOperand &Op = MI.getOperand(i);
1727 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1728 continue;
1729
1730 unsigned FPReg = getFPReg(Op);
1731
1732 if (FRegIdx.count(i))
1733 // Operand with constraint "f".
1734 Op.setReg(getSTReg(FPReg));
1735 else
1736 // Operand with a single register class constraint ("t" or "u").
1737 Op.setReg(X86::ST0 + FPReg);
1738 }
1739
1740 // Simulate the inline asm popping its inputs and pushing its outputs.
1741 StackTop -= NumSTPopped;
1742
1743 for (unsigned i = 0; i < NumSTDefs; ++i)
1744 pushReg(NumSTDefs - i - 1);
1745
1746 // If this asm kills any FP registers (is the last use of them) we must
1747 // explicitly emit pop instructions for them. Do this now after the asm has
1748 // executed so that the ST(x) numbers are not off (which would happen if we
1749 // did this inline with operand rewriting).
1750 //
1751 // Note: this might be a non-optimal pop sequence. We might be able to do
1752 // better by trying to pop in stack order or something.
1753 while (FPKills) {
1754 unsigned FPReg = llvm::countr_zero(FPKills);
1755 if (isLive(FPReg))
1756 freeStackSlotAfter(Inst, FPReg);
1757 FPKills &= ~(1U << FPReg);
1758 }
1759
1760 // Don't delete the inline asm!
1761 return;
1762 }
1763
1764 // FAKE_USE must pop its register operand off the stack if it is killed,
1765 // because this constitutes the register's last use. If the operand
1766 // is not killed, it will have its last use later, so we leave it alone.
1767 // In either case we remove the operand so later passes don't see it.
1768 case TargetOpcode::FAKE_USE: {
1769 assert(MI.getNumExplicitOperands() == 1 &&
1770 "FAKE_USE must have exactly one operand");
1771 if (MI.getOperand(0).isKill()) {
1772 freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
1773 }
1774 MI.removeOperand(0);
1775 return;
1776 }
1777 }
1778
1779 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1780
1781 // We want to leave I pointing to the previous instruction, but what if we
1782 // just erased the first instruction?
1783 if (Inst == MBB->begin()) {
1784 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1785 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1786 } else
1787 --Inst;
1788}
1789
1790void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1791 const TargetRegisterInfo &TRI =
1793 LiveRegUnits LPR(TRI);
1794
1795 LPR.addLiveOuts(MBB);
1796
1797 for (MachineInstr &MI : llvm::reverse(MBB)) {
1798 if (MI.isDebugInstr())
1799 continue;
1800
1801 std::bitset<8> Defs;
1803
1804 for (auto &MO : MI.operands()) {
1805 if (!MO.isReg())
1806 continue;
1807
1808 unsigned Reg = MO.getReg() - X86::FP0;
1809
1810 if (Reg >= 8)
1811 continue;
1812
1813 if (MO.isDef()) {
1814 Defs.set(Reg);
1815 if (LPR.available(MO.getReg()))
1816 MO.setIsDead();
1817 } else
1818 Uses.push_back(&MO);
1819 }
1820
1821 for (auto *MO : Uses)
1822 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))
1823 MO->setIsKill();
1824
1825 LPR.stepBackward(MI);
1826 }
1827}
1828
1829bool X86FPStackifierLegacy::runOnMachineFunction(MachineFunction &MF) {
1830 FPS Impl;
1831 if (!Impl.shouldRun(MF))
1832 return false;
1833
1834 EdgeBundles *Bundles =
1835 &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
1836 return FPS().run(MF, Bundles);
1837}
1838
1839PreservedAnalyses
1842 FPS Impl;
1843 if (!Impl.shouldRun(MF))
1844 return PreservedAnalyses::all();
1845
1846 EdgeBundles *Bundles = &MFAM.getResult<EdgeBundlesAnalysis>(MF);
1847 bool Changed = Impl.run(MF, Bundles);
1848 if (!Changed)
1849 return PreservedAnalyses::all();
1852 return PA;
1853}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static Register getFPReg(const CSKYSubtarget &STI)
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
A set of register units.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static constexpr MCPhysReg FPReg
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void setFlag(MIFlag Flag)
Set a MI flag.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
size_type size() const
Definition SmallPtrSet.h:99
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
void resize(size_type N)
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Changed
Pass manager infrastructure for declaring and invalidating analyses.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
@ AddrNumOperands
Definition X86BaseInfo.h:36
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
constexpr double e
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:255
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1994
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionPass * createX86FPStackifierLegacyPass()
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
std::pair< iterator, bool > insert(NodeRef N)