X86FloatingPoint.cpp
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
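// Illustrative sketch (not part of the original file): before this pass FP
// values live in the pseudo registers %fp0-%fp6; afterwards everything is
// expressed relative to the x87 stack. For example, assuming %fp0 is on top
// of the stack with %fp1 directly below it and both are killed,
//
//   %fp2 = ADD_Fp80 killed %fp0, killed %fp1
//   ST_FpP80m <mem>, killed %fp2
//
// becomes roughly "ADD_FPrST0 st(1)" (faddp) followed by "ST_FP80m <mem>"
// (fstp), leaving the stack empty; the handlers below work out the exact
// form from operand kills and the current stack order.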
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/Passes.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/InlineAsm.h"
43#include "llvm/Support/Debug.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP , "Number of floating point instructions");
55
56namespace {
57 const unsigned ScratchFPReg = 7;
58
59 struct FPS : public MachineFunctionPass {
60 static char ID;
61 FPS() : MachineFunctionPass(ID) {
62 // This is really only to keep valgrind quiet.
63 // The logic in isLive() is too much for it.
64 memset(Stack, 0, sizeof(Stack));
65 memset(RegMap, 0, sizeof(RegMap));
66 }
67
68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
70 AU.addRequired<EdgeBundlesWrapperLegacy>();
71 AU.addPreservedID(MachineLoopInfoID);
72 AU.addPreservedID(MachineDominatorsID);
73 MachineFunctionPass::getAnalysisUsage(AU);
74 }
75
76 bool runOnMachineFunction(MachineFunction &MF) override;
77
78 MachineFunctionProperties getRequiredProperties() const override {
79 return MachineFunctionProperties().set(
80 MachineFunctionProperties::Property::NoVRegs);
81 }
82
83 StringRef getPassName() const override { return "X86 FP Stackifier"; }
84
85 private:
86 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
87
88 // Two CFG edges are related if they leave the same block, or enter the same
89 // block. The transitive closure of an edge under this relation is a
90 // LiveBundle. It represents a set of CFG edges where the live FP stack
91 // registers must be allocated identically in the x87 stack.
92 //
93 // A LiveBundle is usually all the edges leaving a block, or all the edges
94 // entering a block, but it can contain more edges if critical edges are
95 // present.
96 //
97 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
98 // but the exact mapping of FP registers to stack slots is fixed later.
99 struct LiveBundle {
100 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
101 unsigned Mask = 0;
102
103 // Number of pre-assigned live registers in FixStack. This is 0 when the
104 // stack order has not yet been fixed.
105 unsigned FixCount = 0;
106
107 // Assigned stack order for live-in registers.
108 // FixStack[i] == getStackEntry(i) for all i < FixCount.
109 unsigned char FixStack[8];
110
111 LiveBundle() = default;
112
113 // Have the live registers been assigned a stack order yet?
114 bool isFixed() const { return !Mask || FixCount; }
115 };
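// Example (not from the original file): if block A branches to both C and D,
// and B also branches to C, the edges A->C, A->D and B->C all share a source
// or destination and therefore land in one LiveBundle, so the FP live-outs of
// A and B must be arranged in the same stack order.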
116
117 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
118 // with no live FP registers.
119 SmallVector<LiveBundle, 8> LiveBundles;
120
121 // The edge bundle analysis provides indices into the LiveBundles vector.
122 EdgeBundles *Bundles = nullptr;
123
124 // Return a bitmask of FP registers in block's live-in list.
125 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
126 unsigned Mask = 0;
127 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
128 I != MBB->livein_end(); ) {
129 MCPhysReg Reg = I->PhysReg;
130 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
132 Mask |= 1 << (Reg - X86::FP0);
133 if (RemoveFPs) {
134 I = MBB->removeLiveIn(I);
135 continue;
136 }
137 }
138 ++I;
139 }
140 return Mask;
141 }
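// Illustrative note (not part of the original source): a block whose live-in
// list contains FP0 and FP2 yields a mask of 0b101 (bit i set for FPi), the
// same encoding used by LiveBundle::Mask above.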
142
143 // Partition all the CFG edges into LiveBundles.
144 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
145
146 MachineBasicBlock *MBB = nullptr; // Current basic block
147
148 // The hardware keeps track of how many FP registers are live, so we have
149 // to model that exactly. Usually, each live register corresponds to an
150 // FP<n> register, but when dealing with calls, returns, and inline
151 // assembly, it is sometimes necessary to have live scratch registers.
152 unsigned Stack[8]; // FP<n> Registers in each stack slot...
153 unsigned StackTop = 0; // The current top of the FP stack.
154
155 enum {
156 NumFPRegs = 8 // Including scratch pseudo-registers.
157 };
158
159 // For each live FP<n> register, point to its Stack[] entry.
160 // The first entries correspond to FP0-FP6, the rest are scratch registers
161 // used when we need slightly different live registers than what the
162 // register allocator thinks.
163 unsigned RegMap[NumFPRegs];
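// Illustrative note (not part of the original source): the model keeps the
// invariant Stack[RegMap[R]] == R for every live FP register R (asserted in
// dumpStack below). For example, after pushReg(3) then pushReg(5):
//   StackTop == 2, Stack == {3, 5, ...}, RegMap[3] == 0, RegMap[5] == 1,
// so FP5 is on top of the stack and FP3 is one slot below it.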
164
165 // Set up our stack model to match the incoming registers to MBB.
166 void setupBlockStack();
167
168 // Shuffle live registers to match the expectations of successor blocks.
169 void finishBlockStack();
170
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack() const {
173 dbgs() << "Stack contents:";
174 for (unsigned i = 0; i != StackTop; ++i) {
175 dbgs() << " FP" << Stack[i];
176 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
177 }
178 }
179#endif
180
181 /// getSlot - Return the stack slot number a particular register number is
182 /// in.
183 unsigned getSlot(unsigned RegNo) const {
184 assert(RegNo < NumFPRegs && "Regno out of range!");
185 return RegMap[RegNo];
186 }
187
188 /// isLive - Is RegNo currently live in the stack?
189 bool isLive(unsigned RegNo) const {
190 unsigned Slot = getSlot(RegNo);
191 return Slot < StackTop && Stack[Slot] == RegNo;
192 }
193
194 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
195 unsigned getStackEntry(unsigned STi) const {
196 if (STi >= StackTop)
197 report_fatal_error("Access past stack top!");
198 return Stack[StackTop-1-STi];
199 }
200
201 /// getSTReg - Return the X86::ST(i) register which contains the specified
202 /// FP<RegNo> register.
203 unsigned getSTReg(unsigned RegNo) const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
205 }
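// Worked example (illustrative, not part of the original source): with
// StackTop == 2, RegMap[3] == 0 and RegMap[5] == 1 as above, getSTReg(5)
// evaluates to 2 - 1 - 1 + X86::ST0 == X86::ST0 and getSTReg(3) to X86::ST1,
// i.e. the most recently pushed register is ST(0).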
206
207 // pushReg - Push the specified FP<n> register onto the stack.
208 void pushReg(unsigned Reg) {
209 assert(Reg < NumFPRegs && "Register number out of range!");
210 if (StackTop >= 8)
211 report_fatal_error("Stack overflow!");
212 Stack[StackTop] = Reg;
213 RegMap[Reg] = StackTop++;
214 }
215
216 // popReg - Pop a register from the stack.
217 void popReg() {
218 if (StackTop == 0)
219 report_fatal_error("Cannot pop empty stack!");
220 RegMap[Stack[--StackTop]] = ~0; // Update state
221 }
222
223 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
224 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
225 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
226 if (isAtTop(RegNo)) return;
227
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
230
231 // Swap the slots the regs are in.
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
233
234 // Swap stack slot contents.
235 if (RegMap[RegOnTop] >= StackTop)
236 report_fatal_error("Access past stack top!");
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
238
239 // Emit an fxch to update the runtime processor's version of the state.
240 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
241 ++NumFXCH;
242 }
243
244 void duplicateToTop(unsigned RegNo, unsigned AsReg,
245 MachineBasicBlock::iterator I) {
246 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
247 unsigned STReg = getSTReg(RegNo);
248 pushReg(AsReg); // New register on top of stack
249
250 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
251 }
252
253 /// popStackAfter - Pop the current value off of the top of the FP stack
254 /// after the specified instruction.
255 void popStackAfter(MachineBasicBlock::iterator &I);
256
257 /// freeStackSlotAfter - Free the specified register from the register
258 /// stack, so that it is no longer in a register. If the register is
259 /// currently at the top of the stack, we just pop the current instruction,
260 /// otherwise we store the current top-of-stack into the specified slot,
261 /// then pop the top of stack.
262 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
263
264 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
265 /// instruction.
266 MachineInstr *
267 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
268
269 /// Adjust the live registers to be the set in Mask.
270 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
271
272 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
273 /// st(0), FP reg FixStack[1] is st(1) etc.
274 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
275 MachineBasicBlock::iterator I);
276
277 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
278
279 void handleCall(MachineBasicBlock::iterator &I);
280 void handleReturn(MachineBasicBlock::iterator &I);
281 void handleZeroArgFP(MachineBasicBlock::iterator &I);
282 void handleOneArgFP(MachineBasicBlock::iterator &I);
283 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
284 void handleTwoArgFP(MachineBasicBlock::iterator &I);
285 void handleCompareFP(MachineBasicBlock::iterator &I);
286 void handleCondMovFP(MachineBasicBlock::iterator &I);
287 void handleSpecialFP(MachineBasicBlock::iterator &I);
288
289 // Check if a COPY instruction is using FP registers.
290 static bool isFPCopy(MachineInstr &MI) {
291 Register DstReg = MI.getOperand(0).getReg();
292 Register SrcReg = MI.getOperand(1).getReg();
293
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
296 }
297
298 void setKillFlags(MachineBasicBlock &MBB) const;
299 };
300}
301
302char FPS::ID = 0;
303
304INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
305 false, false)
306INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy)
307INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
308 false, false)
309
310FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
311
312/// getFPReg - Return the X86::FPx register number for the specified operand.
313/// For example, this returns 3 for X86::FP3.
314static unsigned getFPReg(const MachineOperand &MO) {
315 assert(MO.isReg() && "Expected an FP register!");
316 Register Reg = MO.getReg();
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
318 return Reg - X86::FP0;
319}
320
321/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
322/// register references into FP stack references.
323///
324bool FPS::runOnMachineFunction(MachineFunction &MF) {
325 // We only need to run this pass if there are any FP registers used in this
326 // function. If it is all integer, there is nothing for us to do!
327 bool FPIsUsed = false;
328
329 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
330 const MachineRegisterInfo &MRI = MF.getRegInfo();
331 for (unsigned i = 0; i <= 6; ++i)
332 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
333 FPIsUsed = true;
334 break;
335 }
336
337 // Early exit.
338 if (!FPIsUsed) return false;
339
340 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
341 TII = MF.getSubtarget().getInstrInfo();
342
343 // Prepare cross-MBB liveness.
344 bundleCFGRecomputeKillFlags(MF);
345
346 StackTop = 0;
347
348 // Process the function in depth first order so that we process at least one
349 // of the predecessors for every reachable block in the function.
350 df_iterator_default_set<MachineBasicBlock*> Processed;
351 MachineBasicBlock *Entry = &MF.front();
352
353 LiveBundle &Bundle =
354 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
355
356 // In regcall convention, some FP registers may not be passed through
357 // the stack, so they will need to be assigned to the stack first
358 if ((Entry->getParent()->getFunction().getCallingConv() ==
359 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
360 // In the register calling convention, up to one FP argument could be
361 // saved in the first FP register.
362 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
363 // that the FP registers contain arguments.
364 // The actual value is passed in FP0.
365 // Here we fix the stack and mark FP0 as pre-assigned register.
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
368 Bundle.FixCount = 1;
369 Bundle.FixStack[0] = 0;
370 }
371
372 bool Changed = false;
373 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
374 Changed |= processBasicBlock(MF, *BB);
375
376 // Process any unreachable blocks in arbitrary order now.
377 if (MF.size() != Processed.size())
378 for (MachineBasicBlock &BB : MF)
379 if (Processed.insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
381
382 LiveBundles.clear();
383
384 return Changed;
385}
386
387/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
388/// live-out sets for the FP registers. Consistent means that the set of
389/// registers live-out from a block is identical to the live-in set of all
390/// successors. This is not enforced by the normal live-in lists since
391/// registers may be implicitly defined, or not used by all successors.
392void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
393 assert(LiveBundles.empty() && "Stale data in LiveBundles");
394 LiveBundles.resize(Bundles->getNumBundles());
395
396 // Gather the actual live-in masks for all MBBs.
397 for (MachineBasicBlock &MBB : MF) {
398 setKillFlags(MBB);
399
400 const unsigned Mask = calcLiveInMask(&MBB, false);
401 if (!Mask)
402 continue;
403 // Update MBB ingoing bundle mask.
404 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
405 }
406}
407
408/// processBasicBlock - Loop over all of the instructions in the basic block,
409/// transforming FP instructions into their stack form.
410///
411bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
412 bool Changed = false;
413 MBB = &BB;
414
415 setupBlockStack();
416
417 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
418 MachineInstr &MI = *I;
419 uint64_t Flags = MI.getDesc().TSFlags;
420
421 unsigned FPInstClass = Flags & X86II::FPTypeMask;
422 if (MI.isInlineAsm())
423 FPInstClass = X86II::SpecialFP;
424
425 if (MI.isCopy() && isFPCopy(MI))
426 FPInstClass = X86II::SpecialFP;
427
428 if (MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
430 FPInstClass = X86II::SpecialFP;
431
432 if (MI.isCall())
433 FPInstClass = X86II::SpecialFP;
434
435 // A fake_use with a floating point pseudo register argument that is
436 // killed must behave like any other floating point operation and pop
437 // the floating point stack (this is done in handleSpecialFP()).
438 // Fake_use is, however, unusual, in that sometimes its operand is not
439 // killed because a later instruction (probably a return) will use it.
440 // It is this instruction that will pop the stack.
441 // In this scenario we can safely remove the fake_use's operand
442 // (it is live anyway).
443 if (MI.isFakeUse()) {
444 const MachineOperand &MO = MI.getOperand(0);
445 if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
446 if (MO.isKill())
447 FPInstClass = X86II::SpecialFP;
448 else
449 MI.removeOperand(0);
450 }
451 }
452
453 if (FPInstClass == X86II::NotFP)
454 continue; // Efficiently ignore non-fp insts!
455
456 MachineInstr *PrevMI = nullptr;
457 if (I != BB.begin())
458 PrevMI = &*std::prev(I);
459
460 ++NumFP; // Keep track of # of pseudo instrs
461 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
462
463 // Get dead variables list now because the MI pointer may be deleted as part
464 // of processing!
465 SmallVector<unsigned, 8> DeadRegs;
466 for (const MachineOperand &MO : MI.operands())
467 if (MO.isReg() && MO.isDead())
468 DeadRegs.push_back(MO.getReg());
469
470 switch (FPInstClass) {
471 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
472 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
473 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
474 case X86II::TwoArgFP: handleTwoArgFP(I); break;
475 case X86II::CompareFP: handleCompareFP(I); break;
476 case X86II::CondMovFP: handleCondMovFP(I); break;
477 case X86II::SpecialFP: handleSpecialFP(I); break;
478 default: llvm_unreachable("Unknown FP Type!");
479 }
480
481 // Check to see if any of the values defined by this instruction are dead
482 // after definition. If so, pop them.
483 for (unsigned Reg : DeadRegs) {
484 // Check if Reg is live on the stack. An inline-asm register operand that
485 // is in the clobber list and marked dead might not be live on the stack.
486 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
487 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
488 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
489 freeStackSlotAfter(I, Reg-X86::FP0);
490 }
491 }
492
493 // Print out all of the instructions expanded to if -debug
494 LLVM_DEBUG({
495 MachineBasicBlock::iterator PrevI = PrevMI;
496 if (I == PrevI) {
497 dbgs() << "Just deleted pseudo instruction\n";
498 } else {
499 MachineBasicBlock::iterator Start = I;
500 // Rewind to first instruction newly inserted.
501 while (Start != BB.begin() && std::prev(Start) != PrevI)
502 --Start;
503 dbgs() << "Inserted instructions:\n\t";
504 Start->print(dbgs());
505 while (++Start != std::next(I)) {
506 }
507 }
508 dumpStack();
509 });
510 (void)PrevMI;
511
512 Changed = true;
513 }
514
515 finishBlockStack();
516
517 return Changed;
518}
519
520/// setupBlockStack - Use the live bundles to set up our model of the stack
521/// to match predecessors' live out stack.
522void FPS::setupBlockStack() {
523 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
524 << " derived from " << MBB->getName() << ".\n");
525 StackTop = 0;
526 // Get the live-in bundle for MBB.
527 const LiveBundle &Bundle =
528 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
529
530 if (!Bundle.Mask) {
531 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
532 return;
533 }
534
535 // Depth-first iteration should ensure that we always have an assigned stack.
536 assert(Bundle.isFixed() && "Reached block before any predecessors");
537
538 // Push the fixed live-in registers.
539 for (unsigned i = Bundle.FixCount; i > 0; --i) {
540 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
541 << unsigned(Bundle.FixStack[i - 1]) << '\n');
542 pushReg(Bundle.FixStack[i-1]);
543 }
544
545 // Kill off unwanted live-ins. This can happen with a critical edge.
546 // FIXME: We could keep these live registers around as zombies. They may need
547 // to be revived at the end of a short block. It might save a few instrs.
548 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
549 adjustLiveRegs(Mask, MBB->begin());
550 LLVM_DEBUG(MBB->dump());
551}
552
553/// finishBlockStack - Revive live-outs that are implicitly defined out of
554/// MBB. Shuffle live registers to match the expected fixed stack of any
555/// predecessors, and ensure that all predecessors are expecting the same
556/// stack.
557void FPS::finishBlockStack() {
558 // The RET handling below takes care of return blocks for us.
559 if (MBB->succ_empty())
560 return;
561
562 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
563 << " derived from " << MBB->getName() << ".\n");
564
565 // Get MBB's live-out bundle.
566 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
567 LiveBundle &Bundle = LiveBundles[BundleIdx];
568
569 // We may need to kill and define some registers to match successors.
570 // FIXME: This can probably be combined with the shuffle below.
571 MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
572 adjustLiveRegs(Bundle.Mask, Term);
573
574 if (!Bundle.Mask) {
575 LLVM_DEBUG(dbgs() << "No live-outs.\n");
576 return;
577 }
578
579 // Has the stack order been fixed yet?
580 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
581 if (Bundle.isFixed()) {
582 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
583 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
584 } else {
585 // Not fixed yet, we get to choose.
586 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
587 Bundle.FixCount = StackTop;
588 for (unsigned i = 0; i < StackTop; ++i)
589 Bundle.FixStack[i] = getStackEntry(i);
590 }
591}
592
593
594//===----------------------------------------------------------------------===//
595// Efficient Lookup Table Support
596//===----------------------------------------------------------------------===//
597
598namespace {
599 struct TableEntry {
600 uint16_t from;
601 uint16_t to;
602 bool operator<(const TableEntry &TE) const { return from < TE.from; }
603 friend bool operator<(const TableEntry &TE, unsigned V) {
604 return TE.from < V;
605 }
606 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
607 const TableEntry &TE) {
608 return V < TE.from;
609 }
610 };
611}
612
613static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
614 const TableEntry *I = llvm::lower_bound(Table, Opcode);
615 if (I != Table.end() && I->from == Opcode)
616 return I->to;
617 return -1;
618}
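// Usage sketch (illustrative, not part of the original source): the tables
// below are sorted by the pseudo opcode, so Lookup is a binary search via
// lower_bound. For example, Lookup(OpcodeTable, X86::ABS_Fp32) returns
// X86::ABS_F, and an opcode with no stack form returns -1.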
619
620#ifdef NDEBUG
621#define ASSERT_SORTED(TABLE)
622#else
623#define ASSERT_SORTED(TABLE) \
624 { \
625 static std::atomic<bool> TABLE##Checked(false); \
626 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
627 assert(is_sorted(TABLE) && \
628 "All lookup tables must be sorted for efficient access!"); \
629 TABLE##Checked.store(true, std::memory_order_relaxed); \
630 } \
631 }
632#endif
633
634//===----------------------------------------------------------------------===//
635// Register File -> Register Stack Mapping Methods
636//===----------------------------------------------------------------------===//
637
638// OpcodeTable - Sorted map of register instructions to their stack version.
639// The first element is a register file pseudo instruction, the second is the
640// concrete X86 instruction which uses the register stack.
641//
642static const TableEntry OpcodeTable[] = {
643 { X86::ABS_Fp32 , X86::ABS_F },
644 { X86::ABS_Fp64 , X86::ABS_F },
645 { X86::ABS_Fp80 , X86::ABS_F },
646 { X86::ADD_Fp32m , X86::ADD_F32m },
647 { X86::ADD_Fp64m , X86::ADD_F64m },
648 { X86::ADD_Fp64m32 , X86::ADD_F32m },
649 { X86::ADD_Fp80m32 , X86::ADD_F32m },
650 { X86::ADD_Fp80m64 , X86::ADD_F64m },
651 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
652 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
653 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
654 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
655 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
656 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
657 { X86::CHS_Fp32 , X86::CHS_F },
658 { X86::CHS_Fp64 , X86::CHS_F },
659 { X86::CHS_Fp80 , X86::CHS_F },
660 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
661 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
662 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
663 { X86::CMOVB_Fp32 , X86::CMOVB_F },
664 { X86::CMOVB_Fp64 , X86::CMOVB_F },
665 { X86::CMOVB_Fp80 , X86::CMOVB_F },
666 { X86::CMOVE_Fp32 , X86::CMOVE_F },
667 { X86::CMOVE_Fp64 , X86::CMOVE_F },
668 { X86::CMOVE_Fp80 , X86::CMOVE_F },
669 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
670 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
671 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
672 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
673 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
674 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
675 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
676 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
677 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
678 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
679 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
680 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
681 { X86::CMOVP_Fp32 , X86::CMOVP_F },
682 { X86::CMOVP_Fp64 , X86::CMOVP_F },
683 { X86::CMOVP_Fp80 , X86::CMOVP_F },
684 { X86::COM_FpIr32 , X86::COM_FIr },
685 { X86::COM_FpIr64 , X86::COM_FIr },
686 { X86::COM_FpIr80 , X86::COM_FIr },
687 { X86::COM_Fpr32 , X86::COM_FST0r },
688 { X86::COM_Fpr64 , X86::COM_FST0r },
689 { X86::COM_Fpr80 , X86::COM_FST0r },
690 { X86::DIVR_Fp32m , X86::DIVR_F32m },
691 { X86::DIVR_Fp64m , X86::DIVR_F64m },
692 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
693 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
694 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
695 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
696 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
697 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
698 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
699 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
700 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
701 { X86::DIV_Fp32m , X86::DIV_F32m },
702 { X86::DIV_Fp64m , X86::DIV_F64m },
703 { X86::DIV_Fp64m32 , X86::DIV_F32m },
704 { X86::DIV_Fp80m32 , X86::DIV_F32m },
705 { X86::DIV_Fp80m64 , X86::DIV_F64m },
706 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
707 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
708 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
709 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
710 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
711 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
712 { X86::ILD_Fp16m32 , X86::ILD_F16m },
713 { X86::ILD_Fp16m64 , X86::ILD_F16m },
714 { X86::ILD_Fp16m80 , X86::ILD_F16m },
715 { X86::ILD_Fp32m32 , X86::ILD_F32m },
716 { X86::ILD_Fp32m64 , X86::ILD_F32m },
717 { X86::ILD_Fp32m80 , X86::ILD_F32m },
718 { X86::ILD_Fp64m32 , X86::ILD_F64m },
719 { X86::ILD_Fp64m64 , X86::ILD_F64m },
720 { X86::ILD_Fp64m80 , X86::ILD_F64m },
721 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
722 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
723 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
724 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
725 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
726 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
727 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
728 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
729 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
730 { X86::IST_Fp16m32 , X86::IST_F16m },
731 { X86::IST_Fp16m64 , X86::IST_F16m },
732 { X86::IST_Fp16m80 , X86::IST_F16m },
733 { X86::IST_Fp32m32 , X86::IST_F32m },
734 { X86::IST_Fp32m64 , X86::IST_F32m },
735 { X86::IST_Fp32m80 , X86::IST_F32m },
736 { X86::IST_Fp64m32 , X86::IST_FP64m },
737 { X86::IST_Fp64m64 , X86::IST_FP64m },
738 { X86::IST_Fp64m80 , X86::IST_FP64m },
739 { X86::LD_Fp032 , X86::LD_F0 },
740 { X86::LD_Fp064 , X86::LD_F0 },
741 { X86::LD_Fp080 , X86::LD_F0 },
742 { X86::LD_Fp132 , X86::LD_F1 },
743 { X86::LD_Fp164 , X86::LD_F1 },
744 { X86::LD_Fp180 , X86::LD_F1 },
745 { X86::LD_Fp32m , X86::LD_F32m },
746 { X86::LD_Fp32m64 , X86::LD_F32m },
747 { X86::LD_Fp32m80 , X86::LD_F32m },
748 { X86::LD_Fp64m , X86::LD_F64m },
749 { X86::LD_Fp64m80 , X86::LD_F64m },
750 { X86::LD_Fp80m , X86::LD_F80m },
751 { X86::MUL_Fp32m , X86::MUL_F32m },
752 { X86::MUL_Fp64m , X86::MUL_F64m },
753 { X86::MUL_Fp64m32 , X86::MUL_F32m },
754 { X86::MUL_Fp80m32 , X86::MUL_F32m },
755 { X86::MUL_Fp80m64 , X86::MUL_F64m },
756 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
757 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
758 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
759 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
760 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
761 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
762 { X86::SQRT_Fp32 , X86::SQRT_F },
763 { X86::SQRT_Fp64 , X86::SQRT_F },
764 { X86::SQRT_Fp80 , X86::SQRT_F },
765 { X86::ST_Fp32m , X86::ST_F32m },
766 { X86::ST_Fp64m , X86::ST_F64m },
767 { X86::ST_Fp64m32 , X86::ST_F32m },
768 { X86::ST_Fp80m32 , X86::ST_F32m },
769 { X86::ST_Fp80m64 , X86::ST_F64m },
770 { X86::ST_FpP80m , X86::ST_FP80m },
771 { X86::SUBR_Fp32m , X86::SUBR_F32m },
772 { X86::SUBR_Fp64m , X86::SUBR_F64m },
773 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
774 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
775 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
776 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
777 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
778 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
779 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
780 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
781 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
782 { X86::SUB_Fp32m , X86::SUB_F32m },
783 { X86::SUB_Fp64m , X86::SUB_F64m },
784 { X86::SUB_Fp64m32 , X86::SUB_F32m },
785 { X86::SUB_Fp80m32 , X86::SUB_F32m },
786 { X86::SUB_Fp80m64 , X86::SUB_F64m },
787 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
788 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
789 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
790 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
791 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
792 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
793 { X86::TST_Fp32 , X86::TST_F },
794 { X86::TST_Fp64 , X86::TST_F },
795 { X86::TST_Fp80 , X86::TST_F },
796 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
797 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
798 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
799 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
800 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
801 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
802 { X86::XAM_Fp32 , X86::XAM_F },
803 { X86::XAM_Fp64 , X86::XAM_F },
804 { X86::XAM_Fp80 , X86::XAM_F },
805};
806
807static unsigned getConcreteOpcode(unsigned Opcode) {
808 ASSERT_SORTED(OpcodeTable);
809 int Opc = Lookup(OpcodeTable, Opcode);
810 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
811 return Opc;
812}
813
814//===----------------------------------------------------------------------===//
815// Helper Methods
816//===----------------------------------------------------------------------===//
817
818// PopTable - Sorted map of instructions to their popping version. The first
819// element is an instruction, the second is the version which pops.
820//
821static const TableEntry PopTable[] = {
822 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
823
824 { X86::COMP_FST0r, X86::FCOMPP },
825 { X86::COM_FIr , X86::COM_FIPr },
826 { X86::COM_FST0r , X86::COMP_FST0r },
827
828 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
829 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
830
831 { X86::IST_F16m , X86::IST_FP16m },
832 { X86::IST_F32m , X86::IST_FP32m },
833
834 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
835
836 { X86::ST_F32m , X86::ST_FP32m },
837 { X86::ST_F64m , X86::ST_FP64m },
838 { X86::ST_Frr , X86::ST_FPrr },
839
840 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
841 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
842
843 { X86::UCOM_FIr , X86::UCOM_FIPr },
844
845 { X86::UCOM_FPr , X86::UCOM_FPPr },
846 { X86::UCOM_Fr , X86::UCOM_FPr },
847};
848
849static bool doesInstructionSetFPSW(MachineInstr &MI) {
850 if (const MachineOperand *MO =
851 MI.findRegisterDefOperand(X86::FPSW, /*TRI=*/nullptr))
852 if (!MO->isDead())
853 return true;
854 return false;
855}
856
857static MachineBasicBlock::iterator
858getNextFPInstruction(MachineBasicBlock::iterator I) {
859 MachineBasicBlock &MBB = *I->getParent();
860 while (++I != MBB.end()) {
861 MachineInstr &MI = *I;
862 if (X86::isX87Instruction(MI))
863 return I;
864 }
865 return MBB.end();
866}
867
868/// popStackAfter - Pop the current value off of the top of the FP stack after
869/// the specified instruction. This attempts to be sneaky and combine the pop
870/// into the instruction itself if possible. The iterator is left pointing to
871/// the last instruction, be it a new pop instruction inserted, or the old
872/// instruction if it was modified in place.
873///
874void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
875 MachineInstr &MI = *I;
876 const DebugLoc &dl = MI.getDebugLoc();
877 ASSERT_SORTED(PopTable);
878
879 popReg();
880
881 // Check to see if there is a popping version of this instruction...
882 int Opcode = Lookup(PopTable, I->getOpcode());
883 if (Opcode != -1) {
884 I->setDesc(TII->get(Opcode));
885 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
886 I->removeOperand(0);
887 MI.dropDebugNumber();
888 } else { // Insert an explicit pop
889 // If this instruction sets FPSW, which is read in following instruction,
890 // insert pop after that reader.
891 if (doesInstructionSetFPSW(MI)) {
892 MachineBasicBlock &MBB = *MI.getParent();
893 MachineBasicBlock::iterator Next = getNextFPInstruction(I);
894 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, /*TRI=*/nullptr))
895 I = Next;
896 }
897 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
898 }
899}
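// Illustrative note (not part of the original source): for a non-popping
// store such as ST_F64m, the PopTable lookup above folds the pop into the
// instruction itself, turning it into ST_FP64m; only when no popping variant
// exists does the code fall back to an explicit "fstp st(0)" (ST_FPrr ST0).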
900
901/// freeStackSlotAfter - Free the specified register from the register stack, so
902/// that it is no longer in a register. If the register is currently at the top
903/// of the stack, we just pop the current instruction, otherwise we store the
904/// current top-of-stack into the specified slot, then pop the top of stack.
905void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
906 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
907 popStackAfter(I);
908 return;
909 }
910
911 // Otherwise, store the top of stack into the dead slot, killing the operand
912 // without having to add in an explicit xchg then pop.
913 //
914 I = freeStackSlotBefore(++I, FPRegNo);
915}
916
917/// freeStackSlotBefore - Free the specified register without trying any
918/// folding.
919MachineInstr *
920FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
921 unsigned STReg = getSTReg(FPRegNo);
922 unsigned OldSlot = getSlot(FPRegNo);
923 unsigned TopReg = Stack[StackTop-1];
924 Stack[OldSlot] = TopReg;
925 RegMap[TopReg] = OldSlot;
926 RegMap[FPRegNo] = ~0;
927 Stack[--StackTop] = ~0;
928 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
929 .addReg(STReg)
930 .getInstr();
931}
932
933/// adjustLiveRegs - Kill and revive registers such that exactly the FP
934/// registers with a bit in Mask are live.
935void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
936 unsigned Defs = Mask;
937 unsigned Kills = 0;
938 for (unsigned i = 0; i < StackTop; ++i) {
939 unsigned RegNo = Stack[i];
940 if (!(Defs & (1 << RegNo)))
941 // This register is live, but we don't want it.
942 Kills |= (1 << RegNo);
943 else
944 // We don't need to imp-def this live register.
945 Defs &= ~(1 << RegNo);
946 }
947 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
948
949 // Produce implicit-defs for free by using killed registers.
950 while (Kills && Defs) {
951 unsigned KReg = llvm::countr_zero(Kills);
952 unsigned DReg = llvm::countr_zero(Defs);
953 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
954 << "\n");
955 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
956 std::swap(RegMap[KReg], RegMap[DReg]);
957 Kills &= ~(1 << KReg);
958 Defs &= ~(1 << DReg);
959 }
960
961 // Kill registers by popping.
962 if (Kills && I != MBB->begin()) {
963 MachineBasicBlock::iterator I2 = std::prev(I);
964 while (StackTop) {
965 unsigned KReg = getStackEntry(0);
966 if (!(Kills & (1 << KReg)))
967 break;
968 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
969 popStackAfter(I2);
970 Kills &= ~(1 << KReg);
971 }
972 }
973
974 // Manually kill the rest.
975 while (Kills) {
976 unsigned KReg = llvm::countr_zero(Kills);
977 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
978 freeStackSlotBefore(I, KReg);
979 Kills &= ~(1 << KReg);
980 }
981
982 // Load zeros for all the imp-defs.
983 while(Defs) {
984 unsigned DReg = llvm::countr_zero(Defs);
985 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
986 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
987 pushReg(DReg);
988 Defs &= ~(1 << DReg);
989 }
990
991 // Now we should have the correct registers live.
992 LLVM_DEBUG(dumpStack());
993 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
994}
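// Worked example (illustrative, not part of the original source): suppose the
// stack holds FP0 and FP2 but the requested Mask is 0b0110 (FP1 and FP2). FP0
// is then a kill and FP1 a def, so the renaming loop above simply relabels
// FP0's slot as FP1 without emitting any code; only unmatched kills are popped
// or freed, and unmatched defs are materialized with LD_F0.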
995
996/// shuffleStackTop - emit fxch instructions before I to shuffle the top
997/// FixCount entries into the order given by FixStack.
998/// FIXME: Is there a better algorithm than insertion sort?
999void FPS::shuffleStackTop(const unsigned char *FixStack,
1000 unsigned FixCount,
1001 MachineBasicBlock::iterator I) {
1002 // Move items into place, starting from the desired stack bottom.
1003 while (FixCount--) {
1004 // Old register at position FixCount.
1005 unsigned OldReg = getStackEntry(FixCount);
1006 // Desired register at position FixCount.
1007 unsigned Reg = FixStack[FixCount];
1008 if (Reg == OldReg)
1009 continue;
1010 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
1011 moveToTop(Reg, I);
1012 if (FixCount > 0)
1013 moveToTop(OldReg, I);
1014 }
1015 LLVM_DEBUG(dumpStack());
1016}
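// Trace (illustrative, not part of the original source): if the stack is
// ST(0)=FP1, ST(1)=FP0, ST(2)=FP2 and FixStack = {FP0, FP1, FP2}, the first
// iteration skips FP2 (already in st(2)); the next one moves FP1 to the top
// (a no-op here) and then FP0, so a single fxch produces the desired order.
// Each mismatched slot costs at most two fxch instructions.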
1017
1018
1019//===----------------------------------------------------------------------===//
1020// Instruction transformation implementation
1021//===----------------------------------------------------------------------===//
1022
1023void FPS::handleCall(MachineBasicBlock::iterator &I) {
1024 MachineInstr &MI = *I;
1025 unsigned STReturns = 0;
1026
1027 bool ClobbersFPStack = false;
1028 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1029 MachineOperand &Op = MI.getOperand(i);
1030 // Check if this call clobbers the FP stack; the regmask clobbers either
1031 // all FP registers or none (asserted below), so checking FP0 is sufficient.
1032 if (Op.isRegMask()) {
1033 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1034#ifndef NDEBUG
1035 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1036 for (unsigned i = 1; i != 8; ++i)
1037 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1038 "Inconsistent FP register clobber");
1039#endif
1040
1041 if (ClobbersFP0)
1042 ClobbersFPStack = true;
1043 }
1044
1045 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1046 continue;
1047
1048 assert(Op.isImplicit() && "Expected implicit def/use");
1049
1050 if (Op.isDef())
1051 STReturns |= 1 << getFPReg(Op);
1052
1053 // Remove the operand so that later passes don't see it.
1054 MI.removeOperand(i);
1055 --i;
1056 --e;
1057 }
1058
1059 // Most calls should have a regmask that clobbers the FP registers. If it
1060 // isn't present then the register allocator didn't spill the FP registers
1061 // so they are still on the stack.
1062 assert((ClobbersFPStack || STReturns == 0) &&
1063 "ST returns without FP stack clobber");
1064 if (!ClobbersFPStack)
1065 return;
1066
1067 unsigned N = llvm::countr_one(STReturns);
1068
1069 // FP registers used for function return must be consecutive starting at
1070 // FP0
1071 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1072
1073 // Reset the FP Stack - It is required because of possible leftovers from
1074 // passed arguments. The caller should assume that the FP stack is
1075 // returned empty (unless the callee returns values on FP stack).
1076 while (StackTop > 0)
1077 popReg();
1078
1079 for (unsigned I = 0; I < N; ++I)
1080 pushReg(N - I - 1);
1081
1082 // If this call has been modified, drop all variable values defined by it.
1083 // We can't track them once they've been stackified.
1084 if (STReturns)
1085 I->dropDebugNumber();
1086}
1087
1088/// If RET has an FP register use operand, pass the first one in ST(0) and
1089/// the second one in ST(1).
1090void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1091 MachineInstr &MI = *I;
1092
1093 // Find the register operands.
1094 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1095 unsigned LiveMask = 0;
1096
1097 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1098 MachineOperand &Op = MI.getOperand(i);
1099 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1100 continue;
1101 // FP Register uses must be kills unless there are two uses of the same
1102 // register, in which case only one will be a kill.
1103 assert(Op.isUse() &&
1104 (Op.isKill() || // Marked kill.
1105 getFPReg(Op) == FirstFPRegOp || // Second instance.
1106 MI.killsRegister(Op.getReg(),
1107 /*TRI=*/nullptr)) && // Later use is marked kill.
1108 "Ret only defs operands, and values aren't live beyond it");
1109
1110 if (FirstFPRegOp == ~0U)
1111 FirstFPRegOp = getFPReg(Op);
1112 else {
1113 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1114 SecondFPRegOp = getFPReg(Op);
1115 }
1116 LiveMask |= (1 << getFPReg(Op));
1117
1118 // Remove the operand so that later passes don't see it.
1119 MI.removeOperand(i);
1120 --i;
1121 --e;
1122 }
1123
1124 // We may have been carrying spurious live-ins, so make sure only the
1125 // returned registers are left live.
1126 adjustLiveRegs(LiveMask, MI);
1127 if (!LiveMask) return; // Quick check to see if any are possible.
1128
1129 // There are only four possibilities here:
1130 // 1) we are returning a single FP value. In this case, it has to be in
1131 // ST(0) already, so just declare success by removing the value from the
1132 // FP Stack.
1133 if (SecondFPRegOp == ~0U) {
1134 // Assert that the top of stack contains the right FP register.
1135 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1136 "Top of stack not the right register for RET!");
1137
1138 // Ok, everything is good, mark the value as not being on the stack
1139 // anymore so that our assertion about the stack being empty at end of
1140 // block doesn't fire.
1141 StackTop = 0;
1142 return;
1143 }
1144
1145 // Otherwise, we are returning two values:
1146 // 2) If returning the same value for both, we only have one thing in the FP
1147 // stack. Consider: RET FP1, FP1
1148 if (StackTop == 1) {
1149 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1150 "Stack misconfiguration for RET!");
1151
1152 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1153 // register to hold it.
1154 unsigned NewReg = ScratchFPReg;
1155 duplicateToTop(FirstFPRegOp, NewReg, MI);
1156 FirstFPRegOp = NewReg;
1157 }
1158
1159 /// Okay we know we have two different FPx operands now:
1160 assert(StackTop == 2 && "Must have two values live!");
1161
1162 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1163 /// in ST(1). In this case, emit an fxch.
1164 if (getStackEntry(0) == SecondFPRegOp) {
1165 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1166 moveToTop(FirstFPRegOp, MI);
1167 }
1168
1169 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1170 /// ST(1). Just remove both from our understanding of the stack and return.
1171 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1172 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1173 StackTop = 0;
1174}
1175
1176/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1177///
1178void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1179 MachineInstr &MI = *I;
1180 unsigned DestReg = getFPReg(MI.getOperand(0));
1181
1182 // Change from the pseudo instruction to the concrete instruction.
1183 MI.removeOperand(0); // Remove the explicit ST(0) operand
1184 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1185 MI.addOperand(
1186 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1187
1188 // Result gets pushed on the stack.
1189 pushReg(DestReg);
1190
1191 MI.dropDebugNumber();
1192}
1193
1194/// handleOneArgFP - fst <mem>, ST(0)
1195///
1196void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1197 MachineInstr &MI = *I;
1198 unsigned NumOps = MI.getDesc().getNumOperands();
1199 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1200 "Can only handle fst* & ftst instructions!");
1201
1202 // Is this the last use of the source register?
1203 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1204 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1205
1206 // FISTP64m is strange because there isn't a non-popping version.
1207 // If we have one _and_ we don't want to pop the operand, duplicate the value
1208 // on the stack instead of moving it. This ensures that popping the value is
1209 // always ok.
1210 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1211 //
1212 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1213 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1214 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1215 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1216 MI.getOpcode() == X86::IST_Fp64m64 ||
1217 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1218 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1219 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1220 MI.getOpcode() == X86::IST_Fp64m80 ||
1221 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1222 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1223 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1224 MI.getOpcode() == X86::ST_FpP80m)) {
1225 duplicateToTop(Reg, ScratchFPReg, I);
1226 } else {
1227 moveToTop(Reg, I); // Move to the top of the stack...
1228 }
1229
1230 // Convert from the pseudo instruction to the concrete instruction.
1231 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1232 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1233 MI.addOperand(
1234 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1235
1236 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1237 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1238 MI.getOpcode() == X86::ST_FP80m) {
1239 if (StackTop == 0)
1240 report_fatal_error("Stack empty??");
1241 --StackTop;
1242 } else if (KillsSrc) { // Last use of operand?
1243 popStackAfter(I);
1244 }
1245
1246 MI.dropDebugNumber();
1247}
1248
1249
1250/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1251/// replace the value with a newly computed value. These instructions may have
1252/// non-fp operands after their FP operands.
1253///
1254/// Examples:
1255/// R1 = fchs R2
1256/// R1 = fadd R2, [mem]
1257///
1258void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1259 MachineInstr &MI = *I;
1260#ifndef NDEBUG
1261 unsigned NumOps = MI.getDesc().getNumOperands();
1262 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1263#endif
1264
1265 // Is this the last use of the source register?
1266 unsigned Reg = getFPReg(MI.getOperand(1));
1267 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1268
1269 if (KillsSrc) {
1270 // If this is the last use of the source register, just make sure it's on
1271 // the top of the stack.
1272 moveToTop(Reg, I);
1273 if (StackTop == 0)
1274 report_fatal_error("Stack cannot be empty!");
1275 --StackTop;
1276 pushReg(getFPReg(MI.getOperand(0)));
1277 } else {
1278 // If this is not the last use of the source register, _copy_ it to the top
1279 // of the stack.
1280 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1281 }
1282
1283 // Change from the pseudo instruction to the concrete instruction.
1284 MI.removeOperand(1); // Drop the source operand.
1285 MI.removeOperand(0); // Drop the destination operand.
1286 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1287 MI.dropDebugNumber();
1288}
1289
1290
1291//===----------------------------------------------------------------------===//
1292// Define tables of various ways to map pseudo instructions
1293//
1294
1295// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1296static const TableEntry ForwardST0Table[] = {
1297 { X86::ADD_Fp32 , X86::ADD_FST0r },
1298 { X86::ADD_Fp64 , X86::ADD_FST0r },
1299 { X86::ADD_Fp80 , X86::ADD_FST0r },
1300 { X86::DIV_Fp32 , X86::DIV_FST0r },
1301 { X86::DIV_Fp64 , X86::DIV_FST0r },
1302 { X86::DIV_Fp80 , X86::DIV_FST0r },
1303 { X86::MUL_Fp32 , X86::MUL_FST0r },
1304 { X86::MUL_Fp64 , X86::MUL_FST0r },
1305 { X86::MUL_Fp80 , X86::MUL_FST0r },
1306 { X86::SUB_Fp32 , X86::SUB_FST0r },
1307 { X86::SUB_Fp64 , X86::SUB_FST0r },
1308 { X86::SUB_Fp80 , X86::SUB_FST0r },
1309};
1310
1311// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1312static const TableEntry ReverseST0Table[] = {
1313 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1314 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1315 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1316 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1317 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1318 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1319 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1320 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1321 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1322 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1323 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1324 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1325};
1326
1327// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1328static const TableEntry ForwardSTiTable[] = {
1329 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1330 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1331 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1332 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1333 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1334 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1335 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1336 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1337 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1338 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1339 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1340 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1341};
1342
1343// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1344static const TableEntry ReverseSTiTable[] = {
1345 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1346 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1347 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1348 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1349 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1350 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1351 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1352 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1353 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1354 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1355 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1356 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1357};
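// Selection example (illustrative, not part of the original source): for
// "%fp3 = SUB_Fp80 %fp1, %fp2" with %fp1 on top of the stack and only %fp2
// killed, the result must overwrite %fp2's slot, so handleTwoArgFP picks the
// forward STi table and emits SUBR_FrST0 (st(i) = st(0) - st(i)); the slot
// that held %fp2 is then relabeled as %fp3. If neither operand were killed,
// one would first be duplicated to the top and an ST0 form used instead.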
1358
1359
1360/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1361/// instructions which need to be simplified and possibly transformed.
1362///
1363/// Result: ST(0) = fsub ST(0), ST(i)
1364/// ST(i) = fsub ST(0), ST(i)
1365/// ST(0) = fsubr ST(0), ST(i)
1366/// ST(i) = fsubr ST(0), ST(i)
1367///
1368void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1369 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1370 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1371 MachineInstr &MI = *I;
1372
1373 unsigned NumOperands = MI.getDesc().getNumOperands();
1374 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1375 unsigned Dest = getFPReg(MI.getOperand(0));
1376 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1377 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1378 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1379 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1380 const DebugLoc &dl = MI.getDebugLoc();
1381
1382 unsigned TOS = getStackEntry(0);
1383
1384 // One of our operands must be on the top of the stack. If neither is yet, we
1385 // need to move one.
1386 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1387 // We can choose to move either operand to the top of the stack. If one of
1388 // the operands is killed by this instruction, we want that one so that we
1389 // can update right on top of the old version.
1390 if (KillsOp0) {
1391 moveToTop(Op0, I); // Move dead operand to TOS.
1392 TOS = Op0;
1393 } else if (KillsOp1) {
1394 moveToTop(Op1, I);
1395 TOS = Op1;
1396 } else {
1397 // All of the operands are live after this instruction executes, so we
1398 // cannot update on top of any operand. Because of this, we must
1399 // duplicate one of the stack elements to the top. It doesn't matter
1400 // which one we pick.
1401 //
1402 duplicateToTop(Op0, Dest, I);
1403 Op0 = TOS = Dest;
1404 KillsOp0 = true;
1405 }
1406 } else if (!KillsOp0 && !KillsOp1) {
1407 // If we DO have one of our operands at the top of the stack, but we don't
1408 // have a dead operand, we must duplicate one of the operands to a new slot
1409 // on the stack.
1410 duplicateToTop(Op0, Dest, I);
1411 Op0 = TOS = Dest;
1412 KillsOp0 = true;
1413 }
1414
1415 // Now we know that one of our operands is on the top of the stack, and at
1416 // least one of our operands is killed by this instruction.
1417 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1418 "Stack conditions not set up right!");
1419
1420 // We decide which form to use based on what is on the top of the stack, and
1421 // which operand is killed by this instruction.
1422 ArrayRef<TableEntry> InstTable;
1423 bool isForward = TOS == Op0;
1424 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1425 if (updateST0) {
1426 if (isForward)
1427 InstTable = ForwardST0Table;
1428 else
1429 InstTable = ReverseST0Table;
1430 } else {
1431 if (isForward)
1432 InstTable = ForwardSTiTable;
1433 else
1434 InstTable = ReverseSTiTable;
1435 }
1436
1437 int Opcode = Lookup(InstTable, MI.getOpcode());
1438 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1439
1440 // NotTOS - The register which is not on the top of stack...
1441 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1442
1443 // Replace the old instruction with a new instruction
1444 MBB->remove(&*I++);
1445 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1446
1447 if (!MI.mayRaiseFPException())
1448 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1449
1450 // If both operands are killed, pop one off of the stack in addition to
1451 // overwriting the other one.
1452 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1453 assert(!updateST0 && "Should have updated other operand!");
1454 popStackAfter(I); // Pop the top of stack
1455 }
1456
1457 // Update stack information so that we know the destination register is now on
1458 // the stack.
1459 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1460 assert(UpdatedSlot < StackTop && Dest < 7);
1461 Stack[UpdatedSlot] = Dest;
1462 RegMap[Dest] = UpdatedSlot;
1463 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1464}
1465
1466/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1467/// register arguments and no explicit destinations.
1468///
1469void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1470 MachineInstr &MI = *I;
1471
1472 unsigned NumOperands = MI.getDesc().getNumOperands();
1473 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1474 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1475 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1476 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1477 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1478
1479 // Make sure the first operand is on the top of stack, the other one can be
1480 // anywhere.
1481 moveToTop(Op0, I);
1482
1483 // Change from the pseudo instruction to the concrete instruction.
1484 MI.getOperand(0).setReg(getSTReg(Op1));
1485 MI.removeOperand(1);
1486 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1487 MI.dropDebugNumber();
1488
1489 // If any of the operands are killed by this instruction, free them.
1490 if (KillsOp0) freeStackSlotAfter(I, Op0);
1491 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1492}
1493
1494/// handleCondMovFP - Handle two address conditional move instructions. These
1495/// instructions move a st(i) register to st(0) iff a condition is true. These
1496/// instructions require that the first operand is at the top of the stack, but
1497/// otherwise don't modify the stack at all.
1498void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1499 MachineInstr &MI = *I;
1500
1501 unsigned Op0 = getFPReg(MI.getOperand(0));
1502 unsigned Op1 = getFPReg(MI.getOperand(2));
1503 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1504
1505 // The first operand *must* be on the top of the stack.
1506 moveToTop(Op0, I);
1507
1508 // Change the second operand to the stack register that the operand is in.
1509 // Change from the pseudo instruction to the concrete instruction.
1510 MI.removeOperand(0);
1511 MI.removeOperand(1);
1512 MI.getOperand(0).setReg(getSTReg(Op1));
1513 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1514 MI.dropDebugNumber();
1515
1516 // If we kill the second operand, make sure to pop it from the stack.
1517 if (Op0 != Op1 && KillsOp1) {
1518 // Get this value off of the register stack.
1519 freeStackSlotAfter(I, Op1);
1520 }
1521}
1522
1523
1524/// handleSpecialFP - Handle special instructions which behave unlike other
1525/// floating point instructions. This is primarily intended for use by pseudo
1526/// instructions.
1527///
1528void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1529 MachineInstr &MI = *Inst;
1530
1531 if (MI.isCall()) {
1532 handleCall(Inst);
1533 return;
1534 }
1535
1536 if (MI.isReturn()) {
1537 handleReturn(Inst);
1538 return;
1539 }
1540
1541 switch (MI.getOpcode()) {
1542 default: llvm_unreachable("Unknown SpecialFP instruction!");
1543 case TargetOpcode::COPY: {
1544 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1545 const MachineOperand &MO1 = MI.getOperand(1);
1546 const MachineOperand &MO0 = MI.getOperand(0);
1547 bool KillsSrc = MI.killsRegister(MO1.getReg(), /*TRI=*/nullptr);
1548
1549 // FP <- FP copy.
1550 unsigned DstFP = getFPReg(MO0);
1551 unsigned SrcFP = getFPReg(MO1);
1552 assert(isLive(SrcFP) && "Cannot copy dead register");
1553 if (KillsSrc) {
1554 // If the input operand is killed, we can just change the owner of the
1555 // incoming stack slot into the result.
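 // For example (hypothetical numbering): if %fp1 lives in slot 2 and this
 // COPY is its last use, relabelling that slot as the destination
 // (Stack[2] = DstFP, RegMap[DstFP] = 2) is all that is needed; no fld or
 // fxch is emitted.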
1556 unsigned Slot = getSlot(SrcFP);
1557 Stack[Slot] = DstFP;
1558 RegMap[DstFP] = Slot;
1559 } else {
1560 // For COPY we just duplicate the specified value to a new stack slot.
1561 // This could be made better, but would require substantial changes.
1562 duplicateToTop(SrcFP, DstFP, Inst);
1563 }
1564 break;
1565 }
1566
1567 case TargetOpcode::IMPLICIT_DEF: {
1568 // All FP registers must be explicitly defined, so load a 0 instead.
1569 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1570 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1571 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1572 pushReg(Reg);
1573 break;
1574 }
1575
1576 case TargetOpcode::INLINEASM:
1577 case TargetOpcode::INLINEASM_BR: {
1578 // The inline asm MachineInstr currently only *uses* FP registers for the
1579 // 'f' constraint. These should be turned into the current ST(x) register
1580 // in the machine instr.
1581 //
1582 // There are special rules for x87 inline assembly. The compiler must know
1583 // exactly how many registers are popped and pushed implicitly by the asm.
1584 // Otherwise it is not possible to restore the stack state after the inline
1585 // asm.
1586 //
1587 // There are 3 kinds of input operands:
1588 //
1589 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1590 // popped input operand must be in a fixed stack slot, and it is either
1591 // tied to an output operand, or in the clobber list. The MI has ST use
1592 // and def operands for these inputs.
1593 //
1594 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1595 // preserved by the inline asm. The fixed stack slots must be STn-STm
1596 // following the popped inputs. A fixed input operand cannot be tied to
1597 // an output or appear in the clobber list. The MI has ST use operands
1598 // and no defs for these inputs.
1599 //
1600 // 3. Preserved inputs. These inputs use the "f" constraint which is
1601 // represented as an FP register. The inline asm won't change these
1602 // stack slots.
1603 //
1604 // Outputs must be in ST registers; FP outputs are not allowed. Clobbered
1605 // registers do not count as output operands. The inline asm changes the
1606 // stack as if it popped all the popped inputs and then pushed all the
1607 // output operands.
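 // As a purely illustrative example (a user-level asm statement, not code
 // from this pass), the classic fpatan idiom shows both flavours of popped
 // input:
 //
 //   double r;
 //   asm("fpatan" : "=t"(r) : "0"(x), "u"(y) : "st(1)");
 //
 // "0"(x) is a popped input tied to the "t" output, "u"(y) is a popped input
 // covered by the "st(1)" clobber, and an extra "f"-constrained operand would
 // instead be a preserved input left in place on the stack.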
1608
1609 // Scan the assembly for ST registers used, defined and clobbered. We can
1610 // only tell clobbers from defs by looking at the asm descriptor.
1611 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1612 unsigned NumOps = 0;
1613 SmallSet<unsigned, 1> FRegIdx;
1614 unsigned RCID;
1615
1616 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1617 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1618 unsigned Flags = MI.getOperand(i).getImm();
1619 const InlineAsm::Flag F(Flags);
1620
1621 NumOps = F.getNumOperandRegisters();
1622 if (NumOps != 1)
1623 continue;
1624 const MachineOperand &MO = MI.getOperand(i + 1);
1625 if (!MO.isReg())
1626 continue;
1627 unsigned STReg = MO.getReg() - X86::FP0;
1628 if (STReg >= 8)
1629 continue;
1630
1631 // If the flag has a register class constraint, this must be an operand
1632 // with constraint "f". Record its index and continue.
1633 if (F.hasRegClassConstraint(RCID)) {
1634 FRegIdx.insert(i + 1);
1635 continue;
1636 }
1637
1638 switch (F.getKind()) {
1639 case InlineAsm::Kind::RegUse:
1640 STUses |= (1u << STReg);
1641 break;
1642 case InlineAsm::Kind::RegDef:
1643 case InlineAsm::Kind::RegDefEarlyClobber:
1644 STDefs |= (1u << STReg);
1645 break;
1646 case InlineAsm::Kind::Clobber:
1647 STClobbers |= (1u << STReg);
1648 break;
1649 default:
1650 break;
1651 }
1652 }
1653
1654 if (STUses && !isMask_32(STUses))
1655 MI.emitGenericError("fixed input regs must be last on the x87 stack");
1656 unsigned NumSTUses = llvm::countr_one(STUses);
1657
1658 // Defs must be contiguous from the stack top. ST0-STn.
1659 if (STDefs && !isMask_32(STDefs)) {
1660 MI.emitGenericError("output regs must be last on the x87 stack");
1661 STDefs = NextPowerOf2(STDefs) - 1;
1662 }
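 // For example, STDefs == 0b0011 (st0 and st1) is a valid contiguous mask,
 // while 0b0101 triggers the error above and is widened by
 // NextPowerOf2(STDefs) - 1 to 0b0111 so the rewrite can still proceed.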
1663 unsigned NumSTDefs = llvm::countr_one(STDefs);
1664
1665 // So must the clobbered stack slots. ST0-STm, m >= n.
1666 if (STClobbers && !isMask_32(STDefs | STClobbers))
1667 MI.emitGenericError("clobbers must be last on the x87 stack");
1668
1669 // Popped inputs are the ones that are also clobbered or defined.
1670 unsigned STPopped = STUses & (STDefs | STClobbers);
1671 if (STPopped && !isMask_32(STPopped))
1672 MI.emitGenericError(
1673 "implicitly popped regs must be last on the x87 stack");
1674 unsigned NumSTPopped = llvm::countr_one(STPopped);
1675
1676 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1677 << NumSTPopped << ", and defines " << NumSTDefs
1678 << " regs.\n");
1679
1680#ifndef NDEBUG
1681 // If any input operand uses constraint "f", all output register
1682 // constraints must be early-clobber defs.
1683 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1684 if (FRegIdx.count(I)) {
1685 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1686 "Operands with constraint \"f\" cannot overlap with defs");
1687 }
1688#endif
1689
1690 // Collect all FP registers (register operands with constraints "t", "u",
1691 // and "f") to kill after the instruction.
1692 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1693 for (const MachineOperand &Op : MI.operands()) {
1694 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1695 continue;
1696 unsigned FPReg = getFPReg(Op);
1697
1698 // If we kill this operand, make sure to pop it from the stack after the
1699 // asm. We just remember it for now, and pop them all off at the end in
1700 // a batch.
1701 if (Op.isUse() && Op.isKill())
1702 FPKills |= 1U << FPReg;
1703 }
1704
1705 // Do not include registers that are implicitly popped by defs/clobbers.
1706 FPKills &= ~(STDefs | STClobbers);
1707
1708 // Now we can rearrange the live registers to match what was requested.
1709 unsigned char STUsesArray[8];
1710
1711 for (unsigned I = 0; I < NumSTUses; ++I)
1712 STUsesArray[I] = I;
1713
1714 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1715 LLVM_DEBUG({
1716 dbgs() << "Before asm: ";
1717 dumpStack();
1718 });
1719
1720 // With the stack layout fixed, rewrite the FP registers.
1721 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1722 MachineOperand &Op = MI.getOperand(i);
1723 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1724 continue;
1725
1726 unsigned FPReg = getFPReg(Op);
1727
1728 if (FRegIdx.count(i))
1729 // Operand with constraint "f".
1730 Op.setReg(getSTReg(FPReg));
1731 else
1732 // Operand with a single register class constraint ("t" or "u").
1733 Op.setReg(X86::ST0 + FPReg);
1734 }
1735
1736 // Simulate the inline asm popping its inputs and pushing its outputs.
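 // For example, with NumSTPopped == 2 and NumSTDefs == 2 the two top slots
 // are dropped, then %fp1 and %fp0 are pushed in that order so the outputs
 // end up as %fp0 = st(0) and %fp1 = st(1).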
1737 StackTop -= NumSTPopped;
1738
1739 for (unsigned i = 0; i < NumSTDefs; ++i)
1740 pushReg(NumSTDefs - i - 1);
1741
1742 // If this asm kills any FP registers (is the last use of them) we must
1743 // explicitly emit pop instructions for them. Do this now after the asm has
1744 // executed so that the ST(x) numbers are not off (which would happen if we
1745 // did this inline with operand rewriting).
1746 //
1747 // Note: this might be a non-optimal pop sequence. We might be able to do
1748 // better by trying to pop in stack order or something.
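 // For example, FPKills == 0b0101 pops %fp0 and then %fp2, each only if it
 // is still live at this point.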
1749 while (FPKills) {
1750 unsigned FPReg = llvm::countr_zero(FPKills);
1751 if (isLive(FPReg))
1752 freeStackSlotAfter(Inst, FPReg);
1753 FPKills &= ~(1U << FPReg);
1754 }
1755
1756 // Don't delete the inline asm!
1757 return;
1758 }
1759
1760 // FAKE_USE must pop its register operand off the stack if it is killed,
1761 // because this constitutes the register's last use. If the operand
1762 // is not killed, it will have its last use later, so we leave it alone.
1763 // In either case we remove the operand so later passes don't see it.
1764 case TargetOpcode::FAKE_USE: {
1765 assert(MI.getNumExplicitOperands() == 1 &&
1766 "FAKE_USE must have exactly one operand");
1767 if (MI.getOperand(0).isKill()) {
1768 freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
1769 }
1770 MI.removeOperand(0);
1771 return;
1772 }
1773 }
1774
1775 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1776
1777 // We want to leave Inst pointing to the previous instruction, but what
1778 // if we just erased the first instruction?
1779 if (Inst == MBB->begin()) {
1780 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1781 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1782 } else
1783 --Inst;
1784}
1785
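/// setKillFlags - Walk the basic block bottom-up with LiveRegUnits, marking
/// the last use of each FP register as a kill and each unused definition as
/// dead. The instruction handlers above rely on these flags to decide when a
/// value can be popped from the x87 stack.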
1786void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1787 const TargetRegisterInfo &TRI =
1788 *MBB.getParent()->getSubtarget().getRegisterInfo();
1789 LiveRegUnits LPR(TRI);
1790
1791 LPR.addLiveOuts(MBB);
1792
1793 for (MachineInstr &MI : llvm::reverse(MBB)) {
1794 if (MI.isDebugInstr())
1795 continue;
1796
1797 std::bitset<8> Defs;
1798 SmallVector<MachineOperand *, 2> Uses;
1799
1800 for (auto &MO : MI.operands()) {
1801 if (!MO.isReg())
1802 continue;
1803
1804 unsigned Reg = MO.getReg() - X86::FP0;
1805
1806 if (Reg >= 8)
1807 continue;
1808
1809 if (MO.isDef()) {
1810 Defs.set(Reg);
1811 if (LPR.available(MO.getReg()))
1812 MO.setIsDead();
1813 } else
1814 Uses.push_back(&MO);
1815 }
1816
1817 for (auto *MO : Uses)
1818 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))
1819 MO->setIsKill();
1820
1821 LPR.stepBackward(MI);
1822 }
1823}