1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
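//
// A worked example of the conversion (hypothetical operands, assuming %fp0
// and %fp1 are live in st(1) and st(0) respectively and both die here): the
// register-form pseudo
//     %fp2 = ADD_Fp80 killed %fp0, killed %fp1
// can be rewritten to the single popping stack instruction
//     ADD_FPrST0 %st(1)   // st(1) = st(1) + st(0); pop, result left in st(0)
// with the pass's internal stack model updated so %fp2 owns the result slot.
//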
24
25#include "X86.h"
26#include "X86InstrInfo.h"
27#include "llvm/ADT/DepthFirstIterator.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallPtrSet.h"
30#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/CodeGen/EdgeBundles.h"
34#include "llvm/CodeGen/LivePhysRegs.h"
35#include "llvm/CodeGen/MachineFunctionPass.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/Passes.h"
39#include "llvm/CodeGen/TargetInstrInfo.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/Config/llvm-config.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/InitializePasses.h"
44#include "llvm/Support/Debug.h"
45#include "llvm/Support/ErrorHandling.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/Target/TargetMachine.h"
48#include <algorithm>
49#include <bitset>
50using namespace llvm;
51
52#define DEBUG_TYPE "x86-codegen"
53
54STATISTIC(NumFXCH, "Number of fxch instructions inserted");
55STATISTIC(NumFP , "Number of floating point instructions");
56
57namespace {
58 const unsigned ScratchFPReg = 7;
59
60 struct FPS : public MachineFunctionPass {
61 static char ID;
62 FPS() : MachineFunctionPass(ID) {
63 // This is really only to keep valgrind quiet.
64 // The logic in isLive() is too much for it.
65 memset(Stack, 0, sizeof(Stack));
66 memset(RegMap, 0, sizeof(RegMap));
67 }
68
69 void getAnalysisUsage(AnalysisUsage &AU) const override {
70 AU.setPreservesCFG();
71 AU.addRequired<EdgeBundles>();
72 AU.addPreservedID(MachineLoopInfoID);
73 AU.addPreservedID(MachineDominatorsID);
74 MachineFunctionPass::getAnalysisUsage(AU);
75 }
76
77 bool runOnMachineFunction(MachineFunction &MF) override;
78
79 MachineFunctionProperties getRequiredProperties() const override {
80 return MachineFunctionProperties().set(
81 MachineFunctionProperties::Property::NoVRegs);
82 }
83
84 StringRef getPassName() const override { return "X86 FP Stackifier"; }
85
86 private:
87 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
88
89 // Two CFG edges are related if they leave the same block, or enter the same
90 // block. The transitive closure of an edge under this relation is a
91 // LiveBundle. It represents a set of CFG edges where the live FP stack
92 // registers must be allocated identically in the x87 stack.
93 //
94 // A LiveBundle is usually all the edges leaving a block, or all the edges
95 // entering a block, but it can contain more edges if critical edges are
96 // present.
97 //
98 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
99 // but the exact mapping of FP registers to stack slots is fixed later.
100 struct LiveBundle {
101 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
102 unsigned Mask = 0;
103
104 // Number of pre-assigned live registers in FixStack. This is 0 when the
105 // stack order has not yet been fixed.
106 unsigned FixCount = 0;
107
108 // Assigned stack order for live-in registers.
109 // FixStack[i] == getStackEntry(i) for all i < FixCount.
110 unsigned char FixStack[8];
111
112 LiveBundle() = default;
113
114 // Have the live registers been assigned a stack order yet?
115 bool isFixed() const { return !Mask || FixCount; }
116 };
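// A hypothetical example of this encoding: if FP0 and FP2 are live across a
// bundle, with FP2 expected in st(0) and FP0 in st(1), the bundle holds
// Mask == 0b101, FixCount == 2, FixStack[0] == 2 and FixStack[1] == 0
// (FixStack[i] names the FP register expected in st(i)).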
117
118 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
119 // with no live FP registers.
120 SmallVector<LiveBundle, 8> LiveBundles;
121
122 // The edge bundle analysis provides indices into the LiveBundles vector.
123 EdgeBundles *Bundles = nullptr;
124
125 // Return a bitmask of FP registers in block's live-in list.
126 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
127 unsigned Mask = 0;
128 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
129 I != MBB->livein_end(); ) {
130 MCPhysReg Reg = I->PhysReg;
131 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
132 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
133 Mask |= 1 << (Reg - X86::FP0);
134 if (RemoveFPs) {
135 I = MBB->removeLiveIn(I);
136 continue;
137 }
138 }
139 ++I;
140 }
141 return Mask;
142 }
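// Example: a block whose live-in list contains FP0 and FP3 (and no other FP
// registers) produces Mask == 0b1001; with RemoveFPs set, those entries are
// also removed from the live-in list.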
143
144 // Partition all the CFG edges into LiveBundles.
145 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
146
147 MachineBasicBlock *MBB = nullptr; // Current basic block
148
149 // The hardware keeps track of how many FP registers are live, so we have
150 // to model that exactly. Usually, each live register corresponds to an
151 // FP<n> register, but when dealing with calls, returns, and inline
152 // assembly, it is sometimes necessary to have live scratch registers.
153 unsigned Stack[8]; // FP<n> Registers in each stack slot...
154 unsigned StackTop = 0; // The current top of the FP stack.
155
156 enum {
157 NumFPRegs = 8 // Including scratch pseudo-registers.
158 };
159
160 // For each live FP<n> register, point to its Stack[] entry.
161 // The first entries correspond to FP0-FP6, the rest are scratch registers
162 // used when we need slightly different live registers than what the
163 // register allocator thinks.
164 unsigned RegMap[NumFPRegs];
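// The two arrays are kept mutually consistent: Stack[RegMap[R]] == R for
// every live register R. For example, after pushReg(3) followed by pushReg(1)
// the model is Stack == {3, 1}, StackTop == 2, RegMap[3] == 0, RegMap[1] == 1,
// i.e. FP1 is in st(0) and FP3 is in st(1).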
165
166 // Set up our stack model to match the incoming registers to MBB.
167 void setupBlockStack();
168
169 // Shuffle live registers to match the expectations of successor blocks.
170 void finishBlockStack();
171
172#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
173 void dumpStack() const {
174 dbgs() << "Stack contents:";
175 for (unsigned i = 0; i != StackTop; ++i) {
176 dbgs() << " FP" << Stack[i];
177 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
178 }
179 }
180#endif
181
182 /// getSlot - Return the stack slot number a particular register number is
183 /// in.
184 unsigned getSlot(unsigned RegNo) const {
185 assert(RegNo < NumFPRegs && "Regno out of range!");
186 return RegMap[RegNo];
187 }
188
189 /// isLive - Is RegNo currently live in the stack?
190 bool isLive(unsigned RegNo) const {
191 unsigned Slot = getSlot(RegNo);
192 return Slot < StackTop && Stack[Slot] == RegNo;
193 }
194
195 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
196 unsigned getStackEntry(unsigned STi) const {
197 if (STi >= StackTop)
198 report_fatal_error("Access past stack top!");
199 return Stack[StackTop-1-STi];
200 }
201
202 /// getSTReg - Return the X86::ST(i) register which contains the specified
203 /// FP<RegNo> register.
204 unsigned getSTReg(unsigned RegNo) const {
205 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
206 }
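// Example: with StackTop == 3 and getSlot(RegNo) == 0 (the bottom slot), the
// register is two entries below the top, so this returns X86::ST2.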
207
208 // pushReg - Push the specified FP<n> register onto the stack.
209 void pushReg(unsigned Reg) {
210 assert(Reg < NumFPRegs && "Register number out of range!");
211 if (StackTop >= 8)
212 report_fatal_error("Stack overflow!");
213 Stack[StackTop] = Reg;
214 RegMap[Reg] = StackTop++;
215 }
216
217 // popReg - Pop a register from the stack.
218 void popReg() {
219 if (StackTop == 0)
220 report_fatal_error("Cannot pop empty stack!");
221 RegMap[Stack[--StackTop]] = ~0; // Update state
222 }
223
224 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
225 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
226 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
227 if (isAtTop(RegNo)) return;
228
229 unsigned STReg = getSTReg(RegNo);
230 unsigned RegOnTop = getStackEntry(0);
231
232 // Swap the slots the regs are in.
233 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
234
235 // Swap stack slot contents.
236 if (RegMap[RegOnTop] >= StackTop)
237 report_fatal_error("Access past stack top!");
238 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
239
240 // Emit an fxch to update the runtime processor's version of the state.
241 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
242 ++NumFXCH;
243 }
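// Example: if FP3 currently sits in st(2), moveToTop(3, I) swaps the Stack[]
// and RegMap[] entries for FP3 and the register on top, and emits
// XCH_F %st(2) (an fxch), after which FP3 is in st(0).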
244
245 void duplicateToTop(unsigned RegNo, unsigned AsReg,
246 MachineBasicBlock::iterator I) {
247 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
248 unsigned STReg = getSTReg(RegNo);
249 pushReg(AsReg); // New register on top of stack
250
251 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
252 }
253
254 /// popStackAfter - Pop the current value off of the top of the FP stack
255 /// after the specified instruction.
256 void popStackAfter(MachineBasicBlock::iterator &I);
257
258 /// freeStackSlotAfter - Free the specified register from the register
259 /// stack, so that it is no longer in a register. If the register is
260 /// currently at the top of the stack, we just pop the current instruction,
261 /// otherwise we store the current top-of-stack into the specified slot,
262 /// then pop the top of stack.
263 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
264
265 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
266 /// instruction.
267 MachineInstr *
268 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
269
270 /// Adjust the live registers to be the set in Mask.
271 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
272
273 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
274 /// st(0), FP reg FixStack[1] is st(1) etc.
275 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
276 MachineBasicBlock::iterator I);
277
278 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
279
280 void handleCall(MachineBasicBlock::iterator &I);
281 void handleReturn(MachineBasicBlock::iterator &I);
282 void handleZeroArgFP(MachineBasicBlock::iterator &I);
283 void handleOneArgFP(MachineBasicBlock::iterator &I);
284 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
285 void handleTwoArgFP(MachineBasicBlock::iterator &I);
286 void handleCompareFP(MachineBasicBlock::iterator &I);
287 void handleCondMovFP(MachineBasicBlock::iterator &I);
288 void handleSpecialFP(MachineBasicBlock::iterator &I);
289
290 // Check if a COPY instruction is using FP registers.
291 static bool isFPCopy(MachineInstr &MI) {
292 Register DstReg = MI.getOperand(0).getReg();
293 Register SrcReg = MI.getOperand(1).getReg();
294
295 return X86::RFP80RegClass.contains(DstReg) ||
296 X86::RFP80RegClass.contains(SrcReg);
297 }
298
299 void setKillFlags(MachineBasicBlock &MBB) const;
300 };
301}
302
303char FPS::ID = 0;
304
305INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
306 false, false)
307INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
308INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
309 false, false)
310
311FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
312
313/// getFPReg - Return the X86::FPx register number for the specified operand.
314/// For example, this returns 3 for X86::FP3.
315static unsigned getFPReg(const MachineOperand &MO) {
316 assert(MO.isReg() && "Expected an FP register!");
317 Register Reg = MO.getReg();
318 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
319 return Reg - X86::FP0;
320}
321
322/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
323/// register references into FP stack references.
324///
325bool FPS::runOnMachineFunction(MachineFunction &MF) {
326 // We only need to run this pass if there are any FP registers used in this
327 // function. If it is all integer, there is nothing for us to do!
328 bool FPIsUsed = false;
329
330 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
331 const MachineRegisterInfo &MRI = MF.getRegInfo();
332 for (unsigned i = 0; i <= 6; ++i)
333 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
334 FPIsUsed = true;
335 break;
336 }
337
338 // Early exit.
339 if (!FPIsUsed) return false;
340
341 Bundles = &getAnalysis<EdgeBundles>();
342 TII = MF.getSubtarget().getInstrInfo();
343
344 // Prepare cross-MBB liveness.
345 bundleCFGRecomputeKillFlags(MF);
346
347 StackTop = 0;
348
349 // Process the function in depth first order so that we process at least one
350 // of the predecessors for every reachable block in the function.
351 SmallPtrSet<MachineBasicBlock*, 8> Processed;
352 MachineBasicBlock *Entry = &MF.front();
353
354 LiveBundle &Bundle =
355 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
356
357 // In regcall convention, some FP registers may not be passed through
358 // the stack, so they will need to be assigned to the stack first
359 if ((Entry->getParent()->getFunction().getCallingConv() ==
360 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
361 // In the register calling convention, up to one FP argument could be
362 // saved in the first FP register.
363 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
364 // that the FP registers contain arguments.
365 // The actual value is passed in FP0.
366 // Here we fix the stack and mark FP0 as pre-assigned register.
367 assert((Bundle.Mask & 0xFE) == 0 &&
368 "Only FP0 could be passed as an argument");
369 Bundle.FixCount = 1;
370 Bundle.FixStack[0] = 0;
371 }
372
373 bool Changed = false;
374 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
375 Changed |= processBasicBlock(MF, *BB);
376
377 // Process any unreachable blocks in arbitrary order now.
378 if (MF.size() != Processed.size())
379 for (MachineBasicBlock &BB : MF)
380 if (Processed.insert(&BB).second)
381 Changed |= processBasicBlock(MF, BB);
382
383 LiveBundles.clear();
384
385 return Changed;
386}
387
388/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
389/// live-out sets for the FP registers. Consistent means that the set of
390/// registers live-out from a block is identical to the live-in set of all
391/// successors. This is not enforced by the normal live-in lists since
392/// registers may be implicitly defined, or not used by all successors.
393void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
394 assert(LiveBundles.empty() && "Stale data in LiveBundles");
395 LiveBundles.resize(Bundles->getNumBundles());
396
397 // Gather the actual live-in masks for all MBBs.
398 for (MachineBasicBlock &MBB : MF) {
399 setKillFlags(MBB);
400
401 const unsigned Mask = calcLiveInMask(&MBB, false);
402 if (!Mask)
403 continue;
404 // Update MBB ingoing bundle mask.
405 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
406 }
407}
408
409/// processBasicBlock - Loop over all of the instructions in the basic block,
410/// transforming FP instructions into their stack form.
411///
412bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
413 bool Changed = false;
414 MBB = &BB;
415
416 setupBlockStack();
417
418 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
419 MachineInstr &MI = *I;
420 uint64_t Flags = MI.getDesc().TSFlags;
421
422 unsigned FPInstClass = Flags & X86II::FPTypeMask;
423 if (MI.isInlineAsm())
424 FPInstClass = X86II::SpecialFP;
425
426 if (MI.isCopy() && isFPCopy(MI))
427 FPInstClass = X86II::SpecialFP;
428
429 if (MI.isImplicitDef() &&
430 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
431 FPInstClass = X86II::SpecialFP;
432
433 if (MI.isCall())
434 FPInstClass = X86II::SpecialFP;
435
436 if (FPInstClass == X86II::NotFP)
437 continue; // Efficiently ignore non-fp insts!
438
439 MachineInstr *PrevMI = nullptr;
440 if (I != BB.begin())
441 PrevMI = &*std::prev(I);
442
443 ++NumFP; // Keep track of # of pseudo instrs
444 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
445
446 // Get dead variables list now because the MI pointer may be deleted as part
447 // of processing!
448 SmallVector<unsigned, 8> DeadRegs;
449 for (const MachineOperand &MO : MI.operands())
450 if (MO.isReg() && MO.isDead())
451 DeadRegs.push_back(MO.getReg());
452
453 switch (FPInstClass) {
454 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
455 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
456 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
457 case X86II::TwoArgFP: handleTwoArgFP(I); break;
458 case X86II::CompareFP: handleCompareFP(I); break;
459 case X86II::CondMovFP: handleCondMovFP(I); break;
460 case X86II::SpecialFP: handleSpecialFP(I); break;
461 default: llvm_unreachable("Unknown FP Type!");
462 }
463
464 // Check to see if any of the values defined by this instruction are dead
465 // after definition. If so, pop them.
466 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
467 unsigned Reg = DeadRegs[i];
468 // Check if Reg is live on the stack. An inline-asm register operand that
469 // is in the clobber list and marked dead might not be live on the stack.
470 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
471 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
472 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
473 freeStackSlotAfter(I, Reg-X86::FP0);
474 }
475 }
476
477 // Print out all of the instructions expanded to, if -debug is enabled.
478 LLVM_DEBUG({
479 MachineBasicBlock::iterator PrevI = PrevMI;
480 if (I == PrevI) {
481 dbgs() << "Just deleted pseudo instruction\n";
482 } else {
483 MachineBasicBlock::iterator Start = I;
484 // Rewind to first instruction newly inserted.
485 while (Start != BB.begin() && std::prev(Start) != PrevI)
486 --Start;
487 dbgs() << "Inserted instructions:\n\t";
488 Start->print(dbgs());
489 while (++Start != std::next(I)) {
490 }
491 }
492 dumpStack();
493 });
494 (void)PrevMI;
495
496 Changed = true;
497 }
498
499 finishBlockStack();
500
501 return Changed;
502}
503
504/// setupBlockStack - Use the live bundles to set up our model of the stack
505/// to match predecessors' live out stack.
506void FPS::setupBlockStack() {
507 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
508 << " derived from " << MBB->getName() << ".\n");
509 StackTop = 0;
510 // Get the live-in bundle for MBB.
511 const LiveBundle &Bundle =
512 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
513
514 if (!Bundle.Mask) {
515 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
516 return;
517 }
518
519 // Depth-first iteration should ensure that we always have an assigned stack.
520 assert(Bundle.isFixed() && "Reached block before any predecessors");
521
522 // Push the fixed live-in registers.
523 for (unsigned i = Bundle.FixCount; i > 0; --i) {
524 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
525 << unsigned(Bundle.FixStack[i - 1]) << '\n');
526 pushReg(Bundle.FixStack[i-1]);
527 }
528
529 // Kill off unwanted live-ins. This can happen with a critical edge.
530 // FIXME: We could keep these live registers around as zombies. They may need
531 // to be revived at the end of a short block. It might save a few instrs.
532 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
533 adjustLiveRegs(Mask, MBB->begin());
534 LLVM_DEBUG(MBB->dump());
535}
536
537/// finishBlockStack - Revive live-outs that are implicitly defined out of
538/// MBB. Shuffle live registers to match the expected fixed stack of any
539/// predecessors, and ensure that all predecessors are expecting the same
540/// stack.
541void FPS::finishBlockStack() {
542 // The RET handling below takes care of return blocks for us.
543 if (MBB->succ_empty())
544 return;
545
546 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
547 << " derived from " << MBB->getName() << ".\n");
548
549 // Get MBB's live-out bundle.
550 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
551 LiveBundle &Bundle = LiveBundles[BundleIdx];
552
553 // We may need to kill and define some registers to match successors.
554 // FIXME: This can probably be combined with the shuffle below.
555 MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
556 adjustLiveRegs(Bundle.Mask, Term);
557
558 if (!Bundle.Mask) {
559 LLVM_DEBUG(dbgs() << "No live-outs.\n");
560 return;
561 }
562
563 // Has the stack order been fixed yet?
564 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
565 if (Bundle.isFixed()) {
566 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
567 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
568 } else {
569 // Not fixed yet, we get to choose.
570 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
571 Bundle.FixCount = StackTop;
572 for (unsigned i = 0; i < StackTop; ++i)
573 Bundle.FixStack[i] = getStackEntry(i);
574 }
575}
576
577
578//===----------------------------------------------------------------------===//
579// Efficient Lookup Table Support
580//===----------------------------------------------------------------------===//
581
582namespace {
583 struct TableEntry {
584 uint16_t from;
585 uint16_t to;
586 bool operator<(const TableEntry &TE) const { return from < TE.from; }
587 friend bool operator<(const TableEntry &TE, unsigned V) {
588 return TE.from < V;
589 }
590 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
591 const TableEntry &TE) {
592 return V < TE.from;
593 }
594 };
595}
596
597static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
598 const TableEntry *I = llvm::lower_bound(Table, Opcode);
599 if (I != Table.end() && I->from == Opcode)
600 return I->to;
601 return -1;
602}
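// Example use against the tables defined below (values taken from
// OpcodeTable): Lookup(OpcodeTable, X86::ADD_Fp32m) returns X86::ADD_F32m,
// while an opcode that is not in the table, e.g. an integer move, returns -1.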
603
604#ifdef NDEBUG
605#define ASSERT_SORTED(TABLE)
606#else
607#define ASSERT_SORTED(TABLE) \
608 { \
609 static std::atomic<bool> TABLE##Checked(false); \
610 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
611 assert(is_sorted(TABLE) && \
612 "All lookup tables must be sorted for efficient access!"); \
613 TABLE##Checked.store(true, std::memory_order_relaxed); \
614 } \
615 }
616#endif
617
618//===----------------------------------------------------------------------===//
619// Register File -> Register Stack Mapping Methods
620//===----------------------------------------------------------------------===//
621
622// OpcodeTable - Sorted map of register instructions to their stack version.
623// The first element is a register file pseudo instruction, the second is the
624// concrete X86 instruction which uses the register stack.
625//
626static const TableEntry OpcodeTable[] = {
627 { X86::ABS_Fp32 , X86::ABS_F },
628 { X86::ABS_Fp64 , X86::ABS_F },
629 { X86::ABS_Fp80 , X86::ABS_F },
630 { X86::ADD_Fp32m , X86::ADD_F32m },
631 { X86::ADD_Fp64m , X86::ADD_F64m },
632 { X86::ADD_Fp64m32 , X86::ADD_F32m },
633 { X86::ADD_Fp80m32 , X86::ADD_F32m },
634 { X86::ADD_Fp80m64 , X86::ADD_F64m },
635 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
636 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
637 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
638 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
639 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
640 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
641 { X86::CHS_Fp32 , X86::CHS_F },
642 { X86::CHS_Fp64 , X86::CHS_F },
643 { X86::CHS_Fp80 , X86::CHS_F },
644 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
645 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
646 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
647 { X86::CMOVB_Fp32 , X86::CMOVB_F },
648 { X86::CMOVB_Fp64 , X86::CMOVB_F },
649 { X86::CMOVB_Fp80 , X86::CMOVB_F },
650 { X86::CMOVE_Fp32 , X86::CMOVE_F },
651 { X86::CMOVE_Fp64 , X86::CMOVE_F },
652 { X86::CMOVE_Fp80 , X86::CMOVE_F },
653 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
654 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
655 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
656 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
657 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
658 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
659 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
660 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
661 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
662 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
663 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
664 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
665 { X86::CMOVP_Fp32 , X86::CMOVP_F },
666 { X86::CMOVP_Fp64 , X86::CMOVP_F },
667 { X86::CMOVP_Fp80 , X86::CMOVP_F },
668 { X86::COM_FpIr32 , X86::COM_FIr },
669 { X86::COM_FpIr64 , X86::COM_FIr },
670 { X86::COM_FpIr80 , X86::COM_FIr },
671 { X86::COM_Fpr32 , X86::COM_FST0r },
672 { X86::COM_Fpr64 , X86::COM_FST0r },
673 { X86::COM_Fpr80 , X86::COM_FST0r },
674 { X86::DIVR_Fp32m , X86::DIVR_F32m },
675 { X86::DIVR_Fp64m , X86::DIVR_F64m },
676 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
677 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
678 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
679 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
680 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
681 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
682 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
683 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
684 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
685 { X86::DIV_Fp32m , X86::DIV_F32m },
686 { X86::DIV_Fp64m , X86::DIV_F64m },
687 { X86::DIV_Fp64m32 , X86::DIV_F32m },
688 { X86::DIV_Fp80m32 , X86::DIV_F32m },
689 { X86::DIV_Fp80m64 , X86::DIV_F64m },
690 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
691 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
692 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
693 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
694 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
695 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
696 { X86::ILD_Fp16m32 , X86::ILD_F16m },
697 { X86::ILD_Fp16m64 , X86::ILD_F16m },
698 { X86::ILD_Fp16m80 , X86::ILD_F16m },
699 { X86::ILD_Fp32m32 , X86::ILD_F32m },
700 { X86::ILD_Fp32m64 , X86::ILD_F32m },
701 { X86::ILD_Fp32m80 , X86::ILD_F32m },
702 { X86::ILD_Fp64m32 , X86::ILD_F64m },
703 { X86::ILD_Fp64m64 , X86::ILD_F64m },
704 { X86::ILD_Fp64m80 , X86::ILD_F64m },
705 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
707 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
708 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
710 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
711 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
712 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
713 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
714 { X86::IST_Fp16m32 , X86::IST_F16m },
715 { X86::IST_Fp16m64 , X86::IST_F16m },
716 { X86::IST_Fp16m80 , X86::IST_F16m },
717 { X86::IST_Fp32m32 , X86::IST_F32m },
718 { X86::IST_Fp32m64 , X86::IST_F32m },
719 { X86::IST_Fp32m80 , X86::IST_F32m },
720 { X86::IST_Fp64m32 , X86::IST_FP64m },
721 { X86::IST_Fp64m64 , X86::IST_FP64m },
722 { X86::IST_Fp64m80 , X86::IST_FP64m },
723 { X86::LD_Fp032 , X86::LD_F0 },
724 { X86::LD_Fp064 , X86::LD_F0 },
725 { X86::LD_Fp080 , X86::LD_F0 },
726 { X86::LD_Fp132 , X86::LD_F1 },
727 { X86::LD_Fp164 , X86::LD_F1 },
728 { X86::LD_Fp180 , X86::LD_F1 },
729 { X86::LD_Fp32m , X86::LD_F32m },
730 { X86::LD_Fp32m64 , X86::LD_F32m },
731 { X86::LD_Fp32m80 , X86::LD_F32m },
732 { X86::LD_Fp64m , X86::LD_F64m },
733 { X86::LD_Fp64m80 , X86::LD_F64m },
734 { X86::LD_Fp80m , X86::LD_F80m },
735 { X86::MUL_Fp32m , X86::MUL_F32m },
736 { X86::MUL_Fp64m , X86::MUL_F64m },
737 { X86::MUL_Fp64m32 , X86::MUL_F32m },
738 { X86::MUL_Fp80m32 , X86::MUL_F32m },
739 { X86::MUL_Fp80m64 , X86::MUL_F64m },
740 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
741 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
742 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
743 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
744 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
745 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
746 { X86::SQRT_Fp32 , X86::SQRT_F },
747 { X86::SQRT_Fp64 , X86::SQRT_F },
748 { X86::SQRT_Fp80 , X86::SQRT_F },
749 { X86::ST_Fp32m , X86::ST_F32m },
750 { X86::ST_Fp64m , X86::ST_F64m },
751 { X86::ST_Fp64m32 , X86::ST_F32m },
752 { X86::ST_Fp80m32 , X86::ST_F32m },
753 { X86::ST_Fp80m64 , X86::ST_F64m },
754 { X86::ST_FpP80m , X86::ST_FP80m },
755 { X86::SUBR_Fp32m , X86::SUBR_F32m },
756 { X86::SUBR_Fp64m , X86::SUBR_F64m },
757 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
758 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
759 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
760 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
761 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
762 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
763 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
764 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
765 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
766 { X86::SUB_Fp32m , X86::SUB_F32m },
767 { X86::SUB_Fp64m , X86::SUB_F64m },
768 { X86::SUB_Fp64m32 , X86::SUB_F32m },
769 { X86::SUB_Fp80m32 , X86::SUB_F32m },
770 { X86::SUB_Fp80m64 , X86::SUB_F64m },
771 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
772 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
773 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
774 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
775 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
776 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
777 { X86::TST_Fp32 , X86::TST_F },
778 { X86::TST_Fp64 , X86::TST_F },
779 { X86::TST_Fp80 , X86::TST_F },
780 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
781 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
782 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
783 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
784 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
785 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
786 { X86::XAM_Fp32 , X86::XAM_F },
787 { X86::XAM_Fp64 , X86::XAM_F },
788 { X86::XAM_Fp80 , X86::XAM_F },
789};
790
791static unsigned getConcreteOpcode(unsigned Opcode) {
792 ASSERT_SORTED(OpcodeTable);
793 int Opc = Lookup(OpcodeTable, Opcode);
794 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
795 return Opc;
796}
797
798//===----------------------------------------------------------------------===//
799// Helper Methods
800//===----------------------------------------------------------------------===//
801
802// PopTable - Sorted map of instructions to their popping version. The first
803// element is an instruction, the second is the version which pops.
804//
805static const TableEntry PopTable[] = {
806 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
807
808 { X86::COMP_FST0r, X86::FCOMPP },
809 { X86::COM_FIr , X86::COM_FIPr },
810 { X86::COM_FST0r , X86::COMP_FST0r },
811
812 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
813 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
814
815 { X86::IST_F16m , X86::IST_FP16m },
816 { X86::IST_F32m , X86::IST_FP32m },
817
818 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
819
820 { X86::ST_F32m , X86::ST_FP32m },
821 { X86::ST_F64m , X86::ST_FP64m },
822 { X86::ST_Frr , X86::ST_FPrr },
823
824 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
825 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
826
827 { X86::UCOM_FIr , X86::UCOM_FIPr },
828
829 { X86::UCOM_FPr , X86::UCOM_FPPr },
830 { X86::UCOM_Fr , X86::UCOM_FPr },
831};
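// Example: popStackAfter() below uses this table to fold the pop into the
// instruction itself where possible, e.g. a plain store ST_Frr becomes the
// popping ST_FPrr and a compare UCOM_FIr becomes UCOM_FIPr, so no separate
// pop instruction has to be emitted.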
832
833static bool doesInstructionSetFPSW(MachineInstr &MI) {
834 if (const MachineOperand *MO = MI.findRegisterDefOperand(X86::FPSW))
835 if (!MO->isDead())
836 return true;
837 return false;
838}
839
840static MachineBasicBlock::iterator
841getNextFPInstruction(MachineBasicBlock::iterator I) {
842 MachineBasicBlock &MBB = *I->getParent();
843 while (++I != MBB.end()) {
844 MachineInstr &MI = *I;
845 if (X86::isX87Instruction(MI))
846 return I;
847 }
848 return MBB.end();
849}
850
851/// popStackAfter - Pop the current value off of the top of the FP stack after
852/// the specified instruction. This attempts to be sneaky and combine the pop
853/// into the instruction itself if possible. The iterator is left pointing to
854/// the last instruction, be it a new pop instruction inserted, or the old
855/// instruction if it was modified in place.
856///
857void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
858 MachineInstr &MI = *I;
859 const DebugLoc &dl = MI.getDebugLoc();
860 ASSERT_SORTED(PopTable);
861
862 popReg();
863
864 // Check to see if there is a popping version of this instruction...
865 int Opcode = Lookup(PopTable, I->getOpcode());
866 if (Opcode != -1) {
867 I->setDesc(TII->get(Opcode));
868 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
869 I->removeOperand(0);
870 MI.dropDebugNumber();
871 } else { // Insert an explicit pop
872 // If this instruction sets FPSW, which is read in following instruction,
873 // insert pop after that reader.
874 if (doesInstructionSetFPSW(MI)) {
875 MachineBasicBlock &MBB = *MI.getParent();
876 MachineBasicBlock::iterator Next = getNextFPInstruction(I);
877 if (Next != MBB.end() && Next->readsRegister(X86::FPSW))
878 I = Next;
879 }
880 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
881 }
882}
883
884/// freeStackSlotAfter - Free the specified register from the register stack, so
885/// that it is no longer in a register. If the register is currently at the top
886/// of the stack, we just pop the current instruction, otherwise we store the
887/// current top-of-stack into the specified slot, then pop the top of stack.
888void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
889 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
890 popStackAfter(I);
891 return;
892 }
893
894 // Otherwise, store the top of stack into the dead slot, killing the operand
895 // without having to add in an explicit xchg then pop.
896 //
897 I = freeStackSlotBefore(++I, FPRegNo);
898}
899
900/// freeStackSlotBefore - Free the specified register without trying any
901/// folding.
903FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
904 unsigned STReg = getSTReg(FPRegNo);
905 unsigned OldSlot = getSlot(FPRegNo);
906 unsigned TopReg = Stack[StackTop-1];
907 Stack[OldSlot] = TopReg;
908 RegMap[TopReg] = OldSlot;
909 RegMap[FPRegNo] = ~0;
910 Stack[--StackTop] = ~0;
911 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
912 .addReg(STReg)
913 .getInstr();
914}
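// Example: if FP2 currently sits in st(2), freeStackSlotBefore(I, 2) emits
// ST_FPrr %st(2) (an fstp), which stores the old top of stack into the slot
// being freed and pops; in the model the old top register takes over FP2's
// slot and StackTop shrinks by one.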
915
916/// adjustLiveRegs - Kill and revive registers such that exactly the FP
917/// registers with a bit in Mask are live.
918void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
919 unsigned Defs = Mask;
920 unsigned Kills = 0;
921 for (unsigned i = 0; i < StackTop; ++i) {
922 unsigned RegNo = Stack[i];
923 if (!(Defs & (1 << RegNo)))
924 // This register is live, but we don't want it.
925 Kills |= (1 << RegNo);
926 else
927 // We don't need to imp-def this live register.
928 Defs &= ~(1 << RegNo);
929 }
930 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
931
932 // Produce implicit-defs for free by using killed registers.
933 while (Kills && Defs) {
934 unsigned KReg = llvm::countr_zero(Kills);
935 unsigned DReg = llvm::countr_zero(Defs);
936 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
937 << "\n");
938 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
939 std::swap(RegMap[KReg], RegMap[DReg]);
940 Kills &= ~(1 << KReg);
941 Defs &= ~(1 << DReg);
942 }
943
944 // Kill registers by popping.
945 if (Kills && I != MBB->begin()) {
946 MachineBasicBlock::iterator I2 = std::prev(I);
947 while (StackTop) {
948 unsigned KReg = getStackEntry(0);
949 if (!(Kills & (1 << KReg)))
950 break;
951 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
952 popStackAfter(I2);
953 Kills &= ~(1 << KReg);
954 }
955 }
956
957 // Manually kill the rest.
958 while (Kills) {
959 unsigned KReg = llvm::countr_zero(Kills);
960 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
961 freeStackSlotBefore(I, KReg);
962 Kills &= ~(1 << KReg);
963 }
964
965 // Load zeros for all the imp-defs.
966 while(Defs) {
967 unsigned DReg = llvm::countr_zero(Defs);
968 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
969 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
970 pushReg(DReg);
971 Defs &= ~(1 << DReg);
972 }
973
974 // Now we should have the correct registers live.
975 LLVM_DEBUG(dumpStack());
976 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
977}
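// Example: if FP0 and FP1 are live but Mask asks for FP0 and FP3, the loop
// above pairs the kill of FP1 with the def of FP3 and just renames the slot
// in the model, emitting no code. A kill with no matching def is popped (or
// freed with an fstp), and a def with no matching kill is materialized with
// LD_F0 (fldz).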
978
979/// shuffleStackTop - emit fxch instructions before I to shuffle the top
980/// FixCount entries into the order given by FixStack.
981/// FIXME: Is there a better algorithm than insertion sort?
982void FPS::shuffleStackTop(const unsigned char *FixStack,
983 unsigned FixCount,
984 MachineBasicBlock::iterator I) {
985 // Move items into place, starting from the desired stack bottom.
986 while (FixCount--) {
987 // Old register at position FixCount.
988 unsigned OldReg = getStackEntry(FixCount);
989 // Desired register at position FixCount.
990 unsigned Reg = FixStack[FixCount];
991 if (Reg == OldReg)
992 continue;
993 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
994 moveToTop(Reg, I);
995 if (FixCount > 0)
996 moveToTop(OldReg, I);
997 }
998 LLVM_DEBUG(dumpStack());
999}
1000
1001
1002//===----------------------------------------------------------------------===//
1003// Instruction transformation implementation
1004//===----------------------------------------------------------------------===//
1005
1006void FPS::handleCall(MachineBasicBlock::iterator &I) {
1007 MachineInstr &MI = *I;
1008 unsigned STReturns = 0;
1009
1010 bool ClobbersFPStack = false;
1011 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1012 MachineOperand &Op = MI.getOperand(i);
1013 // Check if this call clobbers the FP stack. Checking for a clobber of
1014 // FP0 is sufficient, since the regmask treats all FP registers alike.
1015 if (Op.isRegMask()) {
1016 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1017#ifndef NDEBUG
1018 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1019 for (unsigned i = 1; i != 8; ++i)
1020 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1021 "Inconsistent FP register clobber");
1022#endif
1023
1024 if (ClobbersFP0)
1025 ClobbersFPStack = true;
1026 }
1027
1028 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1029 continue;
1030
1031 assert(Op.isImplicit() && "Expected implicit def/use");
1032
1033 if (Op.isDef())
1034 STReturns |= 1 << getFPReg(Op);
1035
1036 // Remove the operand so that later passes don't see it.
1037 MI.removeOperand(i);
1038 --i;
1039 --e;
1040 }
1041
1042 // Most calls should have a regmask that clobbers the FP registers. If it
1043 // isn't present then the register allocator didn't spill the FP registers
1044 // so they are still on the stack.
1045 assert((ClobbersFPStack || STReturns == 0) &&
1046 "ST returns without FP stack clobber");
1047 if (!ClobbersFPStack)
1048 return;
1049
1050 unsigned N = llvm::countr_one(STReturns);
1051
1052 // FP registers used for function return must be consecutive starting at
1053 // FP0
1054 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1055
1056 // Reset the FP Stack - It is required because of possible leftovers from
1057 // passed arguments. The caller should assume that the FP stack is
1058 // returned empty (unless the callee returns values on FP stack).
1059 while (StackTop > 0)
1060 popReg();
1061
1062 for (unsigned I = 0; I < N; ++I)
1063 pushReg(N - I - 1);
1064
1065 // If this call has been modified, drop all variable values defined by it.
1066 // We can't track them once they've been stackified.
1067 if (STReturns)
1068 I->dropDebugNumber();
1069}
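// Example: for a call returning a single value on the x87 stack, STReturns
// has only bit 0 set, so N == 1 and the loop above pushes FP0, modeling the
// return value as live in st(0).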
1070
1071/// If RET has an FP register use operand, pass the first one in ST(0) and
1072/// the second one in ST(1).
1073void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1074 MachineInstr &MI = *I;
1075
1076 // Find the register operands.
1077 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1078 unsigned LiveMask = 0;
1079
1080 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1081 MachineOperand &Op = MI.getOperand(i);
1082 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1083 continue;
1084 // FP Register uses must be kills unless there are two uses of the same
1085 // register, in which case only one will be a kill.
1086 assert(Op.isUse() &&
1087 (Op.isKill() || // Marked kill.
1088 getFPReg(Op) == FirstFPRegOp || // Second instance.
1089 MI.killsRegister(Op.getReg())) && // Later use is marked kill.
1090 "Ret only defs operands, and values aren't live beyond it");
1091
1092 if (FirstFPRegOp == ~0U)
1093 FirstFPRegOp = getFPReg(Op);
1094 else {
1095 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1096 SecondFPRegOp = getFPReg(Op);
1097 }
1098 LiveMask |= (1 << getFPReg(Op));
1099
1100 // Remove the operand so that later passes don't see it.
1101 MI.removeOperand(i);
1102 --i;
1103 --e;
1104 }
1105
1106 // We may have been carrying spurious live-ins, so make sure only the
1107 // returned registers are left live.
1108 adjustLiveRegs(LiveMask, MI);
1109 if (!LiveMask) return; // Quick check to see if any are possible.
1110
1111 // There are only four possibilities here:
1112 // 1) we are returning a single FP value. In this case, it has to be in
1113 // ST(0) already, so just declare success by removing the value from the
1114 // FP Stack.
1115 if (SecondFPRegOp == ~0U) {
1116 // Assert that the top of stack contains the right FP register.
1117 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1118 "Top of stack not the right register for RET!");
1119
1120 // Ok, everything is good, mark the value as not being on the stack
1121 // anymore so that our assertion about the stack being empty at end of
1122 // block doesn't fire.
1123 StackTop = 0;
1124 return;
1125 }
1126
1127 // Otherwise, we are returning two values:
1128 // 2) If returning the same value for both, we only have one thing in the FP
1129 // stack. Consider: RET FP1, FP1
1130 if (StackTop == 1) {
1131 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1132 "Stack misconfiguration for RET!");
1133
1134 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1135 // register to hold it.
1136 unsigned NewReg = ScratchFPReg;
1137 duplicateToTop(FirstFPRegOp, NewReg, MI);
1138 FirstFPRegOp = NewReg;
1139 }
1140
1141 /// Okay we know we have two different FPx operands now:
1142 assert(StackTop == 2 && "Must have two values live!");
1143
1144 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1145 /// in ST(1). In this case, emit an fxch.
1146 if (getStackEntry(0) == SecondFPRegOp) {
1147 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1148 moveToTop(FirstFPRegOp, MI);
1149 }
1150
1151 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1152 /// ST(1). Just remove both from our understanding of the stack and return.
1153 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1154 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1155 StackTop = 0;
1156}
1157
1158/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1159///
1160void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1161 MachineInstr &MI = *I;
1162 unsigned DestReg = getFPReg(MI.getOperand(0));
1163
1164 // Change from the pseudo instruction to the concrete instruction.
1165 MI.removeOperand(0); // Remove the explicit ST(0) operand
1166 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1167 MI.addOperand(
1168 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1169
1170 // Result gets pushed on the stack.
1171 pushReg(DestReg);
1172
1173 MI.dropDebugNumber();
1174}
1175
1176/// handleOneArgFP - fst <mem>, ST(0)
1177///
1178void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1179 MachineInstr &MI = *I;
1180 unsigned NumOps = MI.getDesc().getNumOperands();
1181 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1182 "Can only handle fst* & ftst instructions!");
1183
1184 // Is this the last use of the source register?
1185 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1186 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1187
1188 // FISTP64m is strange because there isn't a non-popping version.
1189 // If we have one _and_ we don't want to pop the operand, duplicate the value
1190 // on the stack instead of moving it. This ensures that popping the value is
1191 // always ok.
1192 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1193 //
1194 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1196 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1197 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1198 MI.getOpcode() == X86::IST_Fp64m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1200 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1201 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1202 MI.getOpcode() == X86::IST_Fp64m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1204 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1205 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1206 MI.getOpcode() == X86::ST_FpP80m)) {
1207 duplicateToTop(Reg, ScratchFPReg, I);
1208 } else {
1209 moveToTop(Reg, I); // Move to the top of the stack...
1210 }
1211
1212 // Convert from the pseudo instruction to the concrete instruction.
1213 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1214 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1215 MI.addOperand(
1216 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1217
1218 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1219 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1220 MI.getOpcode() == X86::ST_FP80m) {
1221 if (StackTop == 0)
1222 report_fatal_error("Stack empty??");
1223 --StackTop;
1224 } else if (KillsSrc) { // Last use of operand?
1225 popStackAfter(I);
1226 }
1227
1228 MI.dropDebugNumber();
1229}
1230
1231
1232/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1233/// replace the value with a newly computed value. These instructions may have
1234/// non-fp operands after their FP operands.
1235///
1236/// Examples:
1237/// R1 = fchs R2
1238/// R1 = fadd R2, [mem]
1239///
1240void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1241 MachineInstr &MI = *I;
1242#ifndef NDEBUG
1243 unsigned NumOps = MI.getDesc().getNumOperands();
1244 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1245#endif
1246
1247 // Is this the last use of the source register?
1248 unsigned Reg = getFPReg(MI.getOperand(1));
1249 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1250
1251 if (KillsSrc) {
1252 // If this is the last use of the source register, just make sure it's on
1253 // the top of the stack.
1254 moveToTop(Reg, I);
1255 if (StackTop == 0)
1256 report_fatal_error("Stack cannot be empty!");
1257 --StackTop;
1258 pushReg(getFPReg(MI.getOperand(0)));
1259 } else {
1260 // If this is not the last use of the source register, _copy_ it to the top
1261 // of the stack.
1262 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1263 }
1264
1265 // Change from the pseudo instruction to the concrete instruction.
1266 MI.removeOperand(1); // Drop the source operand.
1267 MI.removeOperand(0); // Drop the destination operand.
1268 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1269 MI.dropDebugNumber();
1270}
1271
1272
1273//===----------------------------------------------------------------------===//
1274// Define tables of various ways to map pseudo instructions
1275//
1276
1277// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1278static const TableEntry ForwardST0Table[] = {
1279 { X86::ADD_Fp32 , X86::ADD_FST0r },
1280 { X86::ADD_Fp64 , X86::ADD_FST0r },
1281 { X86::ADD_Fp80 , X86::ADD_FST0r },
1282 { X86::DIV_Fp32 , X86::DIV_FST0r },
1283 { X86::DIV_Fp64 , X86::DIV_FST0r },
1284 { X86::DIV_Fp80 , X86::DIV_FST0r },
1285 { X86::MUL_Fp32 , X86::MUL_FST0r },
1286 { X86::MUL_Fp64 , X86::MUL_FST0r },
1287 { X86::MUL_Fp80 , X86::MUL_FST0r },
1288 { X86::SUB_Fp32 , X86::SUB_FST0r },
1289 { X86::SUB_Fp64 , X86::SUB_FST0r },
1290 { X86::SUB_Fp80 , X86::SUB_FST0r },
1291};
1292
1293// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1294static const TableEntry ReverseST0Table[] = {
1295 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1296 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1297 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1298 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1299 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1300 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1301 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1302 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1303 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1304 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1305 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1306 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1307};
1308
1309// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1310static const TableEntry ForwardSTiTable[] = {
1311 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1312 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1313 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1314 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1315 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1316 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1317 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1318 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1319 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1320 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1321 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1322 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1323};
1324
1325// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1326static const TableEntry ReverseSTiTable[] = {
1327 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1328 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1329 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1330 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1331 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1332 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1333 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1334 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1335 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1336 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1337 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1338 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1339};
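// A worked example of choosing among the four tables (hypothetical operands):
// for
//     %fp2 = SUB_Fp80 %fp0, killed %fp1
// with %fp0 in st(0) and %fp1 in st(1), TOS == Op0 selects the "forward"
// direction, and because the operand that dies is not on top the result is
// written into its slot: Lookup(ForwardSTiTable, SUB_Fp80) yields SUBR_FrST0,
// i.e. st(1) = st(0) - st(1), and %fp2 takes over st(1) in the model.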
1340
1341
1342/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1343/// instructions which need to be simplified and possibly transformed.
1344///
1345/// Result: ST(0) = fsub ST(0), ST(i)
1346/// ST(i) = fsub ST(0), ST(i)
1347/// ST(0) = fsubr ST(0), ST(i)
1348/// ST(i) = fsubr ST(0), ST(i)
1349///
1350void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1351 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1352 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1353 MachineInstr &MI = *I;
1354
1355 unsigned NumOperands = MI.getDesc().getNumOperands();
1356 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1357 unsigned Dest = getFPReg(MI.getOperand(0));
1358 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1359 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1360 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1361 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1362 const DebugLoc &dl = MI.getDebugLoc();
1363
1364 unsigned TOS = getStackEntry(0);
1365
1366 // One of our operands must be on the top of the stack. If neither is yet, we
1367 // need to move one.
1368 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1369 // We can choose to move either operand to the top of the stack. If one of
1370 // the operands is killed by this instruction, we want that one so that we
1371 // can update right on top of the old version.
1372 if (KillsOp0) {
1373 moveToTop(Op0, I); // Move dead operand to TOS.
1374 TOS = Op0;
1375 } else if (KillsOp1) {
1376 moveToTop(Op1, I);
1377 TOS = Op1;
1378 } else {
1379 // All of the operands are live after this instruction executes, so we
1380 // cannot update on top of any operand. Because of this, we must
1381 // duplicate one of the stack elements to the top. It doesn't matter
1382 // which one we pick.
1383 //
1384 duplicateToTop(Op0, Dest, I);
1385 Op0 = TOS = Dest;
1386 KillsOp0 = true;
1387 }
1388 } else if (!KillsOp0 && !KillsOp1) {
1389 // If we DO have one of our operands at the top of the stack, but we don't
1390 // have a dead operand, we must duplicate one of the operands to a new slot
1391 // on the stack.
1392 duplicateToTop(Op0, Dest, I);
1393 Op0 = TOS = Dest;
1394 KillsOp0 = true;
1395 }
1396
1397 // Now we know that one of our operands is on the top of the stack, and at
1398 // least one of our operands is killed by this instruction.
1399 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1400 "Stack conditions not set up right!");
1401
1402 // We decide which form to use based on what is on the top of the stack, and
1403 // which operand is killed by this instruction.
1404 ArrayRef<TableEntry> InstTable;
1405 bool isForward = TOS == Op0;
1406 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1407 if (updateST0) {
1408 if (isForward)
1409 InstTable = ForwardST0Table;
1410 else
1411 InstTable = ReverseST0Table;
1412 } else {
1413 if (isForward)
1414 InstTable = ForwardSTiTable;
1415 else
1416 InstTable = ReverseSTiTable;
1417 }
1418
1419 int Opcode = Lookup(InstTable, MI.getOpcode());
1420 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1421
1422 // NotTOS - The register which is not on the top of stack...
1423 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1424
1425 // Replace the old instruction with a new instruction
1426 MBB->remove(&*I++);
1427 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1428
1429 if (!MI.mayRaiseFPException())
1430 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1431
1432 // If both operands are killed, pop one off of the stack in addition to
1433 // overwriting the other one.
1434 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1435 assert(!updateST0 && "Should have updated other operand!");
1436 popStackAfter(I); // Pop the top of stack
1437 }
1438
1439 // Update stack information so that we know the destination register is now on
1440 // the stack.
1441 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1442 assert(UpdatedSlot < StackTop && Dest < 7);
1443 Stack[UpdatedSlot] = Dest;
1444 RegMap[Dest] = UpdatedSlot;
1445 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1446}
1447
1448/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1449/// register arguments and no explicit destinations.
1450///
1451void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1452 MachineInstr &MI = *I;
1453
1454 unsigned NumOperands = MI.getDesc().getNumOperands();
1455 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1456 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1457 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1458 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1459 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1460
1461 // Make sure the first operand is on the top of stack, the other one can be
1462 // anywhere.
1463 moveToTop(Op0, I);
1464
1465 // Change from the pseudo instruction to the concrete instruction.
1466 MI.getOperand(0).setReg(getSTReg(Op1));
1467 MI.removeOperand(1);
1468 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1469 MI.dropDebugNumber();
1470
1471 // If any of the operands are killed by this instruction, free them.
1472 if (KillsOp0) freeStackSlotAfter(I, Op0);
1473 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1474}
1475
1476/// handleCondMovFP - Handle two address conditional move instructions. These
1477/// instructions move a st(i) register to st(0) iff a condition is true. These
1478/// instructions require that the first operand is at the top of the stack, but
1479/// otherwise don't modify the stack at all.
1480void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1481 MachineInstr &MI = *I;
1482
1483 unsigned Op0 = getFPReg(MI.getOperand(0));
1484 unsigned Op1 = getFPReg(MI.getOperand(2));
1485 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1486
1487 // The first operand *must* be on the top of the stack.
1488 moveToTop(Op0, I);
1489
1490 // Change the second operand to the stack register that the operand is in.
1491 // Change from the pseudo instruction to the concrete instruction.
1492 MI.removeOperand(0);
1493 MI.removeOperand(1);
1494 MI.getOperand(0).setReg(getSTReg(Op1));
1495 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1496 MI.dropDebugNumber();
1497
1498 // If we kill the second operand, make sure to pop it from the stack.
1499 if (Op0 != Op1 && KillsOp1) {
1500 // Get this value off of the register stack.
1501 freeStackSlotAfter(I, Op1);
1502 }
1503}
1504
1505
1506/// handleSpecialFP - Handle special instructions which behave unlike other
1507/// floating point instructions. This is primarily intended for use by pseudo
1508/// instructions.
1509///
1510void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1511 MachineInstr &MI = *Inst;
1512
1513 if (MI.isCall()) {
1514 handleCall(Inst);
1515 return;
1516 }
1517
1518 if (MI.isReturn()) {
1519 handleReturn(Inst);
1520 return;
1521 }
1522
1523 switch (MI.getOpcode()) {
1524 default: llvm_unreachable("Unknown SpecialFP instruction!");
1525 case TargetOpcode::COPY: {
1526 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1527 const MachineOperand &MO1 = MI.getOperand(1);
1528 const MachineOperand &MO0 = MI.getOperand(0);
1529 bool KillsSrc = MI.killsRegister(MO1.getReg());
1530
1531 // FP <- FP copy.
1532 unsigned DstFP = getFPReg(MO0);
1533 unsigned SrcFP = getFPReg(MO1);
1534 assert(isLive(SrcFP) && "Cannot copy dead register");
1535 if (KillsSrc) {
1536 // If the input operand is killed, we can just change the owner of the
1537 // incoming stack slot into the result.
1538 unsigned Slot = getSlot(SrcFP);
1539 Stack[Slot] = DstFP;
1540 RegMap[DstFP] = Slot;
1541 } else {
1542 // For COPY we just duplicate the specified value to a new stack slot.
1543 // This could be made better, but would require substantial changes.
1544 duplicateToTop(SrcFP, DstFP, Inst);
1545 }
1546 break;
1547 }
1548
1549 case TargetOpcode::IMPLICIT_DEF: {
1550 // All FP registers must be explicitly defined, so load a 0 instead.
1551 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1552 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1553 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1554 pushReg(Reg);
1555 break;
1556 }
1557
1558 case TargetOpcode::INLINEASM:
1559 case TargetOpcode::INLINEASM_BR: {
1560 // The inline asm MachineInstr currently only *uses* FP registers for the
1561 // 'f' constraint. These should be turned into the current ST(x) register
1562 // in the machine instr.
1563 //
1564 // There are special rules for x87 inline assembly. The compiler must know
1565 // exactly how many registers are popped and pushed implicitly by the asm.
1566 // Otherwise it is not possible to restore the stack state after the inline
1567 // asm.
1568 //
1569 // There are 3 kinds of input operands:
1570 //
1571 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1572 // popped input operand must be in a fixed stack slot, and it is either
1573 // tied to an output operand, or in the clobber list. The MI has ST use
1574 // and def operands for these inputs.
1575 //
1576 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1577 // preserved by the inline asm. The fixed stack slots must be STn-STm
1578 // following the popped inputs. A fixed input operand cannot be tied to
1579 // an output or appear in the clobber list. The MI has ST use operands
1580 // and no defs for these inputs.
1581 //
1582 // 3. Preserved inputs. These inputs use the "f" constraint which is
1583 // represented as an FP register. The inline asm won't change these
1584 // stack slots.
1585 //
1586 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1587 // registers do not count as output operands. The inline asm changes the
1588 // stack as if it popped all the popped inputs and then pushed all the
1589 // output operands.
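    // As a concrete source-level illustration of these rules (borrowed from
    // the classic GCC extended-asm example, not from this file), fsincos has
    // one popped input tied to an output plus a second, pushed output:
    //
    //   double x = 0.5, s, c;
    //   __asm__ ("fsincos" : "=t" (c), "=u" (s) : "0" (x));
    //
    // The input `x` is a popped input (kind 1): tied to output 0, it must sit
    // at the stack top and is consumed. The asm then pushes the cosine, so
    // the two outputs occupy st(0) and st(1) ("t" and "u"). An input written
    // with the "f" constraint instead would be a preserved input (kind 3) and
    // would keep whatever stack slot it already has.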
1590
1591 // Scan the assembly for ST registers used, defined and clobbered. We can
1592 // only tell clobbers from defs by looking at the asm descriptor.
1593 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1594 unsigned NumOps = 0;
1595 SmallSet<unsigned, 1> FRegIdx;
1596 unsigned RCID;
1597
1598 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1599 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1600 unsigned Flags = MI.getOperand(i).getImm();
1601
1602 NumOps = InlineAsm::getNumOperandRegisters(Flags);
1603 if (NumOps != 1)
1604 continue;
1605 const MachineOperand &MO = MI.getOperand(i + 1);
1606 if (!MO.isReg())
1607 continue;
1608 unsigned STReg = MO.getReg() - X86::FP0;
1609 if (STReg >= 8)
1610 continue;
1611
1612 // If the flag has a register class constraint, this must be an operand
1613 // with constraint "f". Record its index and continue.
1614 if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
1615 FRegIdx.insert(i + 1);
1616 continue;
1617 }
1618
1619      switch (InlineAsm::getKind(Flags)) {
1620      case InlineAsm::Kind_RegUse:
1621        STUses |= (1u << STReg);
1622        break;
1623      case InlineAsm::Kind_RegDef:
1624      case InlineAsm::Kind_RegDefEarlyClobber:
1625        STDefs |= (1u << STReg);
1626        break;
1627      case InlineAsm::Kind_Clobber:
1628        STClobbers |= (1u << STReg);
1629        break;
1630      default:
1631        break;
1632      }
1633 }
1634
1635 if (STUses && !isMask_32(STUses))
1636 MI.emitError("fixed input regs must be last on the x87 stack");
1637 unsigned NumSTUses = llvm::countr_one(STUses);
1638
1639 // Defs must be contiguous from the stack top. ST0-STn.
1640 if (STDefs && !isMask_32(STDefs)) {
1641 MI.emitError("output regs must be last on the x87 stack");
1642 STDefs = NextPowerOf2(STDefs) - 1;
1643 }
1644 unsigned NumSTDefs = llvm::countr_one(STDefs);
1645
1646 // So must the clobbered stack slots. ST0-STm, m >= n.
1647 if (STClobbers && !isMask_32(STDefs | STClobbers))
1648 MI.emitError("clobbers must be last on the x87 stack");
1649
1650 // Popped inputs are the ones that are also clobbered or defined.
1651 unsigned STPopped = STUses & (STDefs | STClobbers);
1652 if (STPopped && !isMask_32(STPopped))
1653 MI.emitError("implicitly popped regs must be last on the x87 stack");
1654 unsigned NumSTPopped = llvm::countr_one(STPopped);
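    // Worked example of the contiguity checks above (hypothetical constraint
    // sets): fixed uses of st(0) and st(1) give STUses = 0b0011, which
    // isMask_32 accepts and countr_one reports as 2 registers. Uses of st(0)
    // and st(2) give 0b0101, which is not a mask starting at bit 0, so the
    // corresponding "must be last on the x87 stack" error is emitted. The
    // same requirement is applied to the defs, to defs|clobbers, and to the
    // popped subset of the uses.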
1655
1656 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1657 << NumSTPopped << ", and defines " << NumSTDefs
1658 << " regs.\n");
1659
1660#ifndef NDEBUG
1661 // If any input operand uses constraint "f", all output register
1662 // constraints must be early-clobber defs.
1663 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1664 if (FRegIdx.count(I)) {
1665 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1666 "Operands with constraint \"f\" cannot overlap with defs");
1667 }
1668#endif
1669
1670 // Collect all FP registers (register operands with constraints "t", "u",
1671     // and "f") to kill after the instruction.
1672 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1673 for (const MachineOperand &Op : MI.operands()) {
1674 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1675 continue;
1676 unsigned FPReg = getFPReg(Op);
1677
1678 // If we kill this operand, make sure to pop it from the stack after the
1679 // asm. We just remember it for now, and pop them all off at the end in
1680 // a batch.
1681 if (Op.isUse() && Op.isKill())
1682 FPKills |= 1U << FPReg;
1683 }
1684
1685 // Do not include registers that are implicitly popped by defs/clobbers.
1686 FPKills &= ~(STDefs | STClobbers);
1687
1688 // Now we can rearrange the live registers to match what was requested.
1689 unsigned char STUsesArray[8];
1690
1691 for (unsigned I = 0; I < NumSTUses; ++I)
1692 STUsesArray[I] = I;
1693
1694 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1695 LLVM_DEBUG({
1696 dbgs() << "Before asm: ";
1697 dumpStack();
1698 });
1699
1700 // With the stack layout fixed, rewrite the FP registers.
1701 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1702 MachineOperand &Op = MI.getOperand(i);
1703 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1704 continue;
1705
1706 unsigned FPReg = getFPReg(Op);
1707
1708 if (FRegIdx.count(i))
1709 // Operand with constraint "f".
1710 Op.setReg(getSTReg(FPReg));
1711 else
1712 // Operand with a single register class constraint ("t" or "u").
1713 Op.setReg(X86::ST0 + FPReg);
1714 }
1715
1716 // Simulate the inline asm popping its inputs and pushing its outputs.
1717 StackTop -= NumSTPopped;
1718
1719 for (unsigned i = 0; i < NumSTDefs; ++i)
1720 pushReg(NumSTDefs - i - 1);
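    // Continuing the fsincos-style example from the comment above (one popped
    // input, two outputs): StackTop drops by one for the consumed input, then
    // pushReg(1) followed by pushReg(0) leaves FP0 at st(0) and FP1 at st(1),
    // matching the stack the hardware actually has once the asm has pushed
    // its two results.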
1721
1722 // If this asm kills any FP registers (is the last use of them) we must
1723 // explicitly emit pop instructions for them. Do this now after the asm has
1724 // executed so that the ST(x) numbers are not off (which would happen if we
1725 // did this inline with operand rewriting).
1726 //
1727 // Note: this might be a non-optimal pop sequence. We might be able to do
1728 // better by trying to pop in stack order or something.
1729 while (FPKills) {
1730 unsigned FPReg = llvm::countr_zero(FPKills);
1731 if (isLive(FPReg))
1732 freeStackSlotAfter(Inst, FPReg);
1733 FPKills &= ~(1U << FPReg);
1734 }
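    // For instance (hypothetical operand), an "f"-constrained input living in
    // FP4 that carries a kill flag and was not implicitly popped gets its
    // stack slot freed by the loop above, right after the asm, once the ST
    // numbering is stable again.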
1735
1736 // Don't delete the inline asm!
1737 return;
1738 }
1739 }
1740
1741 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1742
1743 // We want to leave I pointing to the previous instruction, but what if we
1744 // just erased the first instruction?
1745 if (Inst == MBB->begin()) {
1746 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1747 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1748 } else
1749 --Inst;
1750}
1751
1752void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1753   const TargetRegisterInfo &TRI =
1754       *MBB.getParent()->getSubtarget().getRegisterInfo();
1755   LivePhysRegs LPR(TRI);
1756
1757 LPR.addLiveOuts(MBB);
1758
1759 for (MachineInstr &MI : llvm::reverse(MBB)) {
1760 if (MI.isDebugInstr())
1761 continue;
1762
1763     std::bitset<8> Defs;
1764     SmallVector<MachineOperand *, 2> Uses;
1765
1766 for (auto &MO : MI.operands()) {
1767 if (!MO.isReg())
1768 continue;
1769
1770 unsigned Reg = MO.getReg() - X86::FP0;
1771
1772 if (Reg >= 8)
1773 continue;
1774
1775 if (MO.isDef()) {
1776 Defs.set(Reg);
1777 if (!LPR.contains(MO.getReg()))
1778 MO.setIsDead();
1779 } else
1780 Uses.push_back(&MO);
1781 }
1782
1783 for (auto *MO : Uses)
1784 if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1785 MO->setIsKill();
1786
1787 LPR.stepBackward(MI);
1788 }
1789}
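// A rough illustration of the backward walk above (made-up block, not from a
// real compilation):
//
//   %fp0 = ...              ; def of FP0
//   ...  = use %fp0         ; last use in the block; FP0 is not live-out
//   ...  = use %fp1         ; FP1 is live-out of the block
//
// Starting from the live-out set and stepping backwards, the use of %fp0 gets
// a kill flag (FP0 is not live after that instruction), the use of %fp1 does
// not, and a def whose register is not live afterwards is marked dead. The
// stackifier relies on these recomputed flags to decide when values can be
// popped.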