LLVM 19.0.0git
X86FloatingPoint.cpp
Go to the documentation of this file.
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/Passes.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/InlineAsm.h"
43#include "llvm/Support/Debug.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP , "Number of floating point instructions");
55
56namespace {
57 const unsigned ScratchFPReg = 7;
58
59 struct FPS : public MachineFunctionPass {
60 static char ID;
61 FPS() : MachineFunctionPass(ID) {
62 // This is really only to keep valgrind quiet.
63 // The logic in isLive() is too much for it.
64 memset(Stack, 0, sizeof(Stack));
65 memset(RegMap, 0, sizeof(RegMap));
66 }
67
68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
74 }
75
76 bool runOnMachineFunction(MachineFunction &MF) override;
77
80 MachineFunctionProperties::Property::NoVRegs);
81 }
82
83 StringRef getPassName() const override { return "X86 FP Stackifier"; }
84
85 private:
86 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
87
88 // Two CFG edges are related if they leave the same block, or enter the same
89 // block. The transitive closure of an edge under this relation is a
90 // LiveBundle. It represents a set of CFG edges where the live FP stack
91 // registers must be allocated identically in the x87 stack.
92 //
93 // A LiveBundle is usually all the edges leaving a block, or all the edges
94 // entering a block, but it can contain more edges if critical edges are
95 // present.
96 //
97 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
98 // but the exact mapping of FP registers to stack slots is fixed later.
99 struct LiveBundle {
100 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
101 unsigned Mask = 0;
102
103 // Number of pre-assigned live registers in FixStack. This is 0 when the
104 // stack order has not yet been fixed.
105 unsigned FixCount = 0;
106
107 // Assigned stack order for live-in registers.
108 // FixStack[i] == getStackEntry(i) for all i < FixCount.
109 unsigned char FixStack[8];
110
111 LiveBundle() = default;
112
113 // Have the live registers been assigned a stack order yet?
114 bool isFixed() const { return !Mask || FixCount; }
115 };
116
117 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
118 // with no live FP registers.
119 SmallVector<LiveBundle, 8> LiveBundles;
120
121 // The edge bundle analysis provides indices into the LiveBundles vector.
122 EdgeBundles *Bundles = nullptr;
123
124 // Return a bitmask of FP registers in block's live-in list.
125 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
126 unsigned Mask = 0;
128 I != MBB->livein_end(); ) {
129 MCPhysReg Reg = I->PhysReg;
130 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
132 Mask |= 1 << (Reg - X86::FP0);
133 if (RemoveFPs) {
134 I = MBB->removeLiveIn(I);
135 continue;
136 }
137 }
138 ++I;
139 }
140 return Mask;
141 }
142
143 // Partition all the CFG edges into LiveBundles.
144 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
145
146 MachineBasicBlock *MBB = nullptr; // Current basic block
147
148 // The hardware keeps track of how many FP registers are live, so we have
149 // to model that exactly. Usually, each live register corresponds to an
150 // FP<n> register, but when dealing with calls, returns, and inline
151 // assembly, it is sometimes necessary to have live scratch registers.
152 unsigned Stack[8]; // FP<n> Registers in each stack slot...
153 unsigned StackTop = 0; // The current top of the FP stack.
154
155 enum {
156 NumFPRegs = 8 // Including scratch pseudo-registers.
157 };
158
159 // For each live FP<n> register, point to its Stack[] entry.
160 // The first entries correspond to FP0-FP6, the rest are scratch registers
161 // used when we need slightly different live registers than what the
162 // register allocator thinks.
163 unsigned RegMap[NumFPRegs];
164
165 // Set up our stack model to match the incoming registers to MBB.
166 void setupBlockStack();
167
168 // Shuffle live registers to match the expectations of successor blocks.
169 void finishBlockStack();
170
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack() const {
173 dbgs() << "Stack contents:";
174 for (unsigned i = 0; i != StackTop; ++i) {
175 dbgs() << " FP" << Stack[i];
176 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
177 }
178 }
179#endif
180
181 /// getSlot - Return the stack slot number a particular register number is
182 /// in.
183 unsigned getSlot(unsigned RegNo) const {
184 assert(RegNo < NumFPRegs && "Regno out of range!");
185 return RegMap[RegNo];
186 }
187
188 /// isLive - Is RegNo currently live in the stack?
189 bool isLive(unsigned RegNo) const {
190 unsigned Slot = getSlot(RegNo);
191 return Slot < StackTop && Stack[Slot] == RegNo;
192 }
193
194 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
195 unsigned getStackEntry(unsigned STi) const {
196 if (STi >= StackTop)
197 report_fatal_error("Access past stack top!");
198 return Stack[StackTop-1-STi];
199 }
200
201 /// getSTReg - Return the X86::ST(i) register which contains the specified
202 /// FP<RegNo> register.
203 unsigned getSTReg(unsigned RegNo) const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
205 }
206
207 // pushReg - Push the specified FP<n> register onto the stack.
208 void pushReg(unsigned Reg) {
209 assert(Reg < NumFPRegs && "Register number out of range!");
210 if (StackTop >= 8)
211 report_fatal_error("Stack overflow!");
212 Stack[StackTop] = Reg;
213 RegMap[Reg] = StackTop++;
214 }
215
216 // popReg - Pop a register from the stack.
217 void popReg() {
218 if (StackTop == 0)
219 report_fatal_error("Cannot pop empty stack!");
220 RegMap[Stack[--StackTop]] = ~0; // Update state
221 }
222
223 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
224 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
225 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
226 if (isAtTop(RegNo)) return;
227
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
230
231 // Swap the slots the regs are in.
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
233
234 // Swap stack slot contents.
235 if (RegMap[RegOnTop] >= StackTop)
236 report_fatal_error("Access past stack top!");
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
238
239 // Emit an fxch to update the runtime processors version of the state.
240 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
241 ++NumFXCH;
242 }
243
244 void duplicateToTop(unsigned RegNo, unsigned AsReg,
246 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
247 unsigned STReg = getSTReg(RegNo);
248 pushReg(AsReg); // New register on top of stack
249
250 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
251 }
252
253 /// popStackAfter - Pop the current value off of the top of the FP stack
254 /// after the specified instruction.
255 void popStackAfter(MachineBasicBlock::iterator &I);
256
257 /// freeStackSlotAfter - Free the specified register from the register
258 /// stack, so that it is no longer in a register. If the register is
259 /// currently at the top of the stack, we just pop the current instruction,
260 /// otherwise we store the current top-of-stack into the specified slot,
261 /// then pop the top of stack.
262 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
263
264 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
265 /// instruction.
267 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
268
269 /// Adjust the live registers to be the set in Mask.
270 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
271
272 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
273 /// st(0), FP reg FixStack[1] is st(1) etc.
274 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
276
277 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
278
279 void handleCall(MachineBasicBlock::iterator &I);
280 void handleReturn(MachineBasicBlock::iterator &I);
281 void handleZeroArgFP(MachineBasicBlock::iterator &I);
282 void handleOneArgFP(MachineBasicBlock::iterator &I);
283 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
284 void handleTwoArgFP(MachineBasicBlock::iterator &I);
285 void handleCompareFP(MachineBasicBlock::iterator &I);
286 void handleCondMovFP(MachineBasicBlock::iterator &I);
287 void handleSpecialFP(MachineBasicBlock::iterator &I);
288
289 // Check if a COPY instruction is using FP registers.
290 static bool isFPCopy(MachineInstr &MI) {
291 Register DstReg = MI.getOperand(0).getReg();
292 Register SrcReg = MI.getOperand(1).getReg();
293
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
296 }
297
298 void setKillFlags(MachineBasicBlock &MBB) const;
299 };
300}
301
302char FPS::ID = 0;
303
304INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
305 false, false)
309
311
312/// getFPReg - Return the X86::FPx register number for the specified operand.
313/// For example, this returns 3 for X86::FP3.
314static unsigned getFPReg(const MachineOperand &MO) {
315 assert(MO.isReg() && "Expected an FP register!");
316 Register Reg = MO.getReg();
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
318 return Reg - X86::FP0;
319}
320
321/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
322/// register references into FP stack references.
323///
324bool FPS::runOnMachineFunction(MachineFunction &MF) {
325 // We only need to run this pass if there are any FP registers used in this
326 // function. If it is all integer, there is nothing for us to do!
327 bool FPIsUsed = false;
328
329 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
330 const MachineRegisterInfo &MRI = MF.getRegInfo();
331 for (unsigned i = 0; i <= 6; ++i)
332 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
333 FPIsUsed = true;
334 break;
335 }
336
337 // Early exit.
338 if (!FPIsUsed) return false;
339
340 Bundles = &getAnalysis<EdgeBundles>();
342
343 // Prepare cross-MBB liveness.
344 bundleCFGRecomputeKillFlags(MF);
345
346 StackTop = 0;
347
348 // Process the function in depth first order so that we process at least one
349 // of the predecessors for every reachable block in the function.
351 MachineBasicBlock *Entry = &MF.front();
352
353 LiveBundle &Bundle =
354 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
355
356 // In regcall convention, some FP registers may not be passed through
357 // the stack, so they will need to be assigned to the stack first
358 if ((Entry->getParent()->getFunction().getCallingConv() ==
359 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
360 // In the register calling convention, up to one FP argument could be
361 // saved in the first FP register.
362 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
363 // that the FP registers contain arguments.
364 // The actual value is passed in FP0.
365 // Here we fix the stack and mark FP0 as pre-assigned register.
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
368 Bundle.FixCount = 1;
369 Bundle.FixStack[0] = 0;
370 }
371
372 bool Changed = false;
373 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
374 Changed |= processBasicBlock(MF, *BB);
375
376 // Process any unreachable blocks in arbitrary order now.
377 if (MF.size() != Processed.size())
378 for (MachineBasicBlock &BB : MF)
379 if (Processed.insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
381
382 LiveBundles.clear();
383
384 return Changed;
385}
386
387/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
388/// live-out sets for the FP registers. Consistent means that the set of
389/// registers live-out from a block is identical to the live-in set of all
390/// successors. This is not enforced by the normal live-in lists since
391/// registers may be implicitly defined, or not used by all successors.
392void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
393 assert(LiveBundles.empty() && "Stale data in LiveBundles");
394 LiveBundles.resize(Bundles->getNumBundles());
395
396 // Gather the actual live-in masks for all MBBs.
397 for (MachineBasicBlock &MBB : MF) {
398 setKillFlags(MBB);
399
400 const unsigned Mask = calcLiveInMask(&MBB, false);
401 if (!Mask)
402 continue;
403 // Update MBB ingoing bundle mask.
404 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
405 }
406}
407
408/// processBasicBlock - Loop over all of the instructions in the basic block,
409/// transforming FP instructions into their stack form.
410///
411bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
412 bool Changed = false;
413 MBB = &BB;
414
415 setupBlockStack();
416
417 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
418 MachineInstr &MI = *I;
419 uint64_t Flags = MI.getDesc().TSFlags;
420
421 unsigned FPInstClass = Flags & X86II::FPTypeMask;
422 if (MI.isInlineAsm())
423 FPInstClass = X86II::SpecialFP;
424
425 if (MI.isCopy() && isFPCopy(MI))
426 FPInstClass = X86II::SpecialFP;
427
428 if (MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
430 FPInstClass = X86II::SpecialFP;
431
432 if (MI.isCall())
433 FPInstClass = X86II::SpecialFP;
434
435 if (FPInstClass == X86II::NotFP)
436 continue; // Efficiently ignore non-fp insts!
437
438 MachineInstr *PrevMI = nullptr;
439 if (I != BB.begin())
440 PrevMI = &*std::prev(I);
441
442 ++NumFP; // Keep track of # of pseudo instrs
443 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
444
445 // Get dead variables list now because the MI pointer may be deleted as part
446 // of processing!
448 for (const MachineOperand &MO : MI.operands())
449 if (MO.isReg() && MO.isDead())
450 DeadRegs.push_back(MO.getReg());
451
452 switch (FPInstClass) {
453 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
454 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
455 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
456 case X86II::TwoArgFP: handleTwoArgFP(I); break;
457 case X86II::CompareFP: handleCompareFP(I); break;
458 case X86II::CondMovFP: handleCondMovFP(I); break;
459 case X86II::SpecialFP: handleSpecialFP(I); break;
460 default: llvm_unreachable("Unknown FP Type!");
461 }
462
463 // Check to see if any of the values defined by this instruction are dead
464 // after definition. If so, pop them.
465 for (unsigned Reg : DeadRegs) {
466 // Check if Reg is live on the stack. An inline-asm register operand that
467 // is in the clobber list and marked dead might not be live on the stack.
468 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
469 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
470 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
471 freeStackSlotAfter(I, Reg-X86::FP0);
472 }
473 }
474
475 // Print out all of the instructions expanded to if -debug
476 LLVM_DEBUG({
477 MachineBasicBlock::iterator PrevI = PrevMI;
478 if (I == PrevI) {
479 dbgs() << "Just deleted pseudo instruction\n";
480 } else {
482 // Rewind to first instruction newly inserted.
483 while (Start != BB.begin() && std::prev(Start) != PrevI)
484 --Start;
485 dbgs() << "Inserted instructions:\n\t";
486 Start->print(dbgs());
487 while (++Start != std::next(I)) {
488 }
489 }
490 dumpStack();
491 });
492 (void)PrevMI;
493
494 Changed = true;
495 }
496
497 finishBlockStack();
498
499 return Changed;
500}
501
502/// setupBlockStack - Use the live bundles to set up our model of the stack
503/// to match predecessors' live out stack.
504void FPS::setupBlockStack() {
505 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
506 << " derived from " << MBB->getName() << ".\n");
507 StackTop = 0;
508 // Get the live-in bundle for MBB.
509 const LiveBundle &Bundle =
510 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
511
512 if (!Bundle.Mask) {
513 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
514 return;
515 }
516
517 // Depth-first iteration should ensure that we always have an assigned stack.
518 assert(Bundle.isFixed() && "Reached block before any predecessors");
519
520 // Push the fixed live-in registers.
521 for (unsigned i = Bundle.FixCount; i > 0; --i) {
522 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
523 << unsigned(Bundle.FixStack[i - 1]) << '\n');
524 pushReg(Bundle.FixStack[i-1]);
525 }
526
527 // Kill off unwanted live-ins. This can happen with a critical edge.
528 // FIXME: We could keep these live registers around as zombies. They may need
529 // to be revived at the end of a short block. It might save a few instrs.
530 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
531 adjustLiveRegs(Mask, MBB->begin());
532 LLVM_DEBUG(MBB->dump());
533}
534
535/// finishBlockStack - Revive live-outs that are implicitly defined out of
536/// MBB. Shuffle live registers to match the expected fixed stack of any
537/// predecessors, and ensure that all predecessors are expecting the same
538/// stack.
539void FPS::finishBlockStack() {
540 // The RET handling below takes care of return blocks for us.
541 if (MBB->succ_empty())
542 return;
543
544 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
545 << " derived from " << MBB->getName() << ".\n");
546
547 // Get MBB's live-out bundle.
548 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
549 LiveBundle &Bundle = LiveBundles[BundleIdx];
550
551 // We may need to kill and define some registers to match successors.
552 // FIXME: This can probably be combined with the shuffle below.
554 adjustLiveRegs(Bundle.Mask, Term);
555
556 if (!Bundle.Mask) {
557 LLVM_DEBUG(dbgs() << "No live-outs.\n");
558 return;
559 }
560
561 // Has the stack order been fixed yet?
562 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
563 if (Bundle.isFixed()) {
564 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
565 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
566 } else {
567 // Not fixed yet, we get to choose.
568 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
569 Bundle.FixCount = StackTop;
570 for (unsigned i = 0; i < StackTop; ++i)
571 Bundle.FixStack[i] = getStackEntry(i);
572 }
573}
574
575
576//===----------------------------------------------------------------------===//
577// Efficient Lookup Table Support
578//===----------------------------------------------------------------------===//
579
580namespace {
581 struct TableEntry {
582 uint16_t from;
583 uint16_t to;
584 bool operator<(const TableEntry &TE) const { return from < TE.from; }
585 friend bool operator<(const TableEntry &TE, unsigned V) {
586 return TE.from < V;
587 }
588 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
589 const TableEntry &TE) {
590 return V < TE.from;
591 }
592 };
593}
594
595static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
596 const TableEntry *I = llvm::lower_bound(Table, Opcode);
597 if (I != Table.end() && I->from == Opcode)
598 return I->to;
599 return -1;
600}
601
602#ifdef NDEBUG
603#define ASSERT_SORTED(TABLE)
604#else
605#define ASSERT_SORTED(TABLE) \
606 { \
607 static std::atomic<bool> TABLE##Checked(false); \
608 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
609 assert(is_sorted(TABLE) && \
610 "All lookup tables must be sorted for efficient access!"); \
611 TABLE##Checked.store(true, std::memory_order_relaxed); \
612 } \
613 }
614#endif
615
616//===----------------------------------------------------------------------===//
617// Register File -> Register Stack Mapping Methods
618//===----------------------------------------------------------------------===//
619
620// OpcodeTable - Sorted map of register instructions to their stack version.
621// The first element is an register file pseudo instruction, the second is the
622// concrete X86 instruction which uses the register stack.
623//
624static const TableEntry OpcodeTable[] = {
625 { X86::ABS_Fp32 , X86::ABS_F },
626 { X86::ABS_Fp64 , X86::ABS_F },
627 { X86::ABS_Fp80 , X86::ABS_F },
628 { X86::ADD_Fp32m , X86::ADD_F32m },
629 { X86::ADD_Fp64m , X86::ADD_F64m },
630 { X86::ADD_Fp64m32 , X86::ADD_F32m },
631 { X86::ADD_Fp80m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m64 , X86::ADD_F64m },
633 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
634 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
636 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
637 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
639 { X86::CHS_Fp32 , X86::CHS_F },
640 { X86::CHS_Fp64 , X86::CHS_F },
641 { X86::CHS_Fp80 , X86::CHS_F },
642 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
643 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
645 { X86::CMOVB_Fp32 , X86::CMOVB_F },
646 { X86::CMOVB_Fp64 , X86::CMOVB_F },
647 { X86::CMOVB_Fp80 , X86::CMOVB_F },
648 { X86::CMOVE_Fp32 , X86::CMOVE_F },
649 { X86::CMOVE_Fp64 , X86::CMOVE_F },
650 { X86::CMOVE_Fp80 , X86::CMOVE_F },
651 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
652 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
654 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
655 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
657 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
658 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
660 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
661 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
663 { X86::CMOVP_Fp32 , X86::CMOVP_F },
664 { X86::CMOVP_Fp64 , X86::CMOVP_F },
665 { X86::CMOVP_Fp80 , X86::CMOVP_F },
666 { X86::COM_FpIr32 , X86::COM_FIr },
667 { X86::COM_FpIr64 , X86::COM_FIr },
668 { X86::COM_FpIr80 , X86::COM_FIr },
669 { X86::COM_Fpr32 , X86::COM_FST0r },
670 { X86::COM_Fpr64 , X86::COM_FST0r },
671 { X86::COM_Fpr80 , X86::COM_FST0r },
672 { X86::DIVR_Fp32m , X86::DIVR_F32m },
673 { X86::DIVR_Fp64m , X86::DIVR_F64m },
674 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
675 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
676 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
677 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
678 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
679 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
680 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
681 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
682 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
683 { X86::DIV_Fp32m , X86::DIV_F32m },
684 { X86::DIV_Fp64m , X86::DIV_F64m },
685 { X86::DIV_Fp64m32 , X86::DIV_F32m },
686 { X86::DIV_Fp80m32 , X86::DIV_F32m },
687 { X86::DIV_Fp80m64 , X86::DIV_F64m },
688 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
689 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
690 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
691 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
692 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
693 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
694 { X86::ILD_Fp16m32 , X86::ILD_F16m },
695 { X86::ILD_Fp16m64 , X86::ILD_F16m },
696 { X86::ILD_Fp16m80 , X86::ILD_F16m },
697 { X86::ILD_Fp32m32 , X86::ILD_F32m },
698 { X86::ILD_Fp32m64 , X86::ILD_F32m },
699 { X86::ILD_Fp32m80 , X86::ILD_F32m },
700 { X86::ILD_Fp64m32 , X86::ILD_F64m },
701 { X86::ILD_Fp64m64 , X86::ILD_F64m },
702 { X86::ILD_Fp64m80 , X86::ILD_F64m },
703 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
704 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
705 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
707 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
708 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
710 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
711 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
712 { X86::IST_Fp16m32 , X86::IST_F16m },
713 { X86::IST_Fp16m64 , X86::IST_F16m },
714 { X86::IST_Fp16m80 , X86::IST_F16m },
715 { X86::IST_Fp32m32 , X86::IST_F32m },
716 { X86::IST_Fp32m64 , X86::IST_F32m },
717 { X86::IST_Fp32m80 , X86::IST_F32m },
718 { X86::IST_Fp64m32 , X86::IST_FP64m },
719 { X86::IST_Fp64m64 , X86::IST_FP64m },
720 { X86::IST_Fp64m80 , X86::IST_FP64m },
721 { X86::LD_Fp032 , X86::LD_F0 },
722 { X86::LD_Fp064 , X86::LD_F0 },
723 { X86::LD_Fp080 , X86::LD_F0 },
724 { X86::LD_Fp132 , X86::LD_F1 },
725 { X86::LD_Fp164 , X86::LD_F1 },
726 { X86::LD_Fp180 , X86::LD_F1 },
727 { X86::LD_Fp32m , X86::LD_F32m },
728 { X86::LD_Fp32m64 , X86::LD_F32m },
729 { X86::LD_Fp32m80 , X86::LD_F32m },
730 { X86::LD_Fp64m , X86::LD_F64m },
731 { X86::LD_Fp64m80 , X86::LD_F64m },
732 { X86::LD_Fp80m , X86::LD_F80m },
733 { X86::MUL_Fp32m , X86::MUL_F32m },
734 { X86::MUL_Fp64m , X86::MUL_F64m },
735 { X86::MUL_Fp64m32 , X86::MUL_F32m },
736 { X86::MUL_Fp80m32 , X86::MUL_F32m },
737 { X86::MUL_Fp80m64 , X86::MUL_F64m },
738 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
739 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
740 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
741 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
742 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
743 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
744 { X86::SQRT_Fp32 , X86::SQRT_F },
745 { X86::SQRT_Fp64 , X86::SQRT_F },
746 { X86::SQRT_Fp80 , X86::SQRT_F },
747 { X86::ST_Fp32m , X86::ST_F32m },
748 { X86::ST_Fp64m , X86::ST_F64m },
749 { X86::ST_Fp64m32 , X86::ST_F32m },
750 { X86::ST_Fp80m32 , X86::ST_F32m },
751 { X86::ST_Fp80m64 , X86::ST_F64m },
752 { X86::ST_FpP80m , X86::ST_FP80m },
753 { X86::SUBR_Fp32m , X86::SUBR_F32m },
754 { X86::SUBR_Fp64m , X86::SUBR_F64m },
755 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
756 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
758 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
759 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
761 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
762 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
764 { X86::SUB_Fp32m , X86::SUB_F32m },
765 { X86::SUB_Fp64m , X86::SUB_F64m },
766 { X86::SUB_Fp64m32 , X86::SUB_F32m },
767 { X86::SUB_Fp80m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m64 , X86::SUB_F64m },
769 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
770 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
772 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
773 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
775 { X86::TST_Fp32 , X86::TST_F },
776 { X86::TST_Fp64 , X86::TST_F },
777 { X86::TST_Fp80 , X86::TST_F },
778 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
779 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
781 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
782 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
784 { X86::XAM_Fp32 , X86::XAM_F },
785 { X86::XAM_Fp64 , X86::XAM_F },
786 { X86::XAM_Fp80 , X86::XAM_F },
787};
788
789static unsigned getConcreteOpcode(unsigned Opcode) {
791 int Opc = Lookup(OpcodeTable, Opcode);
792 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
793 return Opc;
794}
795
796//===----------------------------------------------------------------------===//
797// Helper Methods
798//===----------------------------------------------------------------------===//
799
800// PopTable - Sorted map of instructions to their popping version. The first
801// element is an instruction, the second is the version which pops.
802//
803static const TableEntry PopTable[] = {
804 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
805
806 { X86::COMP_FST0r, X86::FCOMPP },
807 { X86::COM_FIr , X86::COM_FIPr },
808 { X86::COM_FST0r , X86::COMP_FST0r },
809
810 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
811 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
812
813 { X86::IST_F16m , X86::IST_FP16m },
814 { X86::IST_F32m , X86::IST_FP32m },
815
816 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
817
818 { X86::ST_F32m , X86::ST_FP32m },
819 { X86::ST_F64m , X86::ST_FP64m },
820 { X86::ST_Frr , X86::ST_FPrr },
821
822 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
823 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
824
825 { X86::UCOM_FIr , X86::UCOM_FIPr },
826
827 { X86::UCOM_FPr , X86::UCOM_FPPr },
828 { X86::UCOM_Fr , X86::UCOM_FPr },
829};
830
832 if (const MachineOperand *MO = MI.findRegisterDefOperand(X86::FPSW))
833 if (!MO->isDead())
834 return true;
835 return false;
836}
837
840 MachineBasicBlock &MBB = *I->getParent();
841 while (++I != MBB.end()) {
842 MachineInstr &MI = *I;
844 return I;
845 }
846 return MBB.end();
847}
848
849/// popStackAfter - Pop the current value off of the top of the FP stack after
850/// the specified instruction. This attempts to be sneaky and combine the pop
851/// into the instruction itself if possible. The iterator is left pointing to
852/// the last instruction, be it a new pop instruction inserted, or the old
853/// instruction if it was modified in place.
854///
855void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
856 MachineInstr &MI = *I;
857 const DebugLoc &dl = MI.getDebugLoc();
859
860 popReg();
861
862 // Check to see if there is a popping version of this instruction...
863 int Opcode = Lookup(PopTable, I->getOpcode());
864 if (Opcode != -1) {
865 I->setDesc(TII->get(Opcode));
866 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
867 I->removeOperand(0);
868 MI.dropDebugNumber();
869 } else { // Insert an explicit pop
870 // If this instruction sets FPSW, which is read in following instruction,
871 // insert pop after that reader.
873 MachineBasicBlock &MBB = *MI.getParent();
875 if (Next != MBB.end() && Next->readsRegister(X86::FPSW))
876 I = Next;
877 }
878 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
879 }
880}
881
882/// freeStackSlotAfter - Free the specified register from the register stack, so
883/// that it is no longer in a register. If the register is currently at the top
884/// of the stack, we just pop the current instruction, otherwise we store the
885/// current top-of-stack into the specified slot, then pop the top of stack.
886void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
887 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
888 popStackAfter(I);
889 return;
890 }
891
892 // Otherwise, store the top of stack into the dead slot, killing the operand
893 // without having to add in an explicit xchg then pop.
894 //
895 I = freeStackSlotBefore(++I, FPRegNo);
896}
897
898/// freeStackSlotBefore - Free the specified register without trying any
899/// folding.
901FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
902 unsigned STReg = getSTReg(FPRegNo);
903 unsigned OldSlot = getSlot(FPRegNo);
904 unsigned TopReg = Stack[StackTop-1];
905 Stack[OldSlot] = TopReg;
906 RegMap[TopReg] = OldSlot;
907 RegMap[FPRegNo] = ~0;
908 Stack[--StackTop] = ~0;
909 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
910 .addReg(STReg)
911 .getInstr();
912}
913
914/// adjustLiveRegs - Kill and revive registers such that exactly the FP
915/// registers with a bit in Mask are live.
916void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
917 unsigned Defs = Mask;
918 unsigned Kills = 0;
919 for (unsigned i = 0; i < StackTop; ++i) {
920 unsigned RegNo = Stack[i];
921 if (!(Defs & (1 << RegNo)))
922 // This register is live, but we don't want it.
923 Kills |= (1 << RegNo);
924 else
925 // We don't need to imp-def this live register.
926 Defs &= ~(1 << RegNo);
927 }
928 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
929
930 // Produce implicit-defs for free by using killed registers.
931 while (Kills && Defs) {
932 unsigned KReg = llvm::countr_zero(Kills);
933 unsigned DReg = llvm::countr_zero(Defs);
934 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
935 << "\n");
936 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
937 std::swap(RegMap[KReg], RegMap[DReg]);
938 Kills &= ~(1 << KReg);
939 Defs &= ~(1 << DReg);
940 }
941
942 // Kill registers by popping.
943 if (Kills && I != MBB->begin()) {
944 MachineBasicBlock::iterator I2 = std::prev(I);
945 while (StackTop) {
946 unsigned KReg = getStackEntry(0);
947 if (!(Kills & (1 << KReg)))
948 break;
949 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
950 popStackAfter(I2);
951 Kills &= ~(1 << KReg);
952 }
953 }
954
955 // Manually kill the rest.
956 while (Kills) {
957 unsigned KReg = llvm::countr_zero(Kills);
958 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
959 freeStackSlotBefore(I, KReg);
960 Kills &= ~(1 << KReg);
961 }
962
963 // Load zeros for all the imp-defs.
964 while(Defs) {
965 unsigned DReg = llvm::countr_zero(Defs);
966 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
967 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
968 pushReg(DReg);
969 Defs &= ~(1 << DReg);
970 }
971
972 // Now we should have the correct registers live.
973 LLVM_DEBUG(dumpStack());
974 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
975}
976
977/// shuffleStackTop - emit fxch instructions before I to shuffle the top
978/// FixCount entries into the order given by FixStack.
979/// FIXME: Is there a better algorithm than insertion sort?
980void FPS::shuffleStackTop(const unsigned char *FixStack,
981 unsigned FixCount,
983 // Move items into place, starting from the desired stack bottom.
984 while (FixCount--) {
985 // Old register at position FixCount.
986 unsigned OldReg = getStackEntry(FixCount);
987 // Desired register at position FixCount.
988 unsigned Reg = FixStack[FixCount];
989 if (Reg == OldReg)
990 continue;
991 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
992 moveToTop(Reg, I);
993 if (FixCount > 0)
994 moveToTop(OldReg, I);
995 }
996 LLVM_DEBUG(dumpStack());
997}
998
999
1000//===----------------------------------------------------------------------===//
1001// Instruction transformation implementation
1002//===----------------------------------------------------------------------===//
1003
1004void FPS::handleCall(MachineBasicBlock::iterator &I) {
1005 MachineInstr &MI = *I;
1006 unsigned STReturns = 0;
1007
1008 bool ClobbersFPStack = false;
1009 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1010 MachineOperand &Op = MI.getOperand(i);
1011 // Check if this call clobbers the FP stack.
1012 // is sufficient.
1013 if (Op.isRegMask()) {
1014 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1015#ifndef NDEBUG
1016 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1017 for (unsigned i = 1; i != 8; ++i)
1018 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1019 "Inconsistent FP register clobber");
1020#endif
1021
1022 if (ClobbersFP0)
1023 ClobbersFPStack = true;
1024 }
1025
1026 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1027 continue;
1028
1029 assert(Op.isImplicit() && "Expected implicit def/use");
1030
1031 if (Op.isDef())
1032 STReturns |= 1 << getFPReg(Op);
1033
1034 // Remove the operand so that later passes don't see it.
1035 MI.removeOperand(i);
1036 --i;
1037 --e;
1038 }
1039
1040 // Most calls should have a regmask that clobbers the FP registers. If it
1041 // isn't present then the register allocator didn't spill the FP registers
1042 // so they are still on the stack.
1043 assert((ClobbersFPStack || STReturns == 0) &&
1044 "ST returns without FP stack clobber");
1045 if (!ClobbersFPStack)
1046 return;
1047
1048 unsigned N = llvm::countr_one(STReturns);
1049
1050 // FP registers used for function return must be consecutive starting at
1051 // FP0
1052 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1053
1054 // Reset the FP Stack - It is required because of possible leftovers from
1055 // passed arguments. The caller should assume that the FP stack is
1056 // returned empty (unless the callee returns values on FP stack).
1057 while (StackTop > 0)
1058 popReg();
1059
1060 for (unsigned I = 0; I < N; ++I)
1061 pushReg(N - I - 1);
1062
1063 // If this call has been modified, drop all variable values defined by it.
1064 // We can't track them once they've been stackified.
1065 if (STReturns)
1066 I->dropDebugNumber();
1067}
1068
1069/// If RET has an FP register use operand, pass the first one in ST(0) and
1070/// the second one in ST(1).
1071void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1072 MachineInstr &MI = *I;
1073
1074 // Find the register operands.
1075 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1076 unsigned LiveMask = 0;
1077
1078 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1079 MachineOperand &Op = MI.getOperand(i);
1080 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1081 continue;
1082 // FP Register uses must be kills unless there are two uses of the same
1083 // register, in which case only one will be a kill.
1084 assert(Op.isUse() &&
1085 (Op.isKill() || // Marked kill.
1086 getFPReg(Op) == FirstFPRegOp || // Second instance.
1087 MI.killsRegister(Op.getReg())) && // Later use is marked kill.
1088 "Ret only defs operands, and values aren't live beyond it");
1089
1090 if (FirstFPRegOp == ~0U)
1091 FirstFPRegOp = getFPReg(Op);
1092 else {
1093 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1094 SecondFPRegOp = getFPReg(Op);
1095 }
1096 LiveMask |= (1 << getFPReg(Op));
1097
1098 // Remove the operand so that later passes don't see it.
1099 MI.removeOperand(i);
1100 --i;
1101 --e;
1102 }
1103
1104 // We may have been carrying spurious live-ins, so make sure only the
1105 // returned registers are left live.
1106 adjustLiveRegs(LiveMask, MI);
1107 if (!LiveMask) return; // Quick check to see if any are possible.
1108
1109 // There are only four possibilities here:
1110 // 1) we are returning a single FP value. In this case, it has to be in
1111 // ST(0) already, so just declare success by removing the value from the
1112 // FP Stack.
1113 if (SecondFPRegOp == ~0U) {
1114 // Assert that the top of stack contains the right FP register.
1115 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1116 "Top of stack not the right register for RET!");
1117
1118 // Ok, everything is good, mark the value as not being on the stack
1119 // anymore so that our assertion about the stack being empty at end of
1120 // block doesn't fire.
1121 StackTop = 0;
1122 return;
1123 }
1124
1125 // Otherwise, we are returning two values:
1126 // 2) If returning the same value for both, we only have one thing in the FP
1127 // stack. Consider: RET FP1, FP1
1128 if (StackTop == 1) {
1129 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1130 "Stack misconfiguration for RET!");
1131
1132 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1133 // register to hold it.
1134 unsigned NewReg = ScratchFPReg;
1135 duplicateToTop(FirstFPRegOp, NewReg, MI);
1136 FirstFPRegOp = NewReg;
1137 }
1138
1139 /// Okay we know we have two different FPx operands now:
1140 assert(StackTop == 2 && "Must have two values live!");
1141
1142 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1143 /// in ST(1). In this case, emit an fxch.
1144 if (getStackEntry(0) == SecondFPRegOp) {
1145 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1146 moveToTop(FirstFPRegOp, MI);
1147 }
1148
1149 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1150 /// ST(1). Just remove both from our understanding of the stack and return.
1151 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1152 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1153 StackTop = 0;
1154}
1155
1156/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1157///
1158void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1159 MachineInstr &MI = *I;
1160 unsigned DestReg = getFPReg(MI.getOperand(0));
1161
1162 // Change from the pseudo instruction to the concrete instruction.
1163 MI.removeOperand(0); // Remove the explicit ST(0) operand
1164 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1165 MI.addOperand(
1166 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1167
1168 // Result gets pushed on the stack.
1169 pushReg(DestReg);
1170
1171 MI.dropDebugNumber();
1172}
1173
1174/// handleOneArgFP - fst <mem>, ST(0)
1175///
1176void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1177 MachineInstr &MI = *I;
1178 unsigned NumOps = MI.getDesc().getNumOperands();
1179 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1180 "Can only handle fst* & ftst instructions!");
1181
1182 // Is this the last use of the source register?
1183 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1184 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1185
1186 // FISTP64m is strange because there isn't a non-popping versions.
1187 // If we have one _and_ we don't want to pop the operand, duplicate the value
1188 // on the stack instead of moving it. This ensure that popping the value is
1189 // always ok.
1190 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1191 //
1192 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1193 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1194 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1196 MI.getOpcode() == X86::IST_Fp64m64 ||
1197 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1198 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1200 MI.getOpcode() == X86::IST_Fp64m80 ||
1201 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1202 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1204 MI.getOpcode() == X86::ST_FpP80m)) {
1205 duplicateToTop(Reg, ScratchFPReg, I);
1206 } else {
1207 moveToTop(Reg, I); // Move to the top of the stack...
1208 }
1209
1210 // Convert from the pseudo instruction to the concrete instruction.
1211 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1212 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1213 MI.addOperand(
1214 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1215
1216 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1217 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1218 MI.getOpcode() == X86::ST_FP80m) {
1219 if (StackTop == 0)
1220 report_fatal_error("Stack empty??");
1221 --StackTop;
1222 } else if (KillsSrc) { // Last use of operand?
1223 popStackAfter(I);
1224 }
1225
1226 MI.dropDebugNumber();
1227}
1228
1229
1230/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1231/// replace the value with a newly computed value. These instructions may have
1232/// non-fp operands after their FP operands.
1233///
1234/// Examples:
1235/// R1 = fchs R2
1236/// R1 = fadd R2, [mem]
1237///
1238void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1239 MachineInstr &MI = *I;
1240#ifndef NDEBUG
1241 unsigned NumOps = MI.getDesc().getNumOperands();
1242 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1243#endif
1244
1245 // Is this the last use of the source register?
1246 unsigned Reg = getFPReg(MI.getOperand(1));
1247 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1248
1249 if (KillsSrc) {
1250 // If this is the last use of the source register, just make sure it's on
1251 // the top of the stack.
1252 moveToTop(Reg, I);
1253 if (StackTop == 0)
1254 report_fatal_error("Stack cannot be empty!");
1255 --StackTop;
1256 pushReg(getFPReg(MI.getOperand(0)));
1257 } else {
1258 // If this is not the last use of the source register, _copy_ it to the top
1259 // of the stack.
1260 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1261 }
1262
1263 // Change from the pseudo instruction to the concrete instruction.
1264 MI.removeOperand(1); // Drop the source operand.
1265 MI.removeOperand(0); // Drop the destination operand.
1266 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1267 MI.dropDebugNumber();
1268}
1269
1270
1271//===----------------------------------------------------------------------===//
1272// Define tables of various ways to map pseudo instructions
1273//
1274
1275// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1276static const TableEntry ForwardST0Table[] = {
1277 { X86::ADD_Fp32 , X86::ADD_FST0r },
1278 { X86::ADD_Fp64 , X86::ADD_FST0r },
1279 { X86::ADD_Fp80 , X86::ADD_FST0r },
1280 { X86::DIV_Fp32 , X86::DIV_FST0r },
1281 { X86::DIV_Fp64 , X86::DIV_FST0r },
1282 { X86::DIV_Fp80 , X86::DIV_FST0r },
1283 { X86::MUL_Fp32 , X86::MUL_FST0r },
1284 { X86::MUL_Fp64 , X86::MUL_FST0r },
1285 { X86::MUL_Fp80 , X86::MUL_FST0r },
1286 { X86::SUB_Fp32 , X86::SUB_FST0r },
1287 { X86::SUB_Fp64 , X86::SUB_FST0r },
1288 { X86::SUB_Fp80 , X86::SUB_FST0r },
1289};
1290
1291// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1292static const TableEntry ReverseST0Table[] = {
1293 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1294 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1295 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1296 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1297 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1298 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1299 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1300 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1301 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1302 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1303 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1304 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1305};
1306
1307// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1308static const TableEntry ForwardSTiTable[] = {
1309 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1310 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1311 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1312 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1313 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1314 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1315 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1316 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1317 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1318 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1319 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1320 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1321};
1322
1323// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1324static const TableEntry ReverseSTiTable[] = {
1325 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1326 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1327 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1328 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1329 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1330 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1331 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1332 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1333 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1334 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1335 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1336 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1337};
1338
1339
1340/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1341/// instructions which need to be simplified and possibly transformed.
1342///
1343/// Result: ST(0) = fsub ST(0), ST(i)
1344/// ST(i) = fsub ST(0), ST(i)
1345/// ST(0) = fsubr ST(0), ST(i)
1346/// ST(i) = fsubr ST(0), ST(i)
1347///
1348void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1351 MachineInstr &MI = *I;
1352
1353 unsigned NumOperands = MI.getDesc().getNumOperands();
1354 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1355 unsigned Dest = getFPReg(MI.getOperand(0));
1356 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1357 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1358 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1359 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1360 const DebugLoc &dl = MI.getDebugLoc();
1361
1362 unsigned TOS = getStackEntry(0);
1363
1364 // One of our operands must be on the top of the stack. If neither is yet, we
1365 // need to move one.
1366 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1367 // We can choose to move either operand to the top of the stack. If one of
1368 // the operands is killed by this instruction, we want that one so that we
1369 // can update right on top of the old version.
1370 if (KillsOp0) {
1371 moveToTop(Op0, I); // Move dead operand to TOS.
1372 TOS = Op0;
1373 } else if (KillsOp1) {
1374 moveToTop(Op1, I);
1375 TOS = Op1;
1376 } else {
1377 // All of the operands are live after this instruction executes, so we
1378 // cannot update on top of any operand. Because of this, we must
1379 // duplicate one of the stack elements to the top. It doesn't matter
1380 // which one we pick.
1381 //
1382 duplicateToTop(Op0, Dest, I);
1383 Op0 = TOS = Dest;
1384 KillsOp0 = true;
1385 }
1386 } else if (!KillsOp0 && !KillsOp1) {
1387 // If we DO have one of our operands at the top of the stack, but we don't
1388 // have a dead operand, we must duplicate one of the operands to a new slot
1389 // on the stack.
1390 duplicateToTop(Op0, Dest, I);
1391 Op0 = TOS = Dest;
1392 KillsOp0 = true;
1393 }
1394
1395 // Now we know that one of our operands is on the top of the stack, and at
1396 // least one of our operands is killed by this instruction.
1397 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1398 "Stack conditions not set up right!");
1399
1400 // We decide which form to use based on what is on the top of the stack, and
1401 // which operand is killed by this instruction.
1402 ArrayRef<TableEntry> InstTable;
1403 bool isForward = TOS == Op0;
1404 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1405 if (updateST0) {
1406 if (isForward)
1407 InstTable = ForwardST0Table;
1408 else
1409 InstTable = ReverseST0Table;
1410 } else {
1411 if (isForward)
1412 InstTable = ForwardSTiTable;
1413 else
1414 InstTable = ReverseSTiTable;
1415 }
1416
1417 int Opcode = Lookup(InstTable, MI.getOpcode());
1418 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1419
1420 // NotTOS - The register which is not on the top of stack...
1421 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1422
1423 // Replace the old instruction with a new instruction
1424 MBB->remove(&*I++);
1425 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1426
1427 if (!MI.mayRaiseFPException())
1428 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1429
1430 // If both operands are killed, pop one off of the stack in addition to
1431 // overwriting the other one.
1432 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1433 assert(!updateST0 && "Should have updated other operand!");
1434 popStackAfter(I); // Pop the top of stack
1435 }
1436
1437 // Update stack information so that we know the destination register is now on
1438 // the stack.
1439 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1440 assert(UpdatedSlot < StackTop && Dest < 7);
1441 Stack[UpdatedSlot] = Dest;
1442 RegMap[Dest] = UpdatedSlot;
1443 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1444}
1445
1446/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1447/// register arguments and no explicit destinations.
1448///
1449void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1450 MachineInstr &MI = *I;
1451
1452 unsigned NumOperands = MI.getDesc().getNumOperands();
1453 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1454 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1455 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1456 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1457 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1458
1459 // Make sure the first operand is on the top of stack, the other one can be
1460 // anywhere.
1461 moveToTop(Op0, I);
1462
1463 // Change from the pseudo instruction to the concrete instruction.
1464 MI.getOperand(0).setReg(getSTReg(Op1));
1465 MI.removeOperand(1);
1466 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1467 MI.dropDebugNumber();
1468
1469 // If any of the operands are killed by this instruction, free them.
1470 if (KillsOp0) freeStackSlotAfter(I, Op0);
1471 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1472}
1473
1474/// handleCondMovFP - Handle two address conditional move instructions. These
1475/// instructions move a st(i) register to st(0) iff a condition is true. These
1476/// instructions require that the first operand is at the top of the stack, but
1477/// otherwise don't modify the stack at all.
1478void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1479 MachineInstr &MI = *I;
1480
1481 unsigned Op0 = getFPReg(MI.getOperand(0));
1482 unsigned Op1 = getFPReg(MI.getOperand(2));
1483 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1484
1485 // The first operand *must* be on the top of the stack.
1486 moveToTop(Op0, I);
1487
1488 // Change the second operand to the stack register that the operand is in.
1489 // Change from the pseudo instruction to the concrete instruction.
1490 MI.removeOperand(0);
1491 MI.removeOperand(1);
1492 MI.getOperand(0).setReg(getSTReg(Op1));
1493 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1494 MI.dropDebugNumber();
1495
1496 // If we kill the second operand, make sure to pop it from the stack.
1497 if (Op0 != Op1 && KillsOp1) {
1498 // Get this value off of the register stack.
1499 freeStackSlotAfter(I, Op1);
1500 }
1501}
1502
1503
1504/// handleSpecialFP - Handle special instructions which behave unlike other
1505/// floating point instructions. This is primarily intended for use by pseudo
1506/// instructions.
1507///
1508void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1509 MachineInstr &MI = *Inst;
1510
1511 if (MI.isCall()) {
1512 handleCall(Inst);
1513 return;
1514 }
1515
1516 if (MI.isReturn()) {
1517 handleReturn(Inst);
1518 return;
1519 }
1520
1521 switch (MI.getOpcode()) {
1522 default: llvm_unreachable("Unknown SpecialFP instruction!");
1523 case TargetOpcode::COPY: {
1524 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1525 const MachineOperand &MO1 = MI.getOperand(1);
1526 const MachineOperand &MO0 = MI.getOperand(0);
1527 bool KillsSrc = MI.killsRegister(MO1.getReg());
1528
1529 // FP <- FP copy.
1530 unsigned DstFP = getFPReg(MO0);
1531 unsigned SrcFP = getFPReg(MO1);
1532 assert(isLive(SrcFP) && "Cannot copy dead register");
1533 if (KillsSrc) {
1534 // If the input operand is killed, we can just change the owner of the
1535 // incoming stack slot into the result.
1536 unsigned Slot = getSlot(SrcFP);
1537 Stack[Slot] = DstFP;
1538 RegMap[DstFP] = Slot;
1539 } else {
1540 // For COPY we just duplicate the specified value to a new stack slot.
1541 // This could be made better, but would require substantial changes.
1542 duplicateToTop(SrcFP, DstFP, Inst);
1543 }
1544 break;
1545 }
1546
1547 case TargetOpcode::IMPLICIT_DEF: {
1548 // All FP registers must be explicitly defined, so load a 0 instead.
1549 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1550 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1551 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1552 pushReg(Reg);
1553 break;
1554 }
1555
1556 case TargetOpcode::INLINEASM:
1557 case TargetOpcode::INLINEASM_BR: {
1558 // The inline asm MachineInstr currently only *uses* FP registers for the
1559 // 'f' constraint. These should be turned into the current ST(x) register
1560 // in the machine instr.
1561 //
1562 // There are special rules for x87 inline assembly. The compiler must know
1563 // exactly how many registers are popped and pushed implicitly by the asm.
1564 // Otherwise it is not possible to restore the stack state after the inline
1565 // asm.
1566 //
1567 // There are 3 kinds of input operands:
1568 //
1569 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1570 // popped input operand must be in a fixed stack slot, and it is either
1571 // tied to an output operand, or in the clobber list. The MI has ST use
1572 // and def operands for these inputs.
1573 //
1574 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1575 // preserved by the inline asm. The fixed stack slots must be STn-STm
1576 // following the popped inputs. A fixed input operand cannot be tied to
1577 // an output or appear in the clobber list. The MI has ST use operands
1578 // and no defs for these inputs.
1579 //
1580 // 3. Preserved inputs. These inputs use the "f" constraint which is
1581 // represented as an FP register. The inline asm won't change these
1582 // stack slots.
1583 //
1584 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1585 // registers do not count as output operands. The inline asm changes the
1586 // stack as if it popped all the popped inputs and then pushed all the
1587 // output operands.
1588
1589 // Scan the assembly for ST registers used, defined and clobbered. We can
1590 // only tell clobbers from defs by looking at the asm descriptor.
1591 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1592 unsigned NumOps = 0;
1593 SmallSet<unsigned, 1> FRegIdx;
1594 unsigned RCID;
1595
1596 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1597 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1598 unsigned Flags = MI.getOperand(i).getImm();
1599 const InlineAsm::Flag F(Flags);
1600
1601 NumOps = F.getNumOperandRegisters();
1602 if (NumOps != 1)
1603 continue;
1604 const MachineOperand &MO = MI.getOperand(i + 1);
1605 if (!MO.isReg())
1606 continue;
1607 unsigned STReg = MO.getReg() - X86::FP0;
1608 if (STReg >= 8)
1609 continue;
1610
1611 // If the flag has a register class constraint, this must be an operand
1612 // with constraint "f". Record its index and continue.
1613 if (F.hasRegClassConstraint(RCID)) {
1614 FRegIdx.insert(i + 1);
1615 continue;
1616 }
1617
1618 switch (F.getKind()) {
1619 case InlineAsm::Kind::RegUse:
1620 STUses |= (1u << STReg);
1621 break;
1622 case InlineAsm::Kind::RegDef:
1623 case InlineAsm::Kind::RegDefEarlyClobber:
1624 STDefs |= (1u << STReg);
1625 break;
1626 case InlineAsm::Kind::Clobber:
1627 STClobbers |= (1u << STReg);
1628 break;
1629 default:
1630 break;
1631 }
1632 }
1633
1634 if (STUses && !isMask_32(STUses))
1635 MI.emitError("fixed input regs must be last on the x87 stack");
1636 unsigned NumSTUses = llvm::countr_one(STUses);
1637
1638 // Defs must be contiguous from the stack top. ST0-STn.
1639 if (STDefs && !isMask_32(STDefs)) {
1640 MI.emitError("output regs must be last on the x87 stack");
1641 STDefs = NextPowerOf2(STDefs) - 1;
1642 }
1643 unsigned NumSTDefs = llvm::countr_one(STDefs);
1644
1645 // So must the clobbered stack slots. ST0-STm, m >= n.
1646 if (STClobbers && !isMask_32(STDefs | STClobbers))
1647 MI.emitError("clobbers must be last on the x87 stack");
1648
1649 // Popped inputs are the ones that are also clobbered or defined.
1650 unsigned STPopped = STUses & (STDefs | STClobbers);
1651 if (STPopped && !isMask_32(STPopped))
1652 MI.emitError("implicitly popped regs must be last on the x87 stack");
1653 unsigned NumSTPopped = llvm::countr_one(STPopped);
1654
1655 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1656 << NumSTPopped << ", and defines " << NumSTDefs
1657 << " regs.\n");
1658
1659#ifndef NDEBUG
1660 // If any input operand uses constraint "f", all output register
1661 // constraints must be early-clobber defs.
1662 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1663 if (FRegIdx.count(I)) {
1664 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1665 "Operands with constraint \"f\" cannot overlap with defs");
1666 }
1667#endif
1668
1669 // Collect all FP registers (register operands with constraints "t", "u",
1670 // and "f") to kill afer the instruction.
1671 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1672 for (const MachineOperand &Op : MI.operands()) {
1673 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1674 continue;
1675 unsigned FPReg = getFPReg(Op);
1676
1677 // If we kill this operand, make sure to pop it from the stack after the
1678 // asm. We just remember it for now, and pop them all off at the end in
1679 // a batch.
1680 if (Op.isUse() && Op.isKill())
1681 FPKills |= 1U << FPReg;
1682 }
1683
1684 // Do not include registers that are implicitly popped by defs/clobbers.
1685 FPKills &= ~(STDefs | STClobbers);
1686
1687 // Now we can rearrange the live registers to match what was requested.
1688 unsigned char STUsesArray[8];
1689
1690 for (unsigned I = 0; I < NumSTUses; ++I)
1691 STUsesArray[I] = I;
1692
1693 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1694 LLVM_DEBUG({
1695 dbgs() << "Before asm: ";
1696 dumpStack();
1697 });
1698
1699 // With the stack layout fixed, rewrite the FP registers.
1700 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1701 MachineOperand &Op = MI.getOperand(i);
1702 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1703 continue;
1704
1705 unsigned FPReg = getFPReg(Op);
1706
1707 if (FRegIdx.count(i))
1708 // Operand with constraint "f".
1709 Op.setReg(getSTReg(FPReg));
1710 else
1711 // Operand with a single register class constraint ("t" or "u").
1712 Op.setReg(X86::ST0 + FPReg);
1713 }
1714
1715 // Simulate the inline asm popping its inputs and pushing its outputs.
1716 StackTop -= NumSTPopped;
1717
1718 for (unsigned i = 0; i < NumSTDefs; ++i)
1719 pushReg(NumSTDefs - i - 1);
1720
1721 // If this asm kills any FP registers (is the last use of them) we must
1722 // explicitly emit pop instructions for them. Do this now after the asm has
1723 // executed so that the ST(x) numbers are not off (which would happen if we
1724 // did this inline with operand rewriting).
1725 //
1726 // Note: this might be a non-optimal pop sequence. We might be able to do
1727 // better by trying to pop in stack order or something.
1728 while (FPKills) {
1729 unsigned FPReg = llvm::countr_zero(FPKills);
1730 if (isLive(FPReg))
1731 freeStackSlotAfter(Inst, FPReg);
1732 FPKills &= ~(1U << FPReg);
1733 }
1734
1735 // Don't delete the inline asm!
1736 return;
1737 }
1738 }
1739
1740 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1741
1742 // We want to leave I pointing to the previous instruction, but what if we
1743 // just erased the first instruction?
1744 if (Inst == MBB->begin()) {
1745 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1746 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1747 } else
1748 --Inst;
1749}
1750
1751void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1752 const TargetRegisterInfo &TRI =
1754 LivePhysRegs LPR(TRI);
1755
1756 LPR.addLiveOuts(MBB);
1757
1758 for (MachineInstr &MI : llvm::reverse(MBB)) {
1759 if (MI.isDebugInstr())
1760 continue;
1761
1762 std::bitset<8> Defs;
1764
1765 for (auto &MO : MI.operands()) {
1766 if (!MO.isReg())
1767 continue;
1768
1769 unsigned Reg = MO.getReg() - X86::FP0;
1770
1771 if (Reg >= 8)
1772 continue;
1773
1774 if (MO.isDef()) {
1775 Defs.set(Reg);
1776 if (!LPR.contains(MO.getReg()))
1777 MO.setIsDead();
1778 } else
1779 Uses.push_back(&MO);
1780 }
1781
1782 for (auto *MO : Uses)
1783 if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1784 MO->setIsKill();
1785
1786 LPR.stepBackward(MI);
1787 }
1788}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static const TableEntry ReverseST0Table[]
X86 FP Stackifier
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
#define DEBUG_TYPE
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
unsigned size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_type size() const
Definition: SmallPtrSet.h:94
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:203
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
Definition: X86BaseInfo.h:808
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
Definition: X86BaseInfo.h:790
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
Definition: X86BaseInfo.h:797
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
Definition: X86BaseInfo.h:792
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
Definition: X86BaseInfo.h:794
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
Definition: X86BaseInfo.h:804
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
Definition: X86BaseInfo.h:806
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
Definition: X86BaseInfo.h:801
@ AddrNumOperands
Definition: X86BaseInfo.h:36
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:240
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1950
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
std::pair< iterator, bool > insert(NodeRef N)