LLVM 19.0.0git
X86FloatingPoint.cpp
Go to the documentation of this file.
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/Passes.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/InlineAsm.h"
43#include "llvm/Support/Debug.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP , "Number of floating point instructions");
55
56namespace {
57 const unsigned ScratchFPReg = 7;
58
59 struct FPS : public MachineFunctionPass {
60 static char ID;
61 FPS() : MachineFunctionPass(ID) {
62 // This is really only to keep valgrind quiet.
63 // The logic in isLive() is too much for it.
64 memset(Stack, 0, sizeof(Stack));
65 memset(RegMap, 0, sizeof(RegMap));
66 }
67
68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
74 }
75
76 bool runOnMachineFunction(MachineFunction &MF) override;
77
80 MachineFunctionProperties::Property::NoVRegs);
81 }
82
83 StringRef getPassName() const override { return "X86 FP Stackifier"; }
84
85 private:
86 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
87
88 // Two CFG edges are related if they leave the same block, or enter the same
89 // block. The transitive closure of an edge under this relation is a
90 // LiveBundle. It represents a set of CFG edges where the live FP stack
91 // registers must be allocated identically in the x87 stack.
92 //
93 // A LiveBundle is usually all the edges leaving a block, or all the edges
94 // entering a block, but it can contain more edges if critical edges are
95 // present.
96 //
97 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
98 // but the exact mapping of FP registers to stack slots is fixed later.
99 struct LiveBundle {
100 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
101 unsigned Mask = 0;
102
103 // Number of pre-assigned live registers in FixStack. This is 0 when the
104 // stack order has not yet been fixed.
105 unsigned FixCount = 0;
106
107 // Assigned stack order for live-in registers.
108 // FixStack[i] == getStackEntry(i) for all i < FixCount.
109 unsigned char FixStack[8];
110
111 LiveBundle() = default;
112
113 // Have the live registers been assigned a stack order yet?
114 bool isFixed() const { return !Mask || FixCount; }
115 };
116
117 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
118 // with no live FP registers.
119 SmallVector<LiveBundle, 8> LiveBundles;
120
121 // The edge bundle analysis provides indices into the LiveBundles vector.
122 EdgeBundles *Bundles = nullptr;
123
124 // Return a bitmask of FP registers in block's live-in list.
125 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
126 unsigned Mask = 0;
128 I != MBB->livein_end(); ) {
129 MCPhysReg Reg = I->PhysReg;
130 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
132 Mask |= 1 << (Reg - X86::FP0);
133 if (RemoveFPs) {
134 I = MBB->removeLiveIn(I);
135 continue;
136 }
137 }
138 ++I;
139 }
140 return Mask;
141 }
142
143 // Partition all the CFG edges into LiveBundles.
144 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
145
146 MachineBasicBlock *MBB = nullptr; // Current basic block
147
148 // The hardware keeps track of how many FP registers are live, so we have
149 // to model that exactly. Usually, each live register corresponds to an
150 // FP<n> register, but when dealing with calls, returns, and inline
151 // assembly, it is sometimes necessary to have live scratch registers.
152 unsigned Stack[8]; // FP<n> Registers in each stack slot...
153 unsigned StackTop = 0; // The current top of the FP stack.
154
155 enum {
156 NumFPRegs = 8 // Including scratch pseudo-registers.
157 };
158
159 // For each live FP<n> register, point to its Stack[] entry.
160 // The first entries correspond to FP0-FP6, the rest are scratch registers
161 // used when we need slightly different live registers than what the
162 // register allocator thinks.
163 unsigned RegMap[NumFPRegs];
164
165 // Set up our stack model to match the incoming registers to MBB.
166 void setupBlockStack();
167
168 // Shuffle live registers to match the expectations of successor blocks.
169 void finishBlockStack();
170
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack() const {
173 dbgs() << "Stack contents:";
174 for (unsigned i = 0; i != StackTop; ++i) {
175 dbgs() << " FP" << Stack[i];
176 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
177 }
178 }
179#endif
180
181 /// getSlot - Return the stack slot number a particular register number is
182 /// in.
183 unsigned getSlot(unsigned RegNo) const {
184 assert(RegNo < NumFPRegs && "Regno out of range!");
185 return RegMap[RegNo];
186 }
187
188 /// isLive - Is RegNo currently live in the stack?
189 bool isLive(unsigned RegNo) const {
190 unsigned Slot = getSlot(RegNo);
191 return Slot < StackTop && Stack[Slot] == RegNo;
192 }
193
194 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
195 unsigned getStackEntry(unsigned STi) const {
196 if (STi >= StackTop)
197 report_fatal_error("Access past stack top!");
198 return Stack[StackTop-1-STi];
199 }
200
201 /// getSTReg - Return the X86::ST(i) register which contains the specified
202 /// FP<RegNo> register.
203 unsigned getSTReg(unsigned RegNo) const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
205 }
206
207 // pushReg - Push the specified FP<n> register onto the stack.
208 void pushReg(unsigned Reg) {
209 assert(Reg < NumFPRegs && "Register number out of range!");
210 if (StackTop >= 8)
211 report_fatal_error("Stack overflow!");
212 Stack[StackTop] = Reg;
213 RegMap[Reg] = StackTop++;
214 }
215
216 // popReg - Pop a register from the stack.
217 void popReg() {
218 if (StackTop == 0)
219 report_fatal_error("Cannot pop empty stack!");
220 RegMap[Stack[--StackTop]] = ~0; // Update state
221 }
222
223 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
224 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
225 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
226 if (isAtTop(RegNo)) return;
227
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
230
231 // Swap the slots the regs are in.
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
233
234 // Swap stack slot contents.
235 if (RegMap[RegOnTop] >= StackTop)
236 report_fatal_error("Access past stack top!");
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
238
239 // Emit an fxch to update the runtime processors version of the state.
240 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
241 ++NumFXCH;
242 }
243
244 void duplicateToTop(unsigned RegNo, unsigned AsReg,
246 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
247 unsigned STReg = getSTReg(RegNo);
248 pushReg(AsReg); // New register on top of stack
249
250 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
251 }
252
253 /// popStackAfter - Pop the current value off of the top of the FP stack
254 /// after the specified instruction.
255 void popStackAfter(MachineBasicBlock::iterator &I);
256
257 /// freeStackSlotAfter - Free the specified register from the register
258 /// stack, so that it is no longer in a register. If the register is
259 /// currently at the top of the stack, we just pop the current instruction,
260 /// otherwise we store the current top-of-stack into the specified slot,
261 /// then pop the top of stack.
262 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
263
264 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
265 /// instruction.
267 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
268
269 /// Adjust the live registers to be the set in Mask.
270 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
271
272 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
273 /// st(0), FP reg FixStack[1] is st(1) etc.
274 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
276
277 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
278
279 void handleCall(MachineBasicBlock::iterator &I);
280 void handleReturn(MachineBasicBlock::iterator &I);
281 void handleZeroArgFP(MachineBasicBlock::iterator &I);
282 void handleOneArgFP(MachineBasicBlock::iterator &I);
283 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
284 void handleTwoArgFP(MachineBasicBlock::iterator &I);
285 void handleCompareFP(MachineBasicBlock::iterator &I);
286 void handleCondMovFP(MachineBasicBlock::iterator &I);
287 void handleSpecialFP(MachineBasicBlock::iterator &I);
288
289 // Check if a COPY instruction is using FP registers.
290 static bool isFPCopy(MachineInstr &MI) {
291 Register DstReg = MI.getOperand(0).getReg();
292 Register SrcReg = MI.getOperand(1).getReg();
293
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
296 }
297
298 void setKillFlags(MachineBasicBlock &MBB) const;
299 };
300}
301
302char FPS::ID = 0;
303
304INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
305 false, false)
309
311
312/// getFPReg - Return the X86::FPx register number for the specified operand.
313/// For example, this returns 3 for X86::FP3.
314static unsigned getFPReg(const MachineOperand &MO) {
315 assert(MO.isReg() && "Expected an FP register!");
316 Register Reg = MO.getReg();
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
318 return Reg - X86::FP0;
319}
320
321/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
322/// register references into FP stack references.
323///
324bool FPS::runOnMachineFunction(MachineFunction &MF) {
325 // We only need to run this pass if there are any FP registers used in this
326 // function. If it is all integer, there is nothing for us to do!
327 bool FPIsUsed = false;
328
329 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
330 const MachineRegisterInfo &MRI = MF.getRegInfo();
331 for (unsigned i = 0; i <= 6; ++i)
332 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
333 FPIsUsed = true;
334 break;
335 }
336
337 // Early exit.
338 if (!FPIsUsed) return false;
339
340 Bundles = &getAnalysis<EdgeBundles>();
342
343 // Prepare cross-MBB liveness.
344 bundleCFGRecomputeKillFlags(MF);
345
346 StackTop = 0;
347
348 // Process the function in depth first order so that we process at least one
349 // of the predecessors for every reachable block in the function.
351 MachineBasicBlock *Entry = &MF.front();
352
353 LiveBundle &Bundle =
354 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
355
356 // In regcall convention, some FP registers may not be passed through
357 // the stack, so they will need to be assigned to the stack first
358 if ((Entry->getParent()->getFunction().getCallingConv() ==
359 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
360 // In the register calling convention, up to one FP argument could be
361 // saved in the first FP register.
362 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
363 // that the FP registers contain arguments.
364 // The actual value is passed in FP0.
365 // Here we fix the stack and mark FP0 as pre-assigned register.
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
368 Bundle.FixCount = 1;
369 Bundle.FixStack[0] = 0;
370 }
371
372 bool Changed = false;
373 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
374 Changed |= processBasicBlock(MF, *BB);
375
376 // Process any unreachable blocks in arbitrary order now.
377 if (MF.size() != Processed.size())
378 for (MachineBasicBlock &BB : MF)
379 if (Processed.insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
381
382 LiveBundles.clear();
383
384 return Changed;
385}
386
387/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
388/// live-out sets for the FP registers. Consistent means that the set of
389/// registers live-out from a block is identical to the live-in set of all
390/// successors. This is not enforced by the normal live-in lists since
391/// registers may be implicitly defined, or not used by all successors.
392void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
393 assert(LiveBundles.empty() && "Stale data in LiveBundles");
394 LiveBundles.resize(Bundles->getNumBundles());
395
396 // Gather the actual live-in masks for all MBBs.
397 for (MachineBasicBlock &MBB : MF) {
398 setKillFlags(MBB);
399
400 const unsigned Mask = calcLiveInMask(&MBB, false);
401 if (!Mask)
402 continue;
403 // Update MBB ingoing bundle mask.
404 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
405 }
406}
407
408/// processBasicBlock - Loop over all of the instructions in the basic block,
409/// transforming FP instructions into their stack form.
410///
411bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
412 bool Changed = false;
413 MBB = &BB;
414
415 setupBlockStack();
416
417 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
418 MachineInstr &MI = *I;
419 uint64_t Flags = MI.getDesc().TSFlags;
420
421 unsigned FPInstClass = Flags & X86II::FPTypeMask;
422 if (MI.isInlineAsm())
423 FPInstClass = X86II::SpecialFP;
424
425 if (MI.isCopy() && isFPCopy(MI))
426 FPInstClass = X86II::SpecialFP;
427
428 if (MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
430 FPInstClass = X86II::SpecialFP;
431
432 if (MI.isCall())
433 FPInstClass = X86II::SpecialFP;
434
435 if (FPInstClass == X86II::NotFP)
436 continue; // Efficiently ignore non-fp insts!
437
438 MachineInstr *PrevMI = nullptr;
439 if (I != BB.begin())
440 PrevMI = &*std::prev(I);
441
442 ++NumFP; // Keep track of # of pseudo instrs
443 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
444
445 // Get dead variables list now because the MI pointer may be deleted as part
446 // of processing!
448 for (const MachineOperand &MO : MI.operands())
449 if (MO.isReg() && MO.isDead())
450 DeadRegs.push_back(MO.getReg());
451
452 switch (FPInstClass) {
453 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
454 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
455 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
456 case X86II::TwoArgFP: handleTwoArgFP(I); break;
457 case X86II::CompareFP: handleCompareFP(I); break;
458 case X86II::CondMovFP: handleCondMovFP(I); break;
459 case X86II::SpecialFP: handleSpecialFP(I); break;
460 default: llvm_unreachable("Unknown FP Type!");
461 }
462
463 // Check to see if any of the values defined by this instruction are dead
464 // after definition. If so, pop them.
465 for (unsigned Reg : DeadRegs) {
466 // Check if Reg is live on the stack. An inline-asm register operand that
467 // is in the clobber list and marked dead might not be live on the stack.
468 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
469 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
470 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
471 freeStackSlotAfter(I, Reg-X86::FP0);
472 }
473 }
474
475 // Print out all of the instructions expanded to if -debug
476 LLVM_DEBUG({
477 MachineBasicBlock::iterator PrevI = PrevMI;
478 if (I == PrevI) {
479 dbgs() << "Just deleted pseudo instruction\n";
480 } else {
482 // Rewind to first instruction newly inserted.
483 while (Start != BB.begin() && std::prev(Start) != PrevI)
484 --Start;
485 dbgs() << "Inserted instructions:\n\t";
486 Start->print(dbgs());
487 while (++Start != std::next(I)) {
488 }
489 }
490 dumpStack();
491 });
492 (void)PrevMI;
493
494 Changed = true;
495 }
496
497 finishBlockStack();
498
499 return Changed;
500}
501
502/// setupBlockStack - Use the live bundles to set up our model of the stack
503/// to match predecessors' live out stack.
504void FPS::setupBlockStack() {
505 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
506 << " derived from " << MBB->getName() << ".\n");
507 StackTop = 0;
508 // Get the live-in bundle for MBB.
509 const LiveBundle &Bundle =
510 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
511
512 if (!Bundle.Mask) {
513 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
514 return;
515 }
516
517 // Depth-first iteration should ensure that we always have an assigned stack.
518 assert(Bundle.isFixed() && "Reached block before any predecessors");
519
520 // Push the fixed live-in registers.
521 for (unsigned i = Bundle.FixCount; i > 0; --i) {
522 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
523 << unsigned(Bundle.FixStack[i - 1]) << '\n');
524 pushReg(Bundle.FixStack[i-1]);
525 }
526
527 // Kill off unwanted live-ins. This can happen with a critical edge.
528 // FIXME: We could keep these live registers around as zombies. They may need
529 // to be revived at the end of a short block. It might save a few instrs.
530 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
531 adjustLiveRegs(Mask, MBB->begin());
532 LLVM_DEBUG(MBB->dump());
533}
534
535/// finishBlockStack - Revive live-outs that are implicitly defined out of
536/// MBB. Shuffle live registers to match the expected fixed stack of any
537/// predecessors, and ensure that all predecessors are expecting the same
538/// stack.
539void FPS::finishBlockStack() {
540 // The RET handling below takes care of return blocks for us.
541 if (MBB->succ_empty())
542 return;
543
544 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
545 << " derived from " << MBB->getName() << ".\n");
546
547 // Get MBB's live-out bundle.
548 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
549 LiveBundle &Bundle = LiveBundles[BundleIdx];
550
551 // We may need to kill and define some registers to match successors.
552 // FIXME: This can probably be combined with the shuffle below.
554 adjustLiveRegs(Bundle.Mask, Term);
555
556 if (!Bundle.Mask) {
557 LLVM_DEBUG(dbgs() << "No live-outs.\n");
558 return;
559 }
560
561 // Has the stack order been fixed yet?
562 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
563 if (Bundle.isFixed()) {
564 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
565 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
566 } else {
567 // Not fixed yet, we get to choose.
568 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
569 Bundle.FixCount = StackTop;
570 for (unsigned i = 0; i < StackTop; ++i)
571 Bundle.FixStack[i] = getStackEntry(i);
572 }
573}
574
575
576//===----------------------------------------------------------------------===//
577// Efficient Lookup Table Support
578//===----------------------------------------------------------------------===//
579
580namespace {
581 struct TableEntry {
582 uint16_t from;
583 uint16_t to;
584 bool operator<(const TableEntry &TE) const { return from < TE.from; }
585 friend bool operator<(const TableEntry &TE, unsigned V) {
586 return TE.from < V;
587 }
588 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
589 const TableEntry &TE) {
590 return V < TE.from;
591 }
592 };
593}
594
595static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
596 const TableEntry *I = llvm::lower_bound(Table, Opcode);
597 if (I != Table.end() && I->from == Opcode)
598 return I->to;
599 return -1;
600}
601
602#ifdef NDEBUG
603#define ASSERT_SORTED(TABLE)
604#else
605#define ASSERT_SORTED(TABLE) \
606 { \
607 static std::atomic<bool> TABLE##Checked(false); \
608 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
609 assert(is_sorted(TABLE) && \
610 "All lookup tables must be sorted for efficient access!"); \
611 TABLE##Checked.store(true, std::memory_order_relaxed); \
612 } \
613 }
614#endif
615
616//===----------------------------------------------------------------------===//
617// Register File -> Register Stack Mapping Methods
618//===----------------------------------------------------------------------===//
619
620// OpcodeTable - Sorted map of register instructions to their stack version.
621// The first element is an register file pseudo instruction, the second is the
622// concrete X86 instruction which uses the register stack.
623//
624static const TableEntry OpcodeTable[] = {
625 { X86::ABS_Fp32 , X86::ABS_F },
626 { X86::ABS_Fp64 , X86::ABS_F },
627 { X86::ABS_Fp80 , X86::ABS_F },
628 { X86::ADD_Fp32m , X86::ADD_F32m },
629 { X86::ADD_Fp64m , X86::ADD_F64m },
630 { X86::ADD_Fp64m32 , X86::ADD_F32m },
631 { X86::ADD_Fp80m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m64 , X86::ADD_F64m },
633 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
634 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
636 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
637 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
639 { X86::CHS_Fp32 , X86::CHS_F },
640 { X86::CHS_Fp64 , X86::CHS_F },
641 { X86::CHS_Fp80 , X86::CHS_F },
642 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
643 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
645 { X86::CMOVB_Fp32 , X86::CMOVB_F },
646 { X86::CMOVB_Fp64 , X86::CMOVB_F },
647 { X86::CMOVB_Fp80 , X86::CMOVB_F },
648 { X86::CMOVE_Fp32 , X86::CMOVE_F },
649 { X86::CMOVE_Fp64 , X86::CMOVE_F },
650 { X86::CMOVE_Fp80 , X86::CMOVE_F },
651 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
652 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
654 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
655 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
657 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
658 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
660 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
661 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
663 { X86::CMOVP_Fp32 , X86::CMOVP_F },
664 { X86::CMOVP_Fp64 , X86::CMOVP_F },
665 { X86::CMOVP_Fp80 , X86::CMOVP_F },
666 { X86::COM_FpIr32 , X86::COM_FIr },
667 { X86::COM_FpIr64 , X86::COM_FIr },
668 { X86::COM_FpIr80 , X86::COM_FIr },
669 { X86::COM_Fpr32 , X86::COM_FST0r },
670 { X86::COM_Fpr64 , X86::COM_FST0r },
671 { X86::COM_Fpr80 , X86::COM_FST0r },
672 { X86::DIVR_Fp32m , X86::DIVR_F32m },
673 { X86::DIVR_Fp64m , X86::DIVR_F64m },
674 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
675 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
676 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
677 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
678 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
679 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
680 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
681 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
682 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
683 { X86::DIV_Fp32m , X86::DIV_F32m },
684 { X86::DIV_Fp64m , X86::DIV_F64m },
685 { X86::DIV_Fp64m32 , X86::DIV_F32m },
686 { X86::DIV_Fp80m32 , X86::DIV_F32m },
687 { X86::DIV_Fp80m64 , X86::DIV_F64m },
688 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
689 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
690 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
691 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
692 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
693 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
694 { X86::ILD_Fp16m32 , X86::ILD_F16m },
695 { X86::ILD_Fp16m64 , X86::ILD_F16m },
696 { X86::ILD_Fp16m80 , X86::ILD_F16m },
697 { X86::ILD_Fp32m32 , X86::ILD_F32m },
698 { X86::ILD_Fp32m64 , X86::ILD_F32m },
699 { X86::ILD_Fp32m80 , X86::ILD_F32m },
700 { X86::ILD_Fp64m32 , X86::ILD_F64m },
701 { X86::ILD_Fp64m64 , X86::ILD_F64m },
702 { X86::ILD_Fp64m80 , X86::ILD_F64m },
703 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
704 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
705 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
707 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
708 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
710 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
711 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
712 { X86::IST_Fp16m32 , X86::IST_F16m },
713 { X86::IST_Fp16m64 , X86::IST_F16m },
714 { X86::IST_Fp16m80 , X86::IST_F16m },
715 { X86::IST_Fp32m32 , X86::IST_F32m },
716 { X86::IST_Fp32m64 , X86::IST_F32m },
717 { X86::IST_Fp32m80 , X86::IST_F32m },
718 { X86::IST_Fp64m32 , X86::IST_FP64m },
719 { X86::IST_Fp64m64 , X86::IST_FP64m },
720 { X86::IST_Fp64m80 , X86::IST_FP64m },
721 { X86::LD_Fp032 , X86::LD_F0 },
722 { X86::LD_Fp064 , X86::LD_F0 },
723 { X86::LD_Fp080 , X86::LD_F0 },
724 { X86::LD_Fp132 , X86::LD_F1 },
725 { X86::LD_Fp164 , X86::LD_F1 },
726 { X86::LD_Fp180 , X86::LD_F1 },
727 { X86::LD_Fp32m , X86::LD_F32m },
728 { X86::LD_Fp32m64 , X86::LD_F32m },
729 { X86::LD_Fp32m80 , X86::LD_F32m },
730 { X86::LD_Fp64m , X86::LD_F64m },
731 { X86::LD_Fp64m80 , X86::LD_F64m },
732 { X86::LD_Fp80m , X86::LD_F80m },
733 { X86::MUL_Fp32m , X86::MUL_F32m },
734 { X86::MUL_Fp64m , X86::MUL_F64m },
735 { X86::MUL_Fp64m32 , X86::MUL_F32m },
736 { X86::MUL_Fp80m32 , X86::MUL_F32m },
737 { X86::MUL_Fp80m64 , X86::MUL_F64m },
738 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
739 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
740 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
741 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
742 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
743 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
744 { X86::SQRT_Fp32 , X86::SQRT_F },
745 { X86::SQRT_Fp64 , X86::SQRT_F },
746 { X86::SQRT_Fp80 , X86::SQRT_F },
747 { X86::ST_Fp32m , X86::ST_F32m },
748 { X86::ST_Fp64m , X86::ST_F64m },
749 { X86::ST_Fp64m32 , X86::ST_F32m },
750 { X86::ST_Fp80m32 , X86::ST_F32m },
751 { X86::ST_Fp80m64 , X86::ST_F64m },
752 { X86::ST_FpP80m , X86::ST_FP80m },
753 { X86::SUBR_Fp32m , X86::SUBR_F32m },
754 { X86::SUBR_Fp64m , X86::SUBR_F64m },
755 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
756 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
758 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
759 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
761 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
762 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
764 { X86::SUB_Fp32m , X86::SUB_F32m },
765 { X86::SUB_Fp64m , X86::SUB_F64m },
766 { X86::SUB_Fp64m32 , X86::SUB_F32m },
767 { X86::SUB_Fp80m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m64 , X86::SUB_F64m },
769 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
770 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
772 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
773 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
775 { X86::TST_Fp32 , X86::TST_F },
776 { X86::TST_Fp64 , X86::TST_F },
777 { X86::TST_Fp80 , X86::TST_F },
778 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
779 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
781 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
782 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
784 { X86::XAM_Fp32 , X86::XAM_F },
785 { X86::XAM_Fp64 , X86::XAM_F },
786 { X86::XAM_Fp80 , X86::XAM_F },
787};
788
789static unsigned getConcreteOpcode(unsigned Opcode) {
791 int Opc = Lookup(OpcodeTable, Opcode);
792 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
793 return Opc;
794}
795
796//===----------------------------------------------------------------------===//
797// Helper Methods
798//===----------------------------------------------------------------------===//
799
800// PopTable - Sorted map of instructions to their popping version. The first
801// element is an instruction, the second is the version which pops.
802//
803static const TableEntry PopTable[] = {
804 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
805
806 { X86::COMP_FST0r, X86::FCOMPP },
807 { X86::COM_FIr , X86::COM_FIPr },
808 { X86::COM_FST0r , X86::COMP_FST0r },
809
810 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
811 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
812
813 { X86::IST_F16m , X86::IST_FP16m },
814 { X86::IST_F32m , X86::IST_FP32m },
815
816 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
817
818 { X86::ST_F32m , X86::ST_FP32m },
819 { X86::ST_F64m , X86::ST_FP64m },
820 { X86::ST_Frr , X86::ST_FPrr },
821
822 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
823 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
824
825 { X86::UCOM_FIr , X86::UCOM_FIPr },
826
827 { X86::UCOM_FPr , X86::UCOM_FPPr },
828 { X86::UCOM_Fr , X86::UCOM_FPr },
829};
830
832 if (const MachineOperand *MO =
833 MI.findRegisterDefOperand(X86::FPSW, /*TRI=*/nullptr))
834 if (!MO->isDead())
835 return true;
836 return false;
837}
838
841 MachineBasicBlock &MBB = *I->getParent();
842 while (++I != MBB.end()) {
843 MachineInstr &MI = *I;
845 return I;
846 }
847 return MBB.end();
848}
849
850/// popStackAfter - Pop the current value off of the top of the FP stack after
851/// the specified instruction. This attempts to be sneaky and combine the pop
852/// into the instruction itself if possible. The iterator is left pointing to
853/// the last instruction, be it a new pop instruction inserted, or the old
854/// instruction if it was modified in place.
855///
856void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
857 MachineInstr &MI = *I;
858 const DebugLoc &dl = MI.getDebugLoc();
860
861 popReg();
862
863 // Check to see if there is a popping version of this instruction...
864 int Opcode = Lookup(PopTable, I->getOpcode());
865 if (Opcode != -1) {
866 I->setDesc(TII->get(Opcode));
867 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
868 I->removeOperand(0);
869 MI.dropDebugNumber();
870 } else { // Insert an explicit pop
871 // If this instruction sets FPSW, which is read in following instruction,
872 // insert pop after that reader.
874 MachineBasicBlock &MBB = *MI.getParent();
876 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, /*TRI=*/nullptr))
877 I = Next;
878 }
879 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
880 }
881}
882
883/// freeStackSlotAfter - Free the specified register from the register stack, so
884/// that it is no longer in a register. If the register is currently at the top
885/// of the stack, we just pop the current instruction, otherwise we store the
886/// current top-of-stack into the specified slot, then pop the top of stack.
887void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
888 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
889 popStackAfter(I);
890 return;
891 }
892
893 // Otherwise, store the top of stack into the dead slot, killing the operand
894 // without having to add in an explicit xchg then pop.
895 //
896 I = freeStackSlotBefore(++I, FPRegNo);
897}
898
899/// freeStackSlotBefore - Free the specified register without trying any
900/// folding.
902FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
903 unsigned STReg = getSTReg(FPRegNo);
904 unsigned OldSlot = getSlot(FPRegNo);
905 unsigned TopReg = Stack[StackTop-1];
906 Stack[OldSlot] = TopReg;
907 RegMap[TopReg] = OldSlot;
908 RegMap[FPRegNo] = ~0;
909 Stack[--StackTop] = ~0;
910 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
911 .addReg(STReg)
912 .getInstr();
913}
914
915/// adjustLiveRegs - Kill and revive registers such that exactly the FP
916/// registers with a bit in Mask are live.
917void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
918 unsigned Defs = Mask;
919 unsigned Kills = 0;
920 for (unsigned i = 0; i < StackTop; ++i) {
921 unsigned RegNo = Stack[i];
922 if (!(Defs & (1 << RegNo)))
923 // This register is live, but we don't want it.
924 Kills |= (1 << RegNo);
925 else
926 // We don't need to imp-def this live register.
927 Defs &= ~(1 << RegNo);
928 }
929 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
930
931 // Produce implicit-defs for free by using killed registers.
932 while (Kills && Defs) {
933 unsigned KReg = llvm::countr_zero(Kills);
934 unsigned DReg = llvm::countr_zero(Defs);
935 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
936 << "\n");
937 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
938 std::swap(RegMap[KReg], RegMap[DReg]);
939 Kills &= ~(1 << KReg);
940 Defs &= ~(1 << DReg);
941 }
942
943 // Kill registers by popping.
944 if (Kills && I != MBB->begin()) {
945 MachineBasicBlock::iterator I2 = std::prev(I);
946 while (StackTop) {
947 unsigned KReg = getStackEntry(0);
948 if (!(Kills & (1 << KReg)))
949 break;
950 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
951 popStackAfter(I2);
952 Kills &= ~(1 << KReg);
953 }
954 }
955
956 // Manually kill the rest.
957 while (Kills) {
958 unsigned KReg = llvm::countr_zero(Kills);
959 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
960 freeStackSlotBefore(I, KReg);
961 Kills &= ~(1 << KReg);
962 }
963
964 // Load zeros for all the imp-defs.
965 while(Defs) {
966 unsigned DReg = llvm::countr_zero(Defs);
967 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
968 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
969 pushReg(DReg);
970 Defs &= ~(1 << DReg);
971 }
972
973 // Now we should have the correct registers live.
974 LLVM_DEBUG(dumpStack());
975 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
976}
977
978/// shuffleStackTop - emit fxch instructions before I to shuffle the top
979/// FixCount entries into the order given by FixStack.
980/// FIXME: Is there a better algorithm than insertion sort?
981void FPS::shuffleStackTop(const unsigned char *FixStack,
982 unsigned FixCount,
984 // Move items into place, starting from the desired stack bottom.
985 while (FixCount--) {
986 // Old register at position FixCount.
987 unsigned OldReg = getStackEntry(FixCount);
988 // Desired register at position FixCount.
989 unsigned Reg = FixStack[FixCount];
990 if (Reg == OldReg)
991 continue;
992 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
993 moveToTop(Reg, I);
994 if (FixCount > 0)
995 moveToTop(OldReg, I);
996 }
997 LLVM_DEBUG(dumpStack());
998}
999
1000
1001//===----------------------------------------------------------------------===//
1002// Instruction transformation implementation
1003//===----------------------------------------------------------------------===//
1004
1005void FPS::handleCall(MachineBasicBlock::iterator &I) {
1006 MachineInstr &MI = *I;
1007 unsigned STReturns = 0;
1008
1009 bool ClobbersFPStack = false;
1010 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1011 MachineOperand &Op = MI.getOperand(i);
1012 // Check if this call clobbers the FP stack.
1013 // is sufficient.
1014 if (Op.isRegMask()) {
1015 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1016#ifndef NDEBUG
1017 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1018 for (unsigned i = 1; i != 8; ++i)
1019 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1020 "Inconsistent FP register clobber");
1021#endif
1022
1023 if (ClobbersFP0)
1024 ClobbersFPStack = true;
1025 }
1026
1027 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1028 continue;
1029
1030 assert(Op.isImplicit() && "Expected implicit def/use");
1031
1032 if (Op.isDef())
1033 STReturns |= 1 << getFPReg(Op);
1034
1035 // Remove the operand so that later passes don't see it.
1036 MI.removeOperand(i);
1037 --i;
1038 --e;
1039 }
1040
1041 // Most calls should have a regmask that clobbers the FP registers. If it
1042 // isn't present then the register allocator didn't spill the FP registers
1043 // so they are still on the stack.
1044 assert((ClobbersFPStack || STReturns == 0) &&
1045 "ST returns without FP stack clobber");
1046 if (!ClobbersFPStack)
1047 return;
1048
1049 unsigned N = llvm::countr_one(STReturns);
1050
1051 // FP registers used for function return must be consecutive starting at
1052 // FP0
1053 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1054
1055 // Reset the FP Stack - It is required because of possible leftovers from
1056 // passed arguments. The caller should assume that the FP stack is
1057 // returned empty (unless the callee returns values on FP stack).
1058 while (StackTop > 0)
1059 popReg();
1060
1061 for (unsigned I = 0; I < N; ++I)
1062 pushReg(N - I - 1);
1063
1064 // If this call has been modified, drop all variable values defined by it.
1065 // We can't track them once they've been stackified.
1066 if (STReturns)
1067 I->dropDebugNumber();
1068}
1069
1070/// If RET has an FP register use operand, pass the first one in ST(0) and
1071/// the second one in ST(1).
1072void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1073 MachineInstr &MI = *I;
1074
1075 // Find the register operands.
1076 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1077 unsigned LiveMask = 0;
1078
1079 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1080 MachineOperand &Op = MI.getOperand(i);
1081 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1082 continue;
1083 // FP Register uses must be kills unless there are two uses of the same
1084 // register, in which case only one will be a kill.
1085 assert(Op.isUse() &&
1086 (Op.isKill() || // Marked kill.
1087 getFPReg(Op) == FirstFPRegOp || // Second instance.
1088 MI.killsRegister(Op.getReg(),
1089 /*TRI=*/nullptr)) && // Later use is marked kill.
1090 "Ret only defs operands, and values aren't live beyond it");
1091
1092 if (FirstFPRegOp == ~0U)
1093 FirstFPRegOp = getFPReg(Op);
1094 else {
1095 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1096 SecondFPRegOp = getFPReg(Op);
1097 }
1098 LiveMask |= (1 << getFPReg(Op));
1099
1100 // Remove the operand so that later passes don't see it.
1101 MI.removeOperand(i);
1102 --i;
1103 --e;
1104 }
1105
1106 // We may have been carrying spurious live-ins, so make sure only the
1107 // returned registers are left live.
1108 adjustLiveRegs(LiveMask, MI);
1109 if (!LiveMask) return; // Quick check to see if any are possible.
1110
1111 // There are only four possibilities here:
1112 // 1) we are returning a single FP value. In this case, it has to be in
1113 // ST(0) already, so just declare success by removing the value from the
1114 // FP Stack.
1115 if (SecondFPRegOp == ~0U) {
1116 // Assert that the top of stack contains the right FP register.
1117 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1118 "Top of stack not the right register for RET!");
1119
1120 // Ok, everything is good, mark the value as not being on the stack
1121 // anymore so that our assertion about the stack being empty at end of
1122 // block doesn't fire.
1123 StackTop = 0;
1124 return;
1125 }
1126
1127 // Otherwise, we are returning two values:
1128 // 2) If returning the same value for both, we only have one thing in the FP
1129 // stack. Consider: RET FP1, FP1
1130 if (StackTop == 1) {
1131 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1132 "Stack misconfiguration for RET!");
1133
1134 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1135 // register to hold it.
1136 unsigned NewReg = ScratchFPReg;
1137 duplicateToTop(FirstFPRegOp, NewReg, MI);
1138 FirstFPRegOp = NewReg;
1139 }
1140
1141 /// Okay we know we have two different FPx operands now:
1142 assert(StackTop == 2 && "Must have two values live!");
1143
1144 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1145 /// in ST(1). In this case, emit an fxch.
1146 if (getStackEntry(0) == SecondFPRegOp) {
1147 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1148 moveToTop(FirstFPRegOp, MI);
1149 }
1150
1151 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1152 /// ST(1). Just remove both from our understanding of the stack and return.
1153 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1154 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1155 StackTop = 0;
1156}
1157
1158/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1159///
1160void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1161 MachineInstr &MI = *I;
1162 unsigned DestReg = getFPReg(MI.getOperand(0));
1163
1164 // Change from the pseudo instruction to the concrete instruction.
1165 MI.removeOperand(0); // Remove the explicit ST(0) operand
1166 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1167 MI.addOperand(
1168 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1169
1170 // Result gets pushed on the stack.
1171 pushReg(DestReg);
1172
1173 MI.dropDebugNumber();
1174}
1175
1176/// handleOneArgFP - fst <mem>, ST(0)
1177///
1178void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1179 MachineInstr &MI = *I;
1180 unsigned NumOps = MI.getDesc().getNumOperands();
1181 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1182 "Can only handle fst* & ftst instructions!");
1183
1184 // Is this the last use of the source register?
1185 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1186 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1187
1188 // FISTP64m is strange because there isn't a non-popping versions.
1189 // If we have one _and_ we don't want to pop the operand, duplicate the value
1190 // on the stack instead of moving it. This ensure that popping the value is
1191 // always ok.
1192 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1193 //
1194 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1196 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1197 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1198 MI.getOpcode() == X86::IST_Fp64m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1200 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1201 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1202 MI.getOpcode() == X86::IST_Fp64m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1204 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1205 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1206 MI.getOpcode() == X86::ST_FpP80m)) {
1207 duplicateToTop(Reg, ScratchFPReg, I);
1208 } else {
1209 moveToTop(Reg, I); // Move to the top of the stack...
1210 }
1211
1212 // Convert from the pseudo instruction to the concrete instruction.
1213 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1214 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1215 MI.addOperand(
1216 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1217
1218 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1219 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1220 MI.getOpcode() == X86::ST_FP80m) {
1221 if (StackTop == 0)
1222 report_fatal_error("Stack empty??");
1223 --StackTop;
1224 } else if (KillsSrc) { // Last use of operand?
1225 popStackAfter(I);
1226 }
1227
1228 MI.dropDebugNumber();
1229}
1230
1231
1232/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1233/// replace the value with a newly computed value. These instructions may have
1234/// non-fp operands after their FP operands.
1235///
1236/// Examples:
1237/// R1 = fchs R2
1238/// R1 = fadd R2, [mem]
1239///
1240void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1241 MachineInstr &MI = *I;
1242#ifndef NDEBUG
1243 unsigned NumOps = MI.getDesc().getNumOperands();
1244 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1245#endif
1246
1247 // Is this the last use of the source register?
1248 unsigned Reg = getFPReg(MI.getOperand(1));
1249 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1250
1251 if (KillsSrc) {
1252 // If this is the last use of the source register, just make sure it's on
1253 // the top of the stack.
1254 moveToTop(Reg, I);
1255 if (StackTop == 0)
1256 report_fatal_error("Stack cannot be empty!");
1257 --StackTop;
1258 pushReg(getFPReg(MI.getOperand(0)));
1259 } else {
1260 // If this is not the last use of the source register, _copy_ it to the top
1261 // of the stack.
1262 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1263 }
1264
1265 // Change from the pseudo instruction to the concrete instruction.
1266 MI.removeOperand(1); // Drop the source operand.
1267 MI.removeOperand(0); // Drop the destination operand.
1268 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1269 MI.dropDebugNumber();
1270}
1271
1272
1273//===----------------------------------------------------------------------===//
1274// Define tables of various ways to map pseudo instructions
1275//
1276
1277// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1278static const TableEntry ForwardST0Table[] = {
1279 { X86::ADD_Fp32 , X86::ADD_FST0r },
1280 { X86::ADD_Fp64 , X86::ADD_FST0r },
1281 { X86::ADD_Fp80 , X86::ADD_FST0r },
1282 { X86::DIV_Fp32 , X86::DIV_FST0r },
1283 { X86::DIV_Fp64 , X86::DIV_FST0r },
1284 { X86::DIV_Fp80 , X86::DIV_FST0r },
1285 { X86::MUL_Fp32 , X86::MUL_FST0r },
1286 { X86::MUL_Fp64 , X86::MUL_FST0r },
1287 { X86::MUL_Fp80 , X86::MUL_FST0r },
1288 { X86::SUB_Fp32 , X86::SUB_FST0r },
1289 { X86::SUB_Fp64 , X86::SUB_FST0r },
1290 { X86::SUB_Fp80 , X86::SUB_FST0r },
1291};
1292
1293// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1294static const TableEntry ReverseST0Table[] = {
1295 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1296 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1297 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1298 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1299 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1300 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1301 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1302 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1303 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1304 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1305 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1306 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1307};
1308
1309// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1310static const TableEntry ForwardSTiTable[] = {
1311 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1312 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1313 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1314 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1315 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1316 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1317 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1318 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1319 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1320 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1321 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1322 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1323};
1324
1325// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1326static const TableEntry ReverseSTiTable[] = {
1327 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1328 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1329 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1330 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1331 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1332 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1333 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1334 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1335 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1336 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1337 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1338 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1339};
1340
1341
1342/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1343/// instructions which need to be simplified and possibly transformed.
1344///
1345/// Result: ST(0) = fsub ST(0), ST(i)
1346/// ST(i) = fsub ST(0), ST(i)
1347/// ST(0) = fsubr ST(0), ST(i)
1348/// ST(i) = fsubr ST(0), ST(i)
1349///
1350void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1353 MachineInstr &MI = *I;
1354
1355 unsigned NumOperands = MI.getDesc().getNumOperands();
1356 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1357 unsigned Dest = getFPReg(MI.getOperand(0));
1358 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1359 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1360 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1361 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1362 const DebugLoc &dl = MI.getDebugLoc();
1363
1364 unsigned TOS = getStackEntry(0);
1365
1366 // One of our operands must be on the top of the stack. If neither is yet, we
1367 // need to move one.
1368 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1369 // We can choose to move either operand to the top of the stack. If one of
1370 // the operands is killed by this instruction, we want that one so that we
1371 // can update right on top of the old version.
1372 if (KillsOp0) {
1373 moveToTop(Op0, I); // Move dead operand to TOS.
1374 TOS = Op0;
1375 } else if (KillsOp1) {
1376 moveToTop(Op1, I);
1377 TOS = Op1;
1378 } else {
1379 // All of the operands are live after this instruction executes, so we
1380 // cannot update on top of any operand. Because of this, we must
1381 // duplicate one of the stack elements to the top. It doesn't matter
1382 // which one we pick.
1383 //
1384 duplicateToTop(Op0, Dest, I);
1385 Op0 = TOS = Dest;
1386 KillsOp0 = true;
1387 }
1388 } else if (!KillsOp0 && !KillsOp1) {
1389 // If we DO have one of our operands at the top of the stack, but we don't
1390 // have a dead operand, we must duplicate one of the operands to a new slot
1391 // on the stack.
1392 duplicateToTop(Op0, Dest, I);
1393 Op0 = TOS = Dest;
1394 KillsOp0 = true;
1395 }
1396
1397 // Now we know that one of our operands is on the top of the stack, and at
1398 // least one of our operands is killed by this instruction.
1399 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1400 "Stack conditions not set up right!");
1401
1402 // We decide which form to use based on what is on the top of the stack, and
1403 // which operand is killed by this instruction.
1404 ArrayRef<TableEntry> InstTable;
1405 bool isForward = TOS == Op0;
1406 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1407 if (updateST0) {
1408 if (isForward)
1409 InstTable = ForwardST0Table;
1410 else
1411 InstTable = ReverseST0Table;
1412 } else {
1413 if (isForward)
1414 InstTable = ForwardSTiTable;
1415 else
1416 InstTable = ReverseSTiTable;
1417 }
1418
1419 int Opcode = Lookup(InstTable, MI.getOpcode());
1420 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1421
1422 // NotTOS - The register which is not on the top of stack...
1423 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1424
1425 // Replace the old instruction with a new instruction
1426 MBB->remove(&*I++);
1427 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1428
1429 if (!MI.mayRaiseFPException())
1430 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1431
1432 // If both operands are killed, pop one off of the stack in addition to
1433 // overwriting the other one.
1434 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1435 assert(!updateST0 && "Should have updated other operand!");
1436 popStackAfter(I); // Pop the top of stack
1437 }
1438
1439 // Update stack information so that we know the destination register is now on
1440 // the stack.
1441 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1442 assert(UpdatedSlot < StackTop && Dest < 7);
1443 Stack[UpdatedSlot] = Dest;
1444 RegMap[Dest] = UpdatedSlot;
1445 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1446}
1447
1448/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1449/// register arguments and no explicit destinations.
1450///
1451void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1452 MachineInstr &MI = *I;
1453
1454 unsigned NumOperands = MI.getDesc().getNumOperands();
1455 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1456 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1457 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1458 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1459 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1460
1461 // Make sure the first operand is on the top of stack, the other one can be
1462 // anywhere.
1463 moveToTop(Op0, I);
1464
1465 // Change from the pseudo instruction to the concrete instruction.
1466 MI.getOperand(0).setReg(getSTReg(Op1));
1467 MI.removeOperand(1);
1468 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1469 MI.dropDebugNumber();
1470
1471 // If any of the operands are killed by this instruction, free them.
1472 if (KillsOp0) freeStackSlotAfter(I, Op0);
1473 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1474}
1475
1476/// handleCondMovFP - Handle two address conditional move instructions. These
1477/// instructions move a st(i) register to st(0) iff a condition is true. These
1478/// instructions require that the first operand is at the top of the stack, but
1479/// otherwise don't modify the stack at all.
1480void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1481 MachineInstr &MI = *I;
1482
1483 unsigned Op0 = getFPReg(MI.getOperand(0));
1484 unsigned Op1 = getFPReg(MI.getOperand(2));
1485 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1486
1487 // The first operand *must* be on the top of the stack.
1488 moveToTop(Op0, I);
1489
1490 // Change the second operand to the stack register that the operand is in.
1491 // Change from the pseudo instruction to the concrete instruction.
1492 MI.removeOperand(0);
1493 MI.removeOperand(1);
1494 MI.getOperand(0).setReg(getSTReg(Op1));
1495 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1496 MI.dropDebugNumber();
1497
1498 // If we kill the second operand, make sure to pop it from the stack.
1499 if (Op0 != Op1 && KillsOp1) {
1500 // Get this value off of the register stack.
1501 freeStackSlotAfter(I, Op1);
1502 }
1503}
1504
1505
1506/// handleSpecialFP - Handle special instructions which behave unlike other
1507/// floating point instructions. This is primarily intended for use by pseudo
1508/// instructions.
1509///
1510void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1511 MachineInstr &MI = *Inst;
1512
1513 if (MI.isCall()) {
1514 handleCall(Inst);
1515 return;
1516 }
1517
1518 if (MI.isReturn()) {
1519 handleReturn(Inst);
1520 return;
1521 }
1522
1523 switch (MI.getOpcode()) {
1524 default: llvm_unreachable("Unknown SpecialFP instruction!");
1525 case TargetOpcode::COPY: {
1526 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1527 const MachineOperand &MO1 = MI.getOperand(1);
1528 const MachineOperand &MO0 = MI.getOperand(0);
1529 bool KillsSrc = MI.killsRegister(MO1.getReg(), /*TRI=*/nullptr);
1530
1531 // FP <- FP copy.
1532 unsigned DstFP = getFPReg(MO0);
1533 unsigned SrcFP = getFPReg(MO1);
1534 assert(isLive(SrcFP) && "Cannot copy dead register");
1535 if (KillsSrc) {
1536 // If the input operand is killed, we can just change the owner of the
1537 // incoming stack slot into the result.
1538 unsigned Slot = getSlot(SrcFP);
1539 Stack[Slot] = DstFP;
1540 RegMap[DstFP] = Slot;
1541 } else {
1542 // For COPY we just duplicate the specified value to a new stack slot.
1543 // This could be made better, but would require substantial changes.
1544 duplicateToTop(SrcFP, DstFP, Inst);
1545 }
1546 break;
1547 }
1548
1549 case TargetOpcode::IMPLICIT_DEF: {
1550 // All FP registers must be explicitly defined, so load a 0 instead.
1551 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1552 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1553 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1554 pushReg(Reg);
1555 break;
1556 }
1557
1558 case TargetOpcode::INLINEASM:
1559 case TargetOpcode::INLINEASM_BR: {
1560 // The inline asm MachineInstr currently only *uses* FP registers for the
1561 // 'f' constraint. These should be turned into the current ST(x) register
1562 // in the machine instr.
1563 //
1564 // There are special rules for x87 inline assembly. The compiler must know
1565 // exactly how many registers are popped and pushed implicitly by the asm.
1566 // Otherwise it is not possible to restore the stack state after the inline
1567 // asm.
1568 //
1569 // There are 3 kinds of input operands:
1570 //
1571 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1572 // popped input operand must be in a fixed stack slot, and it is either
1573 // tied to an output operand, or in the clobber list. The MI has ST use
1574 // and def operands for these inputs.
1575 //
1576 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1577 // preserved by the inline asm. The fixed stack slots must be STn-STm
1578 // following the popped inputs. A fixed input operand cannot be tied to
1579 // an output or appear in the clobber list. The MI has ST use operands
1580 // and no defs for these inputs.
1581 //
1582 // 3. Preserved inputs. These inputs use the "f" constraint which is
1583 // represented as an FP register. The inline asm won't change these
1584 // stack slots.
1585 //
1586 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1587 // registers do not count as output operands. The inline asm changes the
1588 // stack as if it popped all the popped inputs and then pushed all the
1589 // output operands.
1590
1591 // Scan the assembly for ST registers used, defined and clobbered. We can
1592 // only tell clobbers from defs by looking at the asm descriptor.
1593 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1594 unsigned NumOps = 0;
1595 SmallSet<unsigned, 1> FRegIdx;
1596 unsigned RCID;
1597
1598 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1599 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1600 unsigned Flags = MI.getOperand(i).getImm();
1601 const InlineAsm::Flag F(Flags);
1602
1603 NumOps = F.getNumOperandRegisters();
1604 if (NumOps != 1)
1605 continue;
1606 const MachineOperand &MO = MI.getOperand(i + 1);
1607 if (!MO.isReg())
1608 continue;
1609 unsigned STReg = MO.getReg() - X86::FP0;
1610 if (STReg >= 8)
1611 continue;
1612
1613 // If the flag has a register class constraint, this must be an operand
1614 // with constraint "f". Record its index and continue.
1615 if (F.hasRegClassConstraint(RCID)) {
1616 FRegIdx.insert(i + 1);
1617 continue;
1618 }
1619
1620 switch (F.getKind()) {
1621 case InlineAsm::Kind::RegUse:
1622 STUses |= (1u << STReg);
1623 break;
1624 case InlineAsm::Kind::RegDef:
1625 case InlineAsm::Kind::RegDefEarlyClobber:
1626 STDefs |= (1u << STReg);
1627 break;
1628 case InlineAsm::Kind::Clobber:
1629 STClobbers |= (1u << STReg);
1630 break;
1631 default:
1632 break;
1633 }
1634 }
1635
1636 if (STUses && !isMask_32(STUses))
1637 MI.emitError("fixed input regs must be last on the x87 stack");
1638 unsigned NumSTUses = llvm::countr_one(STUses);
1639
1640 // Defs must be contiguous from the stack top. ST0-STn.
1641 if (STDefs && !isMask_32(STDefs)) {
1642 MI.emitError("output regs must be last on the x87 stack");
1643 STDefs = NextPowerOf2(STDefs) - 1;
1644 }
1645 unsigned NumSTDefs = llvm::countr_one(STDefs);
1646
1647 // So must the clobbered stack slots. ST0-STm, m >= n.
1648 if (STClobbers && !isMask_32(STDefs | STClobbers))
1649 MI.emitError("clobbers must be last on the x87 stack");
1650
1651 // Popped inputs are the ones that are also clobbered or defined.
1652 unsigned STPopped = STUses & (STDefs | STClobbers);
1653 if (STPopped && !isMask_32(STPopped))
1654 MI.emitError("implicitly popped regs must be last on the x87 stack");
1655 unsigned NumSTPopped = llvm::countr_one(STPopped);
1656
1657 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1658 << NumSTPopped << ", and defines " << NumSTDefs
1659 << " regs.\n");
1660
1661#ifndef NDEBUG
1662 // If any input operand uses constraint "f", all output register
1663 // constraints must be early-clobber defs.
1664 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1665 if (FRegIdx.count(I)) {
1666 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1667 "Operands with constraint \"f\" cannot overlap with defs");
1668 }
1669#endif
1670
1671 // Collect all FP registers (register operands with constraints "t", "u",
1672 // and "f") to kill afer the instruction.
1673 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1674 for (const MachineOperand &Op : MI.operands()) {
1675 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1676 continue;
1677 unsigned FPReg = getFPReg(Op);
1678
1679 // If we kill this operand, make sure to pop it from the stack after the
1680 // asm. We just remember it for now, and pop them all off at the end in
1681 // a batch.
1682 if (Op.isUse() && Op.isKill())
1683 FPKills |= 1U << FPReg;
1684 }
1685
1686 // Do not include registers that are implicitly popped by defs/clobbers.
1687 FPKills &= ~(STDefs | STClobbers);
1688
1689 // Now we can rearrange the live registers to match what was requested.
1690 unsigned char STUsesArray[8];
1691
1692 for (unsigned I = 0; I < NumSTUses; ++I)
1693 STUsesArray[I] = I;
1694
1695 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1696 LLVM_DEBUG({
1697 dbgs() << "Before asm: ";
1698 dumpStack();
1699 });
1700
1701 // With the stack layout fixed, rewrite the FP registers.
1702 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1703 MachineOperand &Op = MI.getOperand(i);
1704 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1705 continue;
1706
1707 unsigned FPReg = getFPReg(Op);
1708
1709 if (FRegIdx.count(i))
1710 // Operand with constraint "f".
1711 Op.setReg(getSTReg(FPReg));
1712 else
1713 // Operand with a single register class constraint ("t" or "u").
1714 Op.setReg(X86::ST0 + FPReg);
1715 }
1716
1717 // Simulate the inline asm popping its inputs and pushing its outputs.
1718 StackTop -= NumSTPopped;
1719
1720 for (unsigned i = 0; i < NumSTDefs; ++i)
1721 pushReg(NumSTDefs - i - 1);
1722
1723 // If this asm kills any FP registers (is the last use of them) we must
1724 // explicitly emit pop instructions for them. Do this now after the asm has
1725 // executed so that the ST(x) numbers are not off (which would happen if we
1726 // did this inline with operand rewriting).
1727 //
1728 // Note: this might be a non-optimal pop sequence. We might be able to do
1729 // better by trying to pop in stack order or something.
1730 while (FPKills) {
1731 unsigned FPReg = llvm::countr_zero(FPKills);
1732 if (isLive(FPReg))
1733 freeStackSlotAfter(Inst, FPReg);
1734 FPKills &= ~(1U << FPReg);
1735 }
1736
1737 // Don't delete the inline asm!
1738 return;
1739 }
1740 }
1741
1742 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1743
1744 // We want to leave I pointing to the previous instruction, but what if we
1745 // just erased the first instruction?
1746 if (Inst == MBB->begin()) {
1747 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1748 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1749 } else
1750 --Inst;
1751}
1752
1753void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1754 const TargetRegisterInfo &TRI =
1756 LiveRegUnits LPR(TRI);
1757
1758 LPR.addLiveOuts(MBB);
1759
1760 for (MachineInstr &MI : llvm::reverse(MBB)) {
1761 if (MI.isDebugInstr())
1762 continue;
1763
1764 std::bitset<8> Defs;
1766
1767 for (auto &MO : MI.operands()) {
1768 if (!MO.isReg())
1769 continue;
1770
1771 unsigned Reg = MO.getReg() - X86::FP0;
1772
1773 if (Reg >= 8)
1774 continue;
1775
1776 if (MO.isDef()) {
1777 Defs.set(Reg);
1778 if (LPR.available(MO.getReg()))
1779 MO.setIsDead();
1780 } else
1781 Uses.push_back(&MO);
1782 }
1783
1784 for (auto *MO : Uses)
1785 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))
1786 MO->setIsKill();
1787
1788 LPR.stepBackward(MI);
1789 }
1790}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static const TableEntry ReverseST0Table[]
X86 FP Stackifier
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
#define DEBUG_TYPE
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
unsigned size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_type size() const
Definition: SmallPtrSet.h:94
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:203
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
Definition: X86BaseInfo.h:816
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
Definition: X86BaseInfo.h:798
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
Definition: X86BaseInfo.h:805
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
Definition: X86BaseInfo.h:800
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
Definition: X86BaseInfo.h:802
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
Definition: X86BaseInfo.h:812
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
Definition: X86BaseInfo.h:814
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
Definition: X86BaseInfo.h:809
@ AddrNumOperands
Definition: X86BaseInfo.h:36
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:251
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
std::pair< iterator, bool > insert(NodeRef N)