X86FloatingPoint.cpp (LLVM 3.7.0)
1 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the pass which converts floating point instructions from
11 // pseudo registers into register stack instructions. This pass uses live
12 // variable information to indicate where the FPn registers are used and their
13 // lifetimes.
14 //
15 // The x87 hardware tracks liveness of the stack registers, so it is necessary
16 // to implement exact liveness tracking between basic blocks. The CFG edges are
17 // partitioned into bundles where the same FP registers must be live in
18 // identical stack positions. Instructions are inserted at the end of each basic
19 // block to rearrange the live registers to match the outgoing bundle.
20 //
21 // This approach avoids splitting critical edges at the potential cost of more
22 // live register shuffling instructions when critical edges are present.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "X86.h"
27 #include "X86InstrInfo.h"
28 #include "llvm/ADT/DepthFirstIterator.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/EdgeBundles.h"
35 #include "llvm/CodeGen/LivePhysRegs.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstrBuilder.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/Passes.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetSubtargetInfo.h"
47 #include <algorithm>
48 #include <bitset>
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "x86-codegen"
52 
53 STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54 STATISTIC(NumFP , "Number of floating point instructions");
55 
56 namespace {
57  const unsigned ScratchFPReg = 7;
58 
59  struct FPS : public MachineFunctionPass {
60  static char ID;
61  FPS() : MachineFunctionPass(ID) {
62  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
63  // This is really only to keep valgrind quiet.
64  // The logic in isLive() is too much for it.
65  memset(Stack, 0, sizeof(Stack));
66  memset(RegMap, 0, sizeof(RegMap));
67  }
68 
69  void getAnalysisUsage(AnalysisUsage &AU) const override {
70  AU.setPreservesCFG();
71  AU.addRequired<EdgeBundles>();
72  AU.addPreservedID(MachineLoopInfoID);
73  AU.addPreservedID(MachineDominatorsID);
74  MachineFunctionPass::getAnalysisUsage(AU);
75  }
76 
77  bool runOnMachineFunction(MachineFunction &MF) override;
78 
79  const char *getPassName() const override { return "X86 FP Stackifier"; }
80 
81  private:
82  const TargetInstrInfo *TII; // Machine instruction info.
83 
84  // Two CFG edges are related if they leave the same block, or enter the same
85  // block. The transitive closure of an edge under this relation is a
86  // LiveBundle. It represents a set of CFG edges where the live FP stack
87  // registers must be allocated identically in the x87 stack.
88  //
89  // A LiveBundle is usually all the edges leaving a block, or all the edges
90  // entering a block, but it can contain more edges if critical edges are
91  // present.
92  //
93  // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
94  // but the exact mapping of FP registers to stack slots is fixed later.
95  struct LiveBundle {
96  // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
97  unsigned Mask;
98 
99  // Number of pre-assigned live registers in FixStack. This is 0 when the
100  // stack order has not yet been fixed.
101  unsigned FixCount;
102 
103  // Assigned stack order for live-in registers.
104  // FixStack[i] == getStackEntry(i) for all i < FixCount.
105  unsigned char FixStack[8];
106 
107  LiveBundle() : Mask(0), FixCount(0) {}
108 
109  // Have the live registers been assigned a stack order yet?
110  bool isFixed() const { return !Mask || FixCount; }
111  };
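 // For example, if block A ends with a conditional branch to C and D, and C
 // has another predecessor B (making A->C a critical edge), then the edges
 // A->C, A->D and B->C all land in the same LiveBundle, so any FP register
 // live across one of those edges must occupy the same stack slot on all of
 // them.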
112 
113  // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
114  // with no live FP registers.
115  SmallVector<LiveBundle, 8> LiveBundles;
116 
117  // The edge bundle analysis provides indices into the LiveBundles vector.
118  EdgeBundles *Bundles;
119 
120  // Return a bitmask of FP registers in block's live-in list.
121  static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
122  unsigned Mask = 0;
123  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
124  E = MBB->livein_end(); I != E; ++I) {
125  unsigned Reg = *I;
126  if (Reg < X86::FP0 || Reg > X86::FP6)
127  continue;
128  Mask |= 1 << (Reg - X86::FP0);
129  }
130  return Mask;
131  }
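 // For example, a block whose live-in list contains FP0 and FP2 yields a
 // mask of 0b101: bit 0 for FP0 and bit 2 for FP2.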
132 
133  // Partition all the CFG edges into LiveBundles.
134  void bundleCFG(MachineFunction &MF);
135 
136  MachineBasicBlock *MBB; // Current basic block
137 
138  // The hardware keeps track of how many FP registers are live, so we have
139  // to model that exactly. Usually, each live register corresponds to an
140  // FP<n> register, but when dealing with calls, returns, and inline
141  // assembly, it is sometimes necessary to have live scratch registers.
142  unsigned Stack[8]; // FP<n> Registers in each stack slot...
143  unsigned StackTop; // The current top of the FP stack.
144 
145  enum {
146  NumFPRegs = 8 // Including scratch pseudo-registers.
147  };
148 
149  // For each live FP<n> register, point to its Stack[] entry.
150  // The first entries correspond to FP0-FP6, the rest are scratch registers
151  // used when we need slightly different live registers than what the
152  // register allocator thinks.
153  unsigned RegMap[NumFPRegs];
154 
155  // Set up our stack model to match the incoming registers to MBB.
156  void setupBlockStack();
157 
158  // Shuffle live registers to match the expectations of successor blocks.
159  void finishBlockStack();
160 
161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
162  void dumpStack() const {
163  dbgs() << "Stack contents:";
164  for (unsigned i = 0; i != StackTop; ++i) {
165  dbgs() << " FP" << Stack[i];
166  assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
167  }
168  }
169 #endif
170 
171  /// getSlot - Return the stack slot number a particular register number is
172  /// in.
173  unsigned getSlot(unsigned RegNo) const {
174  assert(RegNo < NumFPRegs && "Regno out of range!");
175  return RegMap[RegNo];
176  }
177 
178  /// isLive - Is RegNo currently live in the stack?
179  bool isLive(unsigned RegNo) const {
180  unsigned Slot = getSlot(RegNo);
181  return Slot < StackTop && Stack[Slot] == RegNo;
182  }
183 
184  /// getStackEntry - Return the X86::FP<n> register in register ST(i).
185  unsigned getStackEntry(unsigned STi) const {
186  if (STi >= StackTop)
187  report_fatal_error("Access past stack top!");
188  return Stack[StackTop-1-STi];
189  }
190 
191  /// getSTReg - Return the X86::ST(i) register which contains the specified
192  /// FP<RegNo> register.
193  unsigned getSTReg(unsigned RegNo) const {
194  return StackTop - 1 - getSlot(RegNo) + X86::ST0;
195  }
196 
197  // pushReg - Push the specified FP<n> register onto the stack.
198  void pushReg(unsigned Reg) {
199  assert(Reg < NumFPRegs && "Register number out of range!");
200  if (StackTop >= 8)
201  report_fatal_error("Stack overflow!");
202  Stack[StackTop] = Reg;
203  RegMap[Reg] = StackTop++;
204  }
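 // For example, starting from an empty stack, pushReg(3) followed by
 // pushReg(5) gives Stack = {3, 5}, StackTop = 2, RegMap[3] = 0 and
 // RegMap[5] = 1, so getSTReg(5) is X86::ST0 and getSTReg(3) is X86::ST1:
 // the most recently pushed FP register lives in st(0).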
205 
206  bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
207  void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
208  DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
209  if (isAtTop(RegNo)) return;
210 
211  unsigned STReg = getSTReg(RegNo);
212  unsigned RegOnTop = getStackEntry(0);
213 
214  // Swap the slots the regs are in.
215  std::swap(RegMap[RegNo], RegMap[RegOnTop]);
216 
217  // Swap stack slot contents.
218  if (RegMap[RegOnTop] >= StackTop)
219  report_fatal_error("Access past stack top!");
220  std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
221 
222  // Emit an fxch to update the runtime processor's version of the state.
223  BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
224  ++NumFXCH;
225  }
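 // For example, with Stack = {2, 0, 5} (FP5 in st(0), FP2 in st(2)),
 // moveToTop(2, I) emits "fxch %st(2)" and updates the model to
 // Stack = {5, 0, 2}, leaving FP2 in st(0).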
226 
227  void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
228  DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
229  unsigned STReg = getSTReg(RegNo);
230  pushReg(AsReg); // New register on top of stack
231 
232  BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
233  }
234 
235  /// popStackAfter - Pop the current value off of the top of the FP stack
236  /// after the specified instruction.
237  void popStackAfter(MachineBasicBlock::iterator &I);
238 
239  /// freeStackSlotAfter - Free the specified register from the register
240  /// stack, so that it is no longer in a register. If the register is
241  /// currently at the top of the stack, we just pop the value after the
242  /// current instruction; otherwise we store the current top-of-stack into
243  /// the specified slot, then pop the top of stack.
244  void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
245 
246  /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
247  /// instruction.
248  MachineBasicBlock::iterator
249  freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
250 
251  /// Adjust the live registers to be the set in Mask.
252  void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
253 
254  /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
255  /// st(0), FP reg FixStack[1] is st(1) etc.
256  void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
257  MachineBasicBlock::iterator I);
258 
259  bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
260 
261  void handleCall(MachineBasicBlock::iterator &I);
262  void handleZeroArgFP(MachineBasicBlock::iterator &I);
263  void handleOneArgFP(MachineBasicBlock::iterator &I);
264  void handleOneArgFPRW(MachineBasicBlock::iterator &I);
265  void handleTwoArgFP(MachineBasicBlock::iterator &I);
266  void handleCompareFP(MachineBasicBlock::iterator &I);
267  void handleCondMovFP(MachineBasicBlock::iterator &I);
268  void handleSpecialFP(MachineBasicBlock::iterator &I);
269 
270  // Check if a COPY instruction is using FP registers.
271  static bool isFPCopy(MachineInstr *MI) {
272  unsigned DstReg = MI->getOperand(0).getReg();
273  unsigned SrcReg = MI->getOperand(1).getReg();
274 
275  return X86::RFP80RegClass.contains(DstReg) ||
276  X86::RFP80RegClass.contains(SrcReg);
277  }
278 
279  void setKillFlags(MachineBasicBlock &MBB) const;
280  };
281  char FPS::ID = 0;
282 }
283 
284 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
285 
286 /// getFPReg - Return the X86::FPx register number for the specified operand.
287 /// For example, this returns 3 for X86::FP3.
288 static unsigned getFPReg(const MachineOperand &MO) {
289  assert(MO.isReg() && "Expected an FP register!");
290  unsigned Reg = MO.getReg();
291  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
292  return Reg - X86::FP0;
293 }
294 
295 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
296 /// register references into FP stack references.
297 ///
298 bool FPS::runOnMachineFunction(MachineFunction &MF) {
299  // We only need to run this pass if there are any FP registers used in this
300  // function. If it is all integer, there is nothing for us to do!
301  bool FPIsUsed = false;
302 
303  static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
304  for (unsigned i = 0; i <= 6; ++i)
305  if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
306  FPIsUsed = true;
307  break;
308  }
309 
310  // Early exit.
311  if (!FPIsUsed) return false;
312 
313  Bundles = &getAnalysis<EdgeBundles>();
314  TII = MF.getSubtarget().getInstrInfo();
315 
316  // Prepare cross-MBB liveness.
317  bundleCFG(MF);
318 
319  StackTop = 0;
320 
321  // Process the function in depth first order so that we process at least one
322  // of the predecessors for every reachable block in the function.
323  SmallPtrSet<MachineBasicBlock*, 8> Processed;
324  MachineBasicBlock *Entry = MF.begin();
325 
326  bool Changed = false;
327  for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
328  Changed |= processBasicBlock(MF, *BB);
329 
330  // Process any unreachable blocks in arbitrary order now.
331  if (MF.size() != Processed.size())
332  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
333  if (Processed.insert(BB).second)
334  Changed |= processBasicBlock(MF, *BB);
335 
336  LiveBundles.clear();
337 
338  return Changed;
339 }
340 
341 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and
342 /// live-out sets for the FP registers. Consistent means that the set of
343 /// registers live-out from a block is identical to the live-in set of all
344 /// successors. This is not enforced by the normal live-in lists since
345 /// registers may be implicitly defined, or not used by all successors.
346 void FPS::bundleCFG(MachineFunction &MF) {
347  assert(LiveBundles.empty() && "Stale data in LiveBundles");
348  LiveBundles.resize(Bundles->getNumBundles());
349 
350  // Gather the actual live-in masks for all MBBs.
351  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
352  MachineBasicBlock *MBB = I;
353  const unsigned Mask = calcLiveInMask(MBB);
354  if (!Mask)
355  continue;
356  // Update MBB ingoing bundle mask.
357  LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
358  }
359 }
360 
361 /// processBasicBlock - Loop over all of the instructions in the basic block,
362 /// transforming FP instructions into their stack form.
363 ///
364 bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
365  bool Changed = false;
366  MBB = &BB;
367 
368  setKillFlags(BB);
369  setupBlockStack();
370 
371  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
372  MachineInstr *MI = I;
373  uint64_t Flags = MI->getDesc().TSFlags;
374 
375  unsigned FPInstClass = Flags & X86II::FPTypeMask;
376  if (MI->isInlineAsm())
377  FPInstClass = X86II::SpecialFP;
378 
379  if (MI->isCopy() && isFPCopy(MI))
380  FPInstClass = X86II::SpecialFP;
381 
382  if (MI->isImplicitDef() &&
383  X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
384  FPInstClass = X86II::SpecialFP;
385 
386  if (MI->isCall())
387  FPInstClass = X86II::SpecialFP;
388 
389  if (FPInstClass == X86II::NotFP)
390  continue; // Efficiently ignore non-fp insts!
391 
392  MachineInstr *PrevMI = nullptr;
393  if (I != BB.begin())
394  PrevMI = std::prev(I);
395 
396  ++NumFP; // Keep track of # of pseudo instrs
397  DEBUG(dbgs() << "\nFPInst:\t" << *MI);
398 
399  // Get dead variables list now because the MI pointer may be deleted as part
400  // of processing!
401  SmallVector<unsigned, 8> DeadRegs;
402  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
403  const MachineOperand &MO = MI->getOperand(i);
404  if (MO.isReg() && MO.isDead())
405  DeadRegs.push_back(MO.getReg());
406  }
407 
408  switch (FPInstClass) {
409  case X86II::ZeroArgFP: handleZeroArgFP(I); break;
410  case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
411  case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
412  case X86II::TwoArgFP: handleTwoArgFP(I); break;
413  case X86II::CompareFP: handleCompareFP(I); break;
414  case X86II::CondMovFP: handleCondMovFP(I); break;
415  case X86II::SpecialFP: handleSpecialFP(I); break;
416  default: llvm_unreachable("Unknown FP Type!");
417  }
418 
419  // Check to see if any of the values defined by this instruction are dead
420  // after definition. If so, pop them.
421  for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
422  unsigned Reg = DeadRegs[i];
423  // Check if Reg is live on the stack. An inline-asm register operand that
424  // is in the clobber list and marked dead might not be live on the stack.
425  if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
426  DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
427  freeStackSlotAfter(I, Reg-X86::FP0);
428  }
429  }
430 
431  // Print out all of the instructions expanded to, if -debug is specified.
432  DEBUG(
433  MachineBasicBlock::iterator PrevI(PrevMI);
434  if (I == PrevI) {
435  dbgs() << "Just deleted pseudo instruction\n";
436  } else {
437  MachineBasicBlock::iterator Start = I;
438  // Rewind to first instruction newly inserted.
439  while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
440  dbgs() << "Inserted instructions:\n\t";
441  Start->print(dbgs());
442  while (++Start != std::next(I)) {}
443  }
444  dumpStack();
445  );
446  (void)PrevMI;
447 
448  Changed = true;
449  }
450 
451  finishBlockStack();
452 
453  return Changed;
454 }
455 
456 /// setupBlockStack - Use the live bundles to set up our model of the stack
457 /// to match predecessors' live out stack.
458 void FPS::setupBlockStack() {
459  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
460  << " derived from " << MBB->getName() << ".\n");
461  StackTop = 0;
462  // Get the live-in bundle for MBB.
463  const LiveBundle &Bundle =
464  LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
465 
466  if (!Bundle.Mask) {
467  DEBUG(dbgs() << "Block has no FP live-ins.\n");
468  return;
469  }
470 
471  // Depth-first iteration should ensure that we always have an assigned stack.
472  assert(Bundle.isFixed() && "Reached block before any predecessors");
473 
474  // Push the fixed live-in registers.
475  for (unsigned i = Bundle.FixCount; i > 0; --i) {
476  MBB->addLiveIn(X86::ST0+i-1);
477  DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
478  << unsigned(Bundle.FixStack[i-1]) << '\n');
479  pushReg(Bundle.FixStack[i-1]);
480  }
481 
482  // Kill off unwanted live-ins. This can happen with a critical edge.
483  // FIXME: We could keep these live registers around as zombies. They may need
484  // to be revived at the end of a short block. It might save a few instrs.
485  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
486  DEBUG(MBB->dump());
487 }
488 
489 /// finishBlockStack - Revive live-outs that are implicitly defined out of
490 /// MBB. Shuffle live registers to match the expected fixed stack of any
491 /// predecessors, and ensure that all predecessors are expecting the same
492 /// stack.
493 void FPS::finishBlockStack() {
494  // The RET handling below takes care of return blocks for us.
495  if (MBB->succ_empty())
496  return;
497 
498  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
499  << " derived from " << MBB->getName() << ".\n");
500 
501  // Get MBB's live-out bundle.
502  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
503  LiveBundle &Bundle = LiveBundles[BundleIdx];
504 
505  // We may need to kill and define some registers to match successors.
506  // FIXME: This can probably be combined with the shuffle below.
507  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
508  adjustLiveRegs(Bundle.Mask, Term);
509 
510  if (!Bundle.Mask) {
511  DEBUG(dbgs() << "No live-outs.\n");
512  return;
513  }
514 
515  // Has the stack order been fixed yet?
516  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
517  if (Bundle.isFixed()) {
518  DEBUG(dbgs() << "Shuffling stack to match.\n");
519  shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
520  } else {
521  // Not fixed yet, we get to choose.
522  DEBUG(dbgs() << "Fixing stack order now.\n");
523  Bundle.FixCount = StackTop;
524  for (unsigned i = 0; i < StackTop; ++i)
525  Bundle.FixStack[i] = getStackEntry(i);
526  }
527 }
528 
529 
530 //===----------------------------------------------------------------------===//
531 // Efficient Lookup Table Support
532 //===----------------------------------------------------------------------===//
533 
534 namespace {
535  struct TableEntry {
536  uint16_t from;
537  uint16_t to;
538  bool operator<(const TableEntry &TE) const { return from < TE.from; }
539  friend bool operator<(const TableEntry &TE, unsigned V) {
540  return TE.from < V;
541  }
542  friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
543  const TableEntry &TE) {
544  return V < TE.from;
545  }
546  };
547 }
548 
549 #ifndef NDEBUG
550 static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
551  for (unsigned i = 0; i != NumEntries-1; ++i)
552  if (!(Table[i] < Table[i+1])) return false;
553  return true;
554 }
555 #endif
556 
557 static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
558  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
559  if (I != Table+N && I->from == Opcode)
560  return I->to;
561  return -1;
562 }
563 
564 #ifdef NDEBUG
565 #define ASSERT_SORTED(TABLE)
566 #else
567 #define ASSERT_SORTED(TABLE) \
568  { static bool TABLE##Checked = false; \
569  if (!TABLE##Checked) { \
570  assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
571  "All lookup tables must be sorted for efficient access!"); \
572  TABLE##Checked = true; \
573  } \
574  }
575 #endif
576 
577 //===----------------------------------------------------------------------===//
578 // Register File -> Register Stack Mapping Methods
579 //===----------------------------------------------------------------------===//
580 
581 // OpcodeTable - Sorted map of register instructions to their stack version.
582 // The first element is a register file pseudo instruction, the second is the
583 // concrete X86 instruction which uses the register stack.
584 //
585 static const TableEntry OpcodeTable[] = {
586  { X86::ABS_Fp32 , X86::ABS_F },
587  { X86::ABS_Fp64 , X86::ABS_F },
588  { X86::ABS_Fp80 , X86::ABS_F },
589  { X86::ADD_Fp32m , X86::ADD_F32m },
590  { X86::ADD_Fp64m , X86::ADD_F64m },
591  { X86::ADD_Fp64m32 , X86::ADD_F32m },
592  { X86::ADD_Fp80m32 , X86::ADD_F32m },
593  { X86::ADD_Fp80m64 , X86::ADD_F64m },
594  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
595  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
596  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
597  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
598  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
599  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
600  { X86::CHS_Fp32 , X86::CHS_F },
601  { X86::CHS_Fp64 , X86::CHS_F },
602  { X86::CHS_Fp80 , X86::CHS_F },
603  { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
604  { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
605  { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
606  { X86::CMOVB_Fp32 , X86::CMOVB_F },
607  { X86::CMOVB_Fp64 , X86::CMOVB_F },
608  { X86::CMOVB_Fp80 , X86::CMOVB_F },
609  { X86::CMOVE_Fp32 , X86::CMOVE_F },
610  { X86::CMOVE_Fp64 , X86::CMOVE_F },
611  { X86::CMOVE_Fp80 , X86::CMOVE_F },
612  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
613  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
614  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
615  { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
616  { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
617  { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
618  { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
619  { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
620  { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
621  { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
622  { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
623  { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
624  { X86::CMOVP_Fp32 , X86::CMOVP_F },
625  { X86::CMOVP_Fp64 , X86::CMOVP_F },
626  { X86::CMOVP_Fp80 , X86::CMOVP_F },
627  { X86::COS_Fp32 , X86::COS_F },
628  { X86::COS_Fp64 , X86::COS_F },
629  { X86::COS_Fp80 , X86::COS_F },
630  { X86::DIVR_Fp32m , X86::DIVR_F32m },
631  { X86::DIVR_Fp64m , X86::DIVR_F64m },
632  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
633  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
634  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
635  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
636  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
637  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
638  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
639  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
640  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
641  { X86::DIV_Fp32m , X86::DIV_F32m },
642  { X86::DIV_Fp64m , X86::DIV_F64m },
643  { X86::DIV_Fp64m32 , X86::DIV_F32m },
644  { X86::DIV_Fp80m32 , X86::DIV_F32m },
645  { X86::DIV_Fp80m64 , X86::DIV_F64m },
646  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
647  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
648  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
649  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
650  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
651  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
652  { X86::ILD_Fp16m32 , X86::ILD_F16m },
653  { X86::ILD_Fp16m64 , X86::ILD_F16m },
654  { X86::ILD_Fp16m80 , X86::ILD_F16m },
655  { X86::ILD_Fp32m32 , X86::ILD_F32m },
656  { X86::ILD_Fp32m64 , X86::ILD_F32m },
657  { X86::ILD_Fp32m80 , X86::ILD_F32m },
658  { X86::ILD_Fp64m32 , X86::ILD_F64m },
659  { X86::ILD_Fp64m64 , X86::ILD_F64m },
660  { X86::ILD_Fp64m80 , X86::ILD_F64m },
661  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
662  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
663  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
664  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
665  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
666  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
667  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
668  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
669  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
670  { X86::IST_Fp16m32 , X86::IST_F16m },
671  { X86::IST_Fp16m64 , X86::IST_F16m },
672  { X86::IST_Fp16m80 , X86::IST_F16m },
673  { X86::IST_Fp32m32 , X86::IST_F32m },
674  { X86::IST_Fp32m64 , X86::IST_F32m },
675  { X86::IST_Fp32m80 , X86::IST_F32m },
676  { X86::IST_Fp64m32 , X86::IST_FP64m },
677  { X86::IST_Fp64m64 , X86::IST_FP64m },
678  { X86::IST_Fp64m80 , X86::IST_FP64m },
679  { X86::LD_Fp032 , X86::LD_F0 },
680  { X86::LD_Fp064 , X86::LD_F0 },
681  { X86::LD_Fp080 , X86::LD_F0 },
682  { X86::LD_Fp132 , X86::LD_F1 },
683  { X86::LD_Fp164 , X86::LD_F1 },
684  { X86::LD_Fp180 , X86::LD_F1 },
685  { X86::LD_Fp32m , X86::LD_F32m },
686  { X86::LD_Fp32m64 , X86::LD_F32m },
687  { X86::LD_Fp32m80 , X86::LD_F32m },
688  { X86::LD_Fp64m , X86::LD_F64m },
689  { X86::LD_Fp64m80 , X86::LD_F64m },
690  { X86::LD_Fp80m , X86::LD_F80m },
691  { X86::MUL_Fp32m , X86::MUL_F32m },
692  { X86::MUL_Fp64m , X86::MUL_F64m },
693  { X86::MUL_Fp64m32 , X86::MUL_F32m },
694  { X86::MUL_Fp80m32 , X86::MUL_F32m },
695  { X86::MUL_Fp80m64 , X86::MUL_F64m },
696  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
697  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
698  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
699  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
700  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
701  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
702  { X86::SIN_Fp32 , X86::SIN_F },
703  { X86::SIN_Fp64 , X86::SIN_F },
704  { X86::SIN_Fp80 , X86::SIN_F },
705  { X86::SQRT_Fp32 , X86::SQRT_F },
706  { X86::SQRT_Fp64 , X86::SQRT_F },
707  { X86::SQRT_Fp80 , X86::SQRT_F },
708  { X86::ST_Fp32m , X86::ST_F32m },
709  { X86::ST_Fp64m , X86::ST_F64m },
710  { X86::ST_Fp64m32 , X86::ST_F32m },
711  { X86::ST_Fp80m32 , X86::ST_F32m },
712  { X86::ST_Fp80m64 , X86::ST_F64m },
713  { X86::ST_FpP80m , X86::ST_FP80m },
714  { X86::SUBR_Fp32m , X86::SUBR_F32m },
715  { X86::SUBR_Fp64m , X86::SUBR_F64m },
716  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
717  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
718  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
719  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
720  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
721  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
722  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
723  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
724  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
725  { X86::SUB_Fp32m , X86::SUB_F32m },
726  { X86::SUB_Fp64m , X86::SUB_F64m },
727  { X86::SUB_Fp64m32 , X86::SUB_F32m },
728  { X86::SUB_Fp80m32 , X86::SUB_F32m },
729  { X86::SUB_Fp80m64 , X86::SUB_F64m },
730  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
731  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
732  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
733  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
734  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
735  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
736  { X86::TST_Fp32 , X86::TST_F },
737  { X86::TST_Fp64 , X86::TST_F },
738  { X86::TST_Fp80 , X86::TST_F },
739  { X86::UCOM_FpIr32 , X86::UCOM_FIr },
740  { X86::UCOM_FpIr64 , X86::UCOM_FIr },
741  { X86::UCOM_FpIr80 , X86::UCOM_FIr },
742  { X86::UCOM_Fpr32 , X86::UCOM_Fr },
743  { X86::UCOM_Fpr64 , X86::UCOM_Fr },
744  { X86::UCOM_Fpr80 , X86::UCOM_Fr },
745 };
746 
747 static unsigned getConcreteOpcode(unsigned Opcode) {
748  ASSERT_SORTED(OpcodeTable);
749  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
750  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
751  return Opc;
752 }
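// For example, getConcreteOpcode(X86::ADD_Fp32m) returns X86::ADD_F32m, the
// concrete fadd-from-memory instruction that operates on the register stack.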
753 
754 //===----------------------------------------------------------------------===//
755 // Helper Methods
756 //===----------------------------------------------------------------------===//
757 
758 // PopTable - Sorted map of instructions to their popping version. The first
759 // element is an instruction, the second is the version which pops.
760 //
761 static const TableEntry PopTable[] = {
762  { X86::ADD_FrST0 , X86::ADD_FPrST0 },
763 
764  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
765  { X86::DIV_FrST0 , X86::DIV_FPrST0 },
766 
767  { X86::IST_F16m , X86::IST_FP16m },
768  { X86::IST_F32m , X86::IST_FP32m },
769 
770  { X86::MUL_FrST0 , X86::MUL_FPrST0 },
771 
772  { X86::ST_F32m , X86::ST_FP32m },
773  { X86::ST_F64m , X86::ST_FP64m },
774  { X86::ST_Frr , X86::ST_FPrr },
775 
776  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
777  { X86::SUB_FrST0 , X86::SUB_FPrST0 },
778 
779  { X86::UCOM_FIr , X86::UCOM_FIPr },
780 
781  { X86::UCOM_FPr , X86::UCOM_FPPr },
782  { X86::UCOM_Fr , X86::UCOM_FPr },
783 };
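// For example, popStackAfter can turn ST_F64m (fstl) into ST_FP64m (fstpl),
// folding the pop into the store instead of emitting a separate fstp.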
784 
785 /// popStackAfter - Pop the current value off of the top of the FP stack after
786 /// the specified instruction. This attempts to be sneaky and combine the pop
787 /// into the instruction itself if possible. The iterator is left pointing to
788 /// the last instruction, be it a new pop instruction inserted, or the old
789 /// instruction if it was modified in place.
790 ///
791 void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
792  MachineInstr* MI = I;
793  DebugLoc dl = MI->getDebugLoc();
794  ASSERT_SORTED(PopTable);
795  if (StackTop == 0)
796  report_fatal_error("Cannot pop empty stack!");
797  RegMap[Stack[--StackTop]] = ~0; // Update state
798 
799  // Check to see if there is a popping version of this instruction...
800  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
801  if (Opcode != -1) {
802  I->setDesc(TII->get(Opcode));
803  if (Opcode == X86::UCOM_FPPr)
804  I->RemoveOperand(0);
805  } else { // Insert an explicit pop
806  I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
807  }
808 }
809 
810 /// freeStackSlotAfter - Free the specified register from the register stack, so
811 /// that it is no longer in a register. If the register is currently at the top
812 /// of the stack, we just pop the value after the current instruction; otherwise
813 /// we store the current top-of-stack into the specified slot, then pop the top of stack.
814 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
815  if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
816  popStackAfter(I);
817  return;
818  }
819 
820  // Otherwise, store the top of stack into the dead slot, killing the operand
821  // without having to add in an explicit xchg then pop.
822  //
823  I = freeStackSlotBefore(++I, FPRegNo);
824 }
825 
826 /// freeStackSlotBefore - Free the specified register without trying any
827 /// folding.
828 MachineBasicBlock::iterator
829 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
830  unsigned STReg = getSTReg(FPRegNo);
831  unsigned OldSlot = getSlot(FPRegNo);
832  unsigned TopReg = Stack[StackTop-1];
833  Stack[OldSlot] = TopReg;
834  RegMap[TopReg] = OldSlot;
835  RegMap[FPRegNo] = ~0;
836  Stack[--StackTop] = ~0;
837  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
838  .addReg(STReg)
839  .getInstr();
840 }
841 
842 /// adjustLiveRegs - Kill and revive registers such that exactly the FP
843 /// registers with a bit in Mask are live.
844 void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
845  unsigned Defs = Mask;
846  unsigned Kills = 0;
847  for (unsigned i = 0; i < StackTop; ++i) {
848  unsigned RegNo = Stack[i];
849  if (!(Defs & (1 << RegNo)))
850  // This register is live, but we don't want it.
851  Kills |= (1 << RegNo);
852  else
853  // We don't need to imp-def this live register.
854  Defs &= ~(1 << RegNo);
855  }
856  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
857 
858  // Produce implicit-defs for free by using killed registers.
859  while (Kills && Defs) {
860  unsigned KReg = countTrailingZeros(Kills);
861  unsigned DReg = countTrailingZeros(Defs);
862  DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
863  std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
864  std::swap(RegMap[KReg], RegMap[DReg]);
865  Kills &= ~(1 << KReg);
866  Defs &= ~(1 << DReg);
867  }
868 
869  // Kill registers by popping.
870  if (Kills && I != MBB->begin()) {
871  MachineBasicBlock::iterator I2 = std::prev(I);
872  while (StackTop) {
873  unsigned KReg = getStackEntry(0);
874  if (!(Kills & (1 << KReg)))
875  break;
876  DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
877  popStackAfter(I2);
878  Kills &= ~(1 << KReg);
879  }
880  }
881 
882  // Manually kill the rest.
883  while (Kills) {
884  unsigned KReg = countTrailingZeros(Kills);
885  DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
886  freeStackSlotBefore(I, KReg);
887  Kills &= ~(1 << KReg);
888  }
889 
890  // Load zeros for all the imp-defs.
891  while(Defs) {
892  unsigned DReg = countTrailingZeros(Defs);
893  DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
894  BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
895  pushReg(DReg);
896  Defs &= ~(1 << DReg);
897  }
898 
899  // Now we should have the correct registers live.
900  DEBUG(dumpStack());
901  assert(StackTop == countPopulation(Mask) && "Live count mismatch");
902 }
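// For example, if the model currently holds FP0 and FP3 but Mask asks for FP0
// and FP2, the renaming loop above simply relabels FP3's stack slot as FP2 (a
// free imp-def) instead of popping FP3 and loading a zero for FP2.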
903 
904 /// shuffleStackTop - emit fxch instructions before I to shuffle the top
905 /// FixCount entries into the order given by FixStack.
906 /// FIXME: Is there a better algorithm than insertion sort?
907 void FPS::shuffleStackTop(const unsigned char *FixStack,
908  unsigned FixCount,
909  MachineBasicBlock::iterator I) {
910  // Move items into place, starting from the desired stack bottom.
911  while (FixCount--) {
912  // Old register at position FixCount.
913  unsigned OldReg = getStackEntry(FixCount);
914  // Desired register at position FixCount.
915  unsigned Reg = FixStack[FixCount];
916  if (Reg == OldReg)
917  continue;
918  // (Reg st0) (OldReg st0) = (Reg OldReg st0)
919  moveToTop(Reg, I);
920  if (FixCount > 0)
921  moveToTop(OldReg, I);
922  }
923  DEBUG(dumpStack());
924 }
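// For example, with FP0 in st(0) and FP1 in st(1), calling shuffleStackTop
// with FixStack = {1, 0} and FixCount = 2 emits a single "fxch %st(1)", after
// which FP1 is in st(0) and FP0 is in st(1), as requested.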
925 
926 
927 //===----------------------------------------------------------------------===//
928 // Instruction transformation implementation
929 //===----------------------------------------------------------------------===//
930 
931 void FPS::handleCall(MachineBasicBlock::iterator &I) {
932  unsigned STReturns = 0;
933 
934  for (const auto &MO : I->operands()) {
935  if (!MO.isReg())
936  continue;
937 
938  unsigned R = MO.getReg() - X86::FP0;
939 
940  if (R < 8) {
941  assert(MO.isDef() && MO.isImplicit());
942  STReturns |= 1 << R;
943  }
944  }
945 
946  unsigned N = countTrailingOnes(STReturns);
947 
948  // FP registers used for function return must be consecutive starting at
949  // FP0.
950  assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
951 
952  for (unsigned I = 0; I < N; ++I)
953  pushReg(N - I - 1);
954 }
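// For example, a call whose x87 return value occupies st(0) and st(1) (e.g. a
// function returning _Complex long double) carries implicit defs of FP0 and
// FP1, so STReturns == 3 and N == 2; the loop pushes FP1 and then FP0, leaving
// FP0 on top in st(0).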
955 
956 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
957 ///
958 void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
959  MachineInstr *MI = I;
960  unsigned DestReg = getFPReg(MI->getOperand(0));
961 
962  // Change from the pseudo instruction to the concrete instruction.
963  MI->RemoveOperand(0); // Remove the explicit ST(0) operand
964  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
965 
966  // Result gets pushed on the stack.
967  pushReg(DestReg);
968 }
969 
970 /// handleOneArgFP - fst <mem>, ST(0)
971 ///
972 void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
973  MachineInstr *MI = I;
974  unsigned NumOps = MI->getDesc().getNumOperands();
975  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
976  "Can only handle fst* & ftst instructions!");
977 
978  // Is this the last use of the source register?
979  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
980  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
981 
982  // FISTP64m is strange because there isn't a non-popping version.
983  // If we have one _and_ we don't want to pop the operand, duplicate the value
984  // on the stack instead of moving it. This ensures that popping the value is
985  // always ok.
986  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
987  //
988  if (!KillsSrc &&
989  (MI->getOpcode() == X86::IST_Fp64m32 ||
990  MI->getOpcode() == X86::ISTT_Fp16m32 ||
991  MI->getOpcode() == X86::ISTT_Fp32m32 ||
992  MI->getOpcode() == X86::ISTT_Fp64m32 ||
993  MI->getOpcode() == X86::IST_Fp64m64 ||
994  MI->getOpcode() == X86::ISTT_Fp16m64 ||
995  MI->getOpcode() == X86::ISTT_Fp32m64 ||
996  MI->getOpcode() == X86::ISTT_Fp64m64 ||
997  MI->getOpcode() == X86::IST_Fp64m80 ||
998  MI->getOpcode() == X86::ISTT_Fp16m80 ||
999  MI->getOpcode() == X86::ISTT_Fp32m80 ||
1000  MI->getOpcode() == X86::ISTT_Fp64m80 ||
1001  MI->getOpcode() == X86::ST_FpP80m)) {
1002  duplicateToTop(Reg, ScratchFPReg, I);
1003  } else {
1004  moveToTop(Reg, I); // Move to the top of the stack...
1005  }
1006 
1007  // Convert from the pseudo instruction to the concrete instruction.
1008  MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
1009  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1010 
1011  if (MI->getOpcode() == X86::IST_FP64m ||
1012  MI->getOpcode() == X86::ISTT_FP16m ||
1013  MI->getOpcode() == X86::ISTT_FP32m ||
1014  MI->getOpcode() == X86::ISTT_FP64m ||
1015  MI->getOpcode() == X86::ST_FP80m) {
1016  if (StackTop == 0)
1017  report_fatal_error("Stack empty??");
1018  --StackTop;
1019  } else if (KillsSrc) { // Last use of operand?
1020  popStackAfter(I);
1021  }
1022 }
1023 
1024 
1025 /// handleOneArgFPRW: Handle instructions that read from the top of stack and
1026 /// replace the value with a newly computed value. These instructions may have
1027 /// non-fp operands after their FP operands.
1028 ///
1029 /// Examples:
1030 /// R1 = fchs R2
1031 /// R1 = fadd R2, [mem]
1032 ///
1033 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1034  MachineInstr *MI = I;
1035 #ifndef NDEBUG
1036  unsigned NumOps = MI->getDesc().getNumOperands();
1037  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1038 #endif
1039 
1040  // Is this the last use of the source register?
1041  unsigned Reg = getFPReg(MI->getOperand(1));
1042  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
1043 
1044  if (KillsSrc) {
1045  // If this is the last use of the source register, just make sure it's on
1046  // the top of the stack.
1047  moveToTop(Reg, I);
1048  if (StackTop == 0)
1049  report_fatal_error("Stack cannot be empty!");
1050  --StackTop;
1051  pushReg(getFPReg(MI->getOperand(0)));
1052  } else {
1053  // If this is not the last use of the source register, _copy_ it to the top
1054  // of the stack.
1055  duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
1056  }
1057 
1058  // Change from the pseudo instruction to the concrete instruction.
1059  MI->RemoveOperand(1); // Drop the source operand.
1060  MI->RemoveOperand(0); // Drop the destination operand.
1061  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1062 }
1063 
1064 
1065 //===----------------------------------------------------------------------===//
1066 // Define tables of various ways to map pseudo instructions
1067 //
1068 
1069 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1070 static const TableEntry ForwardST0Table[] = {
1071  { X86::ADD_Fp32 , X86::ADD_FST0r },
1072  { X86::ADD_Fp64 , X86::ADD_FST0r },
1073  { X86::ADD_Fp80 , X86::ADD_FST0r },
1074  { X86::DIV_Fp32 , X86::DIV_FST0r },
1075  { X86::DIV_Fp64 , X86::DIV_FST0r },
1076  { X86::DIV_Fp80 , X86::DIV_FST0r },
1077  { X86::MUL_Fp32 , X86::MUL_FST0r },
1078  { X86::MUL_Fp64 , X86::MUL_FST0r },
1079  { X86::MUL_Fp80 , X86::MUL_FST0r },
1080  { X86::SUB_Fp32 , X86::SUB_FST0r },
1081  { X86::SUB_Fp64 , X86::SUB_FST0r },
1082  { X86::SUB_Fp80 , X86::SUB_FST0r },
1083 };
1084 
1085 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1086 static const TableEntry ReverseST0Table[] = {
1087  { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1088  { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1089  { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1090  { X86::DIV_Fp32 , X86::DIVR_FST0r },
1091  { X86::DIV_Fp64 , X86::DIVR_FST0r },
1092  { X86::DIV_Fp80 , X86::DIVR_FST0r },
1093  { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1094  { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1095  { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1096  { X86::SUB_Fp32 , X86::SUBR_FST0r },
1097  { X86::SUB_Fp64 , X86::SUBR_FST0r },
1098  { X86::SUB_Fp80 , X86::SUBR_FST0r },
1099 };
1100 
1101 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1102 static const TableEntry ForwardSTiTable[] = {
1103  { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1104  { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1105  { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1106  { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1107  { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1108  { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1109  { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1110  { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1111  { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1112  { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1113  { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1114  { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1115 };
1116 
1117 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1118 static const TableEntry ReverseSTiTable[] = {
1119  { X86::ADD_Fp32 , X86::ADD_FrST0 },
1120  { X86::ADD_Fp64 , X86::ADD_FrST0 },
1121  { X86::ADD_Fp80 , X86::ADD_FrST0 },
1122  { X86::DIV_Fp32 , X86::DIV_FrST0 },
1123  { X86::DIV_Fp64 , X86::DIV_FrST0 },
1124  { X86::DIV_Fp80 , X86::DIV_FrST0 },
1125  { X86::MUL_Fp32 , X86::MUL_FrST0 },
1126  { X86::MUL_Fp64 , X86::MUL_FrST0 },
1127  { X86::MUL_Fp80 , X86::MUL_FrST0 },
1128  { X86::SUB_Fp32 , X86::SUB_FrST0 },
1129  { X86::SUB_Fp64 , X86::SUB_FrST0 },
1130  { X86::SUB_Fp80 , X86::SUB_FrST0 },
1131 };
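// Taken together, for A = fsub B, C the four tables cover the cases:
//   result in ST(0), B on top:  SUB_FST0r   ST(0) = ST(0) - ST(i)
//   result in ST(0), C on top:  SUBR_FST0r  ST(0) = ST(i) - ST(0)
//   result in ST(i), B on top:  SUBR_FrST0  ST(i) = ST(0) - ST(i)
//   result in ST(i), C on top:  SUB_FrST0   ST(i) = ST(i) - ST(0)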
1132 
1133 
1134 /// handleTwoArgFP - Handle instructions like FADD and friends, which are
1135 /// virtual instructions that need to be simplified and possibly transformed.
1136 ///
1137 /// Result: ST(0) = fsub ST(0), ST(i)
1138 /// ST(i) = fsub ST(0), ST(i)
1139 /// ST(0) = fsubr ST(0), ST(i)
1140 /// ST(i) = fsubr ST(0), ST(i)
1141 ///
1142 void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1143  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1144  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1145  MachineInstr *MI = I;
1146 
1147  unsigned NumOperands = MI->getDesc().getNumOperands();
1148  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1149  unsigned Dest = getFPReg(MI->getOperand(0));
1150  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1151  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1152  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1153  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1154  DebugLoc dl = MI->getDebugLoc();
1155 
1156  unsigned TOS = getStackEntry(0);
1157 
1158  // One of our operands must be on the top of the stack. If neither is yet, we
1159  // need to move one.
1160  if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1161  // We can choose to move either operand to the top of the stack. If one of
1162  // the operands is killed by this instruction, we want that one so that we
1163  // can update right on top of the old version.
1164  if (KillsOp0) {
1165  moveToTop(Op0, I); // Move dead operand to TOS.
1166  TOS = Op0;
1167  } else if (KillsOp1) {
1168  moveToTop(Op1, I);
1169  TOS = Op1;
1170  } else {
1171  // All of the operands are live after this instruction executes, so we
1172  // cannot update on top of any operand. Because of this, we must
1173  // duplicate one of the stack elements to the top. It doesn't matter
1174  // which one we pick.
1175  //
1176  duplicateToTop(Op0, Dest, I);
1177  Op0 = TOS = Dest;
1178  KillsOp0 = true;
1179  }
1180  } else if (!KillsOp0 && !KillsOp1) {
1181  // If we DO have one of our operands at the top of the stack, but we don't
1182  // have a dead operand, we must duplicate one of the operands to a new slot
1183  // on the stack.
1184  duplicateToTop(Op0, Dest, I);
1185  Op0 = TOS = Dest;
1186  KillsOp0 = true;
1187  }
1188 
1189  // Now we know that one of our operands is on the top of the stack, and at
1190  // least one of our operands is killed by this instruction.
1191  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1192  "Stack conditions not set up right!");
1193 
1194  // We decide which form to use based on what is on the top of the stack, and
1195  // which operand is killed by this instruction.
1196  const TableEntry *InstTable;
1197  bool isForward = TOS == Op0;
1198  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1199  if (updateST0) {
1200  if (isForward)
1201  InstTable = ForwardST0Table;
1202  else
1203  InstTable = ReverseST0Table;
1204  } else {
1205  if (isForward)
1206  InstTable = ForwardSTiTable;
1207  else
1208  InstTable = ReverseSTiTable;
1209  }
1210 
1211  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
1212  MI->getOpcode());
1213  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1214 
1215  // NotTOS - The register which is not on the top of stack...
1216  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1217 
1218  // Replace the old instruction with a new instruction
1219  MBB->remove(I++);
1220  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1221 
1222  // If both operands are killed, pop one off of the stack in addition to
1223  // overwriting the other one.
1224  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1225  assert(!updateST0 && "Should have updated other operand!");
1226  popStackAfter(I); // Pop the top of stack
1227  }
1228 
1229  // Update stack information so that we know the destination register is now on
1230  // the stack.
1231  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1232  assert(UpdatedSlot < StackTop && Dest < 7);
1233  Stack[UpdatedSlot] = Dest;
1234  RegMap[Dest] = UpdatedSlot;
1235  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
1236 }
1237 
1238 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1239 /// register arguments and no explicit destinations.
1240 ///
1241 void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1242  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1243  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1244  MachineInstr *MI = I;
1245 
1246  unsigned NumOperands = MI->getDesc().getNumOperands();
1247  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1248  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1249  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1250  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1251  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1252 
1253  // Make sure the first operand is on the top of stack, the other one can be
1254  // anywhere.
1255  moveToTop(Op0, I);
1256 
1257  // Change from the pseudo instruction to the concrete instruction.
1258  MI->getOperand(0).setReg(getSTReg(Op1));
1259  MI->RemoveOperand(1);
1260  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1261 
1262  // If any of the operands are killed by this instruction, free them.
1263  if (KillsOp0) freeStackSlotAfter(I, Op0);
1264  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1265 }
1266 
1267 /// handleCondMovFP - Handle two-address conditional move instructions. These
1268 /// instructions move a st(i) register to st(0) iff a condition is true. These
1269 /// instructions require that the first operand is at the top of the stack, but
1270 /// otherwise don't modify the stack at all.
1271 void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1272  MachineInstr *MI = I;
1273 
1274  unsigned Op0 = getFPReg(MI->getOperand(0));
1275  unsigned Op1 = getFPReg(MI->getOperand(2));
1276  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1277 
1278  // The first operand *must* be on the top of the stack.
1279  moveToTop(Op0, I);
1280 
1281  // Change the second operand to the stack register that the operand is in.
1282  // Change from the pseudo instruction to the concrete instruction.
1283  MI->RemoveOperand(0);
1284  MI->RemoveOperand(1);
1285  MI->getOperand(0).setReg(getSTReg(Op1));
1286  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1287 
1288  // If we kill the second operand, make sure to pop it from the stack.
1289  if (Op0 != Op1 && KillsOp1) {
1290  // Get this value off of the register stack.
1291  freeStackSlotAfter(I, Op1);
1292  }
1293 }
1294 
1295 
1296 /// handleSpecialFP - Handle special instructions which behave unlike other
1297 /// floating point instructions. This is primarily intended for use by pseudo
1298 /// instructions.
1299 ///
1300 void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1301  MachineInstr *MI = Inst;
1302 
1303  if (MI->isCall()) {
1304  handleCall(Inst);
1305  return;
1306  }
1307 
1308  switch (MI->getOpcode()) {
1309  default: llvm_unreachable("Unknown SpecialFP instruction!");
1310  case TargetOpcode::COPY: {
1311  // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1312  const MachineOperand &MO1 = MI->getOperand(1);
1313  const MachineOperand &MO0 = MI->getOperand(0);
1314  bool KillsSrc = MI->killsRegister(MO1.getReg());
1315 
1316  // FP <- FP copy.
1317  unsigned DstFP = getFPReg(MO0);
1318  unsigned SrcFP = getFPReg(MO1);
1319  assert(isLive(SrcFP) && "Cannot copy dead register");
1320  if (KillsSrc) {
1321  // If the input operand is killed, we can just change the owner of the
1322  // incoming stack slot into the result.
1323  unsigned Slot = getSlot(SrcFP);
1324  Stack[Slot] = DstFP;
1325  RegMap[DstFP] = Slot;
1326  } else {
1327  // For COPY we just duplicate the specified value to a new stack slot.
1328  // This could be made better, but would require substantial changes.
1329  duplicateToTop(SrcFP, DstFP, Inst);
1330  }
1331  break;
1332  }
1333 
1334  case TargetOpcode::IMPLICIT_DEF: {
1335  // All FP registers must be explicitly defined, so load a 0 instead.
1336  unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
1337  DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1338  BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0));
1339  pushReg(Reg);
1340  break;
1341  }
1342 
1343  case TargetOpcode::INLINEASM: {
1344  // The inline asm MachineInstr currently only *uses* FP registers for the
1345  // 'f' constraint. These should be turned into the current ST(x) register
1346  // in the machine instr.
1347  //
1348  // There are special rules for x87 inline assembly. The compiler must know
1349  // exactly how many registers are popped and pushed implicitly by the asm.
1350  // Otherwise it is not possible to restore the stack state after the inline
1351  // asm.
1352  //
1353  // There are 3 kinds of input operands:
1354  //
1355  // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1356  // popped input operand must be in a fixed stack slot, and it is either
1357  // tied to an output operand, or in the clobber list. The MI has ST use
1358  // and def operands for these inputs.
1359  //
1360  // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1361  // preserved by the inline asm. The fixed stack slots must be STn-STm
1362  // following the popped inputs. A fixed input operand cannot be tied to
1363  // an output or appear in the clobber list. The MI has ST use operands
1364  // and no defs for these inputs.
1365  //
1366  // 3. Preserved inputs. These inputs use the "f" constraint which is
1367  // represented as an FP register. The inline asm won't change these
1368  // stack slots.
1369  //
1370  // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1371  // registers do not count as output operands. The inline asm changes the
1372  // stack as if it popped all the popped inputs and then pushed all the
1373  // output operands.
1374 
1375  // Scan the assembly for ST registers used, defined and clobbered. We can
1376  // only tell clobbers from defs by looking at the asm descriptor.
1377  unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1378  unsigned NumOps = 0;
1379  SmallSet<unsigned, 1> FRegIdx;
1380  unsigned RCID;
1381 
1382  for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
1383  i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
1384  unsigned Flags = MI->getOperand(i).getImm();
1385 
1386  NumOps = InlineAsm::getNumOperandRegisters(Flags);
1387  if (NumOps != 1)
1388  continue;
1389  const MachineOperand &MO = MI->getOperand(i + 1);
1390  if (!MO.isReg())
1391  continue;
1392  unsigned STReg = MO.getReg() - X86::FP0;
1393  if (STReg >= 8)
1394  continue;
1395 
1396  // If the flag has a register class constraint, this must be an operand
1397  // with constraint "f". Record its index and continue.
1398  if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
1399  FRegIdx.insert(i + 1);
1400  continue;
1401  }
1402 
1403  switch (InlineAsm::getKind(Flags)) {
1404  case InlineAsm::Kind_RegUse:
1405  STUses |= (1u << STReg);
1406  break;
1407  case InlineAsm::Kind_RegDef:
1408  case InlineAsm::Kind_RegDefEarlyClobber:
1409  STDefs |= (1u << STReg);
1410  if (MO.isDead())
1411  STDeadDefs |= (1u << STReg);
1412  break;
1413  case InlineAsm::Kind_Clobber:
1414  STClobbers |= (1u << STReg);
1415  break;
1416  default:
1417  break;
1418  }
1419  }
1420 
1421  if (STUses && !isMask_32(STUses))
1422  MI->emitError("fixed input regs must be last on the x87 stack");
1423  unsigned NumSTUses = countTrailingOnes(STUses);
1424 
1425  // Defs must be contiguous from the stack top. ST0-STn.
1426  if (STDefs && !isMask_32(STDefs)) {
1427  MI->emitError("output regs must be last on the x87 stack");
1428  STDefs = NextPowerOf2(STDefs) - 1;
1429  }
1430  unsigned NumSTDefs = countTrailingOnes(STDefs);
1431 
1432  // So must the clobbered stack slots. ST0-STm, m >= n.
1433  if (STClobbers && !isMask_32(STDefs | STClobbers))
1434  MI->emitError("clobbers must be last on the x87 stack");
1435 
1436  // Popped inputs are the ones that are also clobbered or defined.
1437  unsigned STPopped = STUses & (STDefs | STClobbers);
1438  if (STPopped && !isMask_32(STPopped))
1439  MI->emitError("implicitly popped regs must be last on the x87 stack");
1440  unsigned NumSTPopped = countTrailingOnes(STPopped);
1441 
1442  DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1443  << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
1444 
1445 #ifndef NDEBUG
1446  // If any input operand uses constraint "f", all output register
1447  // constraints must be early-clobber defs.
1448  for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I)
1449  if (FRegIdx.count(I)) {
1450  assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 &&
1451  "Operands with constraint \"f\" cannot overlap with defs");
1452  }
1453 #endif
1454 
1455  // Collect all FP registers (register operands with constraints "t", "u",
1456  // and "f") to kill after the instruction.
1457  unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1458  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1459  MachineOperand &Op = MI->getOperand(i);
1460  if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1461  continue;
1462  unsigned FPReg = getFPReg(Op);
1463 
1464  // If we kill this operand, make sure to pop it from the stack after the
1465  // asm. We just remember it for now, and pop them all off at the end in
1466  // a batch.
1467  if (Op.isUse() && Op.isKill())
1468  FPKills |= 1U << FPReg;
1469  }
1470 
1471  // Do not include registers that are implicitly popped by defs/clobbers.
1472  FPKills &= ~(STDefs | STClobbers);
1473 
1474  // Now we can rearrange the live registers to match what was requested.
1475  unsigned char STUsesArray[8];
1476 
1477  for (unsigned I = 0; I < NumSTUses; ++I)
1478  STUsesArray[I] = I;
1479 
1480  shuffleStackTop(STUsesArray, NumSTUses, Inst);
1481  DEBUG({dbgs() << "Before asm: "; dumpStack();});
1482 
1483  // With the stack layout fixed, rewrite the FP registers.
1484  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1485  MachineOperand &Op = MI->getOperand(i);
1486  if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1487  continue;
1488 
1489  unsigned FPReg = getFPReg(Op);
1490 
1491  if (FRegIdx.count(i))
1492  // Operand with constraint "f".
1493  Op.setReg(getSTReg(FPReg));
1494  else
1495  // Operand with a single register class constraint ("t" or "u").
1496  Op.setReg(X86::ST0 + FPReg);
1497  }
1498 
1499  // Simulate the inline asm popping its inputs and pushing its outputs.
1500  StackTop -= NumSTPopped;
1501 
1502  for (unsigned i = 0; i < NumSTDefs; ++i)
1503  pushReg(NumSTDefs - i - 1);
1504 
1505  // If this asm kills any FP registers (is the last use of them) we must
1506  // explicitly emit pop instructions for them. Do this now after the asm has
1507  // executed so that the ST(x) numbers are not off (which would happen if we
1508  // did this inline with operand rewriting).
1509  //
1510  // Note: this might be a non-optimal pop sequence; popping in stack
1511  // order might produce fewer shuffling instructions.
1512  while (FPKills) {
1513  unsigned FPReg = countTrailingZeros(FPKills);
1514  if (isLive(FPReg))
1515  freeStackSlotAfter(Inst, FPReg);
1516  FPKills &= ~(1U << FPReg);
1517  }
1518 
1519  // Don't delete the inline asm!
1520  return;
1521  }
1522 
1523  case X86::WIN_FTOL_32:
1524  case X86::WIN_FTOL_64: {
1525  // Push the operand into ST0.
1526  MachineOperand &Op = MI->getOperand(0);
1527  assert(Op.isUse() && Op.isReg() &&
1528  Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
1529  unsigned FPReg = getFPReg(Op);
1530  if (Op.isKill())
1531  moveToTop(FPReg, Inst);
1532  else
1533  duplicateToTop(FPReg, ScratchFPReg, Inst);
1534 
1535  // Emit the call. This will pop the operand.
1536  BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
1537  .addExternalSymbol("_ftol2")
1538  .addReg(X86::ST0, RegState::ImplicitKill)
1539  .addReg(X86::ECX, RegState::ImplicitDefine)
1540  .addReg(X86::EAX, RegState::Define | RegState::Implicit)
1541  .addReg(X86::EDX, RegState::Define | RegState::Implicit)
1542  .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
1543  --StackTop;
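 // Editorial note: _ftol2 is the MSVC float-to-int helper; it consumes its
 // ST(0) argument and returns the converted value in EDX:EAX, which is why
 // the simulated FP stack shrinks by one here instead of emitting an
 // explicit pop.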
1544 
1545  break;
1546  }
1547 
1548  case X86::RETQ:
1549  case X86::RETL:
1550  case X86::RETIL:
1551  case X86::RETIQ:
1552  // If RET has an FP register use operand, pass the first one in ST(0) and
1553  // the second one in ST(1).
1554 
1555  // Find the register operands.
1556  unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1557  unsigned LiveMask = 0;
1558 
1559  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1560  MachineOperand &Op = MI->getOperand(i);
1561  if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1562  continue;
1563  // FP Register uses must be kills unless there are two uses of the same
1564  // register, in which case only one will be a kill.
1565  assert(Op.isUse() &&
1566  (Op.isKill() || // Marked kill.
1567  getFPReg(Op) == FirstFPRegOp || // Second instance.
1568  MI->killsRegister(Op.getReg())) && // Later use is marked kill.
1569  "Ret only defs operands, and values aren't live beyond it");
1570 
1571  if (FirstFPRegOp == ~0U)
1572  FirstFPRegOp = getFPReg(Op);
1573  else {
1574  assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1575  SecondFPRegOp = getFPReg(Op);
1576  }
1577  LiveMask |= (1 << getFPReg(Op));
1578 
1579  // Remove the operand so that later passes don't see it.
1580  MI->RemoveOperand(i);
1581  --i, --e;
1582  }
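 // Editorial illustration: a function returning _Complex long double reaches
 // this point as roughly "RETL %FP0<kill>, %FP1<kill>", giving
 // FirstFPRegOp = 0, SecondFPRegOp = 1 and LiveMask = 0b11; a plain long
 // double return carries only the single FP operand handled by case 1) below.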
1583 
1584  // We may have been carrying spurious live-ins, so make sure only the returned
1585  // registers are left live.
1586  adjustLiveRegs(LiveMask, MI);
1587  if (!LiveMask) return; // Quick check to see if any are possible.
1588 
1589  // There are only four possibilities here:
1590  // 1) we are returning a single FP value. In this case, it has to be in
1591  // ST(0) already, so just declare success by removing the value from the
1592  // FP Stack.
1593  if (SecondFPRegOp == ~0U) {
1594  // Assert that the top of stack contains the right FP register.
1595  assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1596  "Top of stack not the right register for RET!");
1597 
1598  // Ok, everything is good, mark the value as not being on the stack
1599  // anymore so that our assertion about the stack being empty at end of
1600  // block doesn't fire.
1601  StackTop = 0;
1602  return;
1603  }
1604 
1605  // Otherwise, we are returning two values:
1606  // 2) If returning the same value for both, we only have one thing in the FP
1607  // stack. Consider: RET FP1, FP1
1608  if (StackTop == 1) {
1609  assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1610  "Stack misconfiguration for RET!");
1611 
1612  // Duplicate the TOS so that we return it twice. Just pick some other FPx
1613  // register to hold it.
1614  unsigned NewReg = ScratchFPReg;
1615  duplicateToTop(FirstFPRegOp, NewReg, MI);
1616  FirstFPRegOp = NewReg;
1617  }
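 // Editorial illustration: for "RET FP1, FP1" with one live value,
 // duplicateToTop() copies it into the scratch register FP7, so the stack now
 // holds two entries and the generic two-value cases below apply unchanged.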
1618 
1619  /// Okay, we now know we have two different FPx operands:
1620  assert(StackTop == 2 && "Must have two values live!");
1621 
1622  /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is in ST(1),
1623  /// emit an fxch to swap them into the required order.
1624  if (getStackEntry(0) == SecondFPRegOp) {
1625  assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1626  moveToTop(FirstFPRegOp, MI);
1627  }
1628 
1629  /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1630  /// ST(1). Just remove both from our understanding of the stack and return.
1631  assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1632  assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1633  StackTop = 0;
1634  return;
1635  }
1636 
1637  Inst = MBB->erase(Inst); // Remove the pseudo instruction
1638 
1639  // We want to leave I pointing to the previous instruction, but what if we
1640  // just erased the first instruction?
1641  if (Inst == MBB->begin()) {
1642  DEBUG(dbgs() << "Inserting dummy KILL\n");
1643  Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1644  } else
1645  --Inst;
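 // Editorial note: the caller resumes iteration relative to Inst after this
 // returns, so Inst is left on the instruction preceding the erased pseudo,
 // or on a dummy KILL when the pseudo was the first instruction of the block.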
1646 }
1647 
1648 void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1649  const TargetRegisterInfo *TRI =
1650  MBB.getParent()->getSubtarget().getRegisterInfo();
1651  LivePhysRegs LPR(TRI);
1652 
1653  LPR.addLiveOuts(&MBB);
1654 
1655  for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
1656  I != E; ++I) {
1657  if (I->isDebugValue())
1658  continue;
1659 
1660  std::bitset<8> Defs;
1661  SmallVector<MachineOperand *, 2> Uses;
1662  MachineInstr &MI = *I;
1663 
1664  for (auto &MO : I->operands()) {
1665  if (!MO.isReg())
1666  continue;
1667 
1668  unsigned Reg = MO.getReg() - X86::FP0;
1669 
1670  if (Reg >= 8)
1671  continue;
1672 
1673  if (MO.isDef()) {
1674  Defs.set(Reg);
1675  if (!LPR.contains(MO.getReg()))
1676  MO.setIsDead();
1677  } else
1678  Uses.push_back(&MO);
1679  }
1680 
1681  for (auto *MO : Uses)
1682  if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1683  MO->setIsKill();
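 // Editorial illustration: at this point LPR still describes liveness *after*
 // MI (stepBackward below updates it), so in e.g. "%FP0 = ADD_Fp80 %FP0, %FP1"
 // the FP0 use is killed because MI redefines it, and FP1 is killed unless a
 // later instruction still reads it.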
1684 
1685  LPR.stepBackward(MI);
1686  }
1687 }