LLVM 22.0.0git
X86FloatingPoint.cpp
Go to the documentation of this file.
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/Passes.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/InlineAsm.h"
43#include "llvm/Support/Debug.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP , "Number of floating point instructions");
55
56namespace {
57 const unsigned ScratchFPReg = 7;
58
59 struct FPS : public MachineFunctionPass {
60 static char ID;
61 FPS() : MachineFunctionPass(ID) {
62 // This is really only to keep valgrind quiet.
63 // The logic in isLive() is too much for it.
64 memset(Stack, 0, sizeof(Stack));
65 memset(RegMap, 0, sizeof(RegMap));
66 }
67
68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
70 AU.addRequired<EdgeBundlesWrapperLegacy>();
74 }
75
76 bool runOnMachineFunction(MachineFunction &MF) override;
77
78 MachineFunctionProperties getRequiredProperties() const override {
79 return MachineFunctionProperties().setNoVRegs();
80 }
81
82 StringRef getPassName() const override { return "X86 FP Stackifier"; }
83
84 private:
85 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
86
87 // Two CFG edges are related if they leave the same block, or enter the same
88 // block. The transitive closure of an edge under this relation is a
89 // LiveBundle. It represents a set of CFG edges where the live FP stack
90 // registers must be allocated identically in the x87 stack.
91 //
92 // A LiveBundle is usually all the edges leaving a block, or all the edges
93 // entering a block, but it can contain more edges if critical edges are
94 // present.
95 //
96 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
97 // but the exact mapping of FP registers to stack slots is fixed later.
98 struct LiveBundle {
99 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
100 unsigned Mask = 0;
101
102 // Number of pre-assigned live registers in FixStack. This is 0 when the
103 // stack order has not yet been fixed.
104 unsigned FixCount = 0;
105
106 // Assigned stack order for live-in registers.
107 // FixStack[i] == getStackEntry(i) for all i < FixCount.
108 unsigned char FixStack[8];
109
110 LiveBundle() = default;
111
112 // Have the live registers been assigned a stack order yet?
113 bool isFixed() const { return !Mask || FixCount; }
114 };
115
116 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
117 // with no live FP registers.
118 SmallVector<LiveBundle, 8> LiveBundles;
119
120 // The edge bundle analysis provides indices into the LiveBundles vector.
121 EdgeBundles *Bundles = nullptr;
122
123 // Return a bitmask of FP registers in block's live-in list.
124 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
125 unsigned Mask = 0;
126 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
127 I != MBB->livein_end(); ) {
128 MCPhysReg Reg = I->PhysReg;
129 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
130 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
131 Mask |= 1 << (Reg - X86::FP0);
132 if (RemoveFPs) {
133 I = MBB->removeLiveIn(I);
134 continue;
135 }
136 }
137 ++I;
138 }
139 return Mask;
140 }
141
142 // Partition all the CFG edges into LiveBundles.
143 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
144
145 MachineBasicBlock *MBB = nullptr; // Current basic block
146
147 // The hardware keeps track of how many FP registers are live, so we have
148 // to model that exactly. Usually, each live register corresponds to an
149 // FP<n> register, but when dealing with calls, returns, and inline
150 // assembly, it is sometimes necessary to have live scratch registers.
151 unsigned Stack[8]; // FP<n> Registers in each stack slot...
152 unsigned StackTop = 0; // The current top of the FP stack.
153
154 enum {
155 NumFPRegs = 8 // Including scratch pseudo-registers.
156 };
157
158 // For each live FP<n> register, point to its Stack[] entry.
159 // The first entries correspond to FP0-FP6, the rest are scratch registers
160 // used when we need slightly different live registers than what the
161 // register allocator thinks.
162 unsigned RegMap[NumFPRegs];
163
164 // Set up our stack model to match the incoming registers to MBB.
165 void setupBlockStack();
166
167 // Shuffle live registers to match the expectations of successor blocks.
168 void finishBlockStack();
169
170#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
171 void dumpStack() const {
172 dbgs() << "Stack contents:";
173 for (unsigned i = 0; i != StackTop; ++i) {
174 dbgs() << " FP" << Stack[i];
175 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
176 }
177 }
178#endif
179
180 /// getSlot - Return the stack slot number a particular register number is
181 /// in.
182 unsigned getSlot(unsigned RegNo) const {
183 assert(RegNo < NumFPRegs && "Regno out of range!");
184 return RegMap[RegNo];
185 }
186
187 /// isLive - Is RegNo currently live in the stack?
188 bool isLive(unsigned RegNo) const {
189 unsigned Slot = getSlot(RegNo);
190 return Slot < StackTop && Stack[Slot] == RegNo;
191 }
192
193 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
194 unsigned getStackEntry(unsigned STi) const {
195 if (STi >= StackTop)
196 report_fatal_error("Access past stack top!");
197 return Stack[StackTop-1-STi];
198 }
199
200 /// getSTReg - Return the X86::ST(i) register which contains the specified
201 /// FP<RegNo> register.
202 unsigned getSTReg(unsigned RegNo) const {
203 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
204 }
205
206 // pushReg - Push the specified FP<n> register onto the stack.
207 void pushReg(unsigned Reg) {
208 assert(Reg < NumFPRegs && "Register number out of range!");
209 if (StackTop >= 8)
210 report_fatal_error("Stack overflow!");
211 Stack[StackTop] = Reg;
212 RegMap[Reg] = StackTop++;
213 }
214
215 // popReg - Pop a register from the stack.
216 void popReg() {
217 if (StackTop == 0)
218 report_fatal_error("Cannot pop empty stack!");
219 RegMap[Stack[--StackTop]] = ~0; // Update state
220 }
221
222 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
223 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
224 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
225 if (isAtTop(RegNo)) return;
226
227 unsigned STReg = getSTReg(RegNo);
228 unsigned RegOnTop = getStackEntry(0);
229
230 // Swap the slots the regs are in.
231 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
232
233 // Swap stack slot contents.
234 if (RegMap[RegOnTop] >= StackTop)
235 report_fatal_error("Access past stack top!");
236 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
237
238 // Emit an fxch to update the runtime processors version of the state.
239 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
240 ++NumFXCH;
241 }
242
243 void duplicateToTop(unsigned RegNo, unsigned AsReg,
245 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
246 unsigned STReg = getSTReg(RegNo);
247 pushReg(AsReg); // New register on top of stack
248
249 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
250 }
251
252 /// popStackAfter - Pop the current value off of the top of the FP stack
253 /// after the specified instruction.
254 void popStackAfter(MachineBasicBlock::iterator &I);
255
256 /// freeStackSlotAfter - Free the specified register from the register
257 /// stack, so that it is no longer in a register. If the register is
258 /// currently at the top of the stack, we just pop the current instruction,
259 /// otherwise we store the current top-of-stack into the specified slot,
260 /// then pop the top of stack.
261 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
262
263 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
264 /// instruction.
266 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
267
268 /// Adjust the live registers to be the set in Mask.
269 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
270
271 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
272 /// st(0), FP reg FixStack[1] is st(1) etc.
273 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
275
276 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
277
278 void handleCall(MachineBasicBlock::iterator &I);
279 void handleReturn(MachineBasicBlock::iterator &I);
280 void handleZeroArgFP(MachineBasicBlock::iterator &I);
281 void handleOneArgFP(MachineBasicBlock::iterator &I);
282 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
283 void handleTwoArgFP(MachineBasicBlock::iterator &I);
284 void handleCompareFP(MachineBasicBlock::iterator &I);
285 void handleCondMovFP(MachineBasicBlock::iterator &I);
286 void handleSpecialFP(MachineBasicBlock::iterator &I);
287
288 // Check if a COPY instruction is using FP registers.
289 static bool isFPCopy(MachineInstr &MI) {
290 Register DstReg = MI.getOperand(0).getReg();
291 Register SrcReg = MI.getOperand(1).getReg();
292
293 return X86::RFP80RegClass.contains(DstReg) ||
294 X86::RFP80RegClass.contains(SrcReg);
295 }
296
297 void setKillFlags(MachineBasicBlock &MBB) const;
298 };
299}
300
301char FPS::ID = 0;
302
303INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
304 false, false)
306INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
308
310
311/// getFPReg - Return the X86::FPx register number for the specified operand.
312/// For example, this returns 3 for X86::FP3.
313static unsigned getFPReg(const MachineOperand &MO) {
314 assert(MO.isReg() && "Expected an FP register!");
315 Register Reg = MO.getReg();
316 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
317 return Reg - X86::FP0;
318}
319
320/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
321/// register references into FP stack references.
322///
323bool FPS::runOnMachineFunction(MachineFunction &MF) {
324 // We only need to run this pass if there are any FP registers used in this
325 // function. If it is all integer, there is nothing for us to do!
326 bool FPIsUsed = false;
327
328 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
329 const MachineRegisterInfo &MRI = MF.getRegInfo();
330 for (unsigned i = 0; i <= 6; ++i)
331 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
332 FPIsUsed = true;
333 break;
334 }
335
336 // Early exit.
337 if (!FPIsUsed) return false;
338
339 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
341
342 // Prepare cross-MBB liveness.
343 bundleCFGRecomputeKillFlags(MF);
344
345 StackTop = 0;
346
347 // Process the function in depth first order so that we process at least one
348 // of the predecessors for every reachable block in the function.
349 df_iterator_default_set<MachineBasicBlock*> Processed;
350 MachineBasicBlock *Entry = &MF.front();
351
352 LiveBundle &Bundle =
353 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
354
355 // In regcall convention, some FP registers may not be passed through
356 // the stack, so they will need to be assigned to the stack first
357 if ((Entry->getParent()->getFunction().getCallingConv() ==
358 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
359 // In the register calling convention, up to one FP argument could be
360 // saved in the first FP register.
361 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
362 // that the FP registers contain arguments.
363 // The actual value is passed in FP0.
364 // Here we fix the stack and mark FP0 as pre-assigned register.
365 assert((Bundle.Mask & 0xFE) == 0 &&
366 "Only FP0 could be passed as an argument");
367 Bundle.FixCount = 1;
368 Bundle.FixStack[0] = 0;
369 }
370
371 bool Changed = false;
372 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
373 Changed |= processBasicBlock(MF, *BB);
374
375 // Process any unreachable blocks in arbitrary order now.
376 if (MF.size() != Processed.size())
377 for (MachineBasicBlock &BB : MF)
378 if (Processed.insert(&BB).second)
379 Changed |= processBasicBlock(MF, BB);
380
381 LiveBundles.clear();
382
383 return Changed;
384}
385
386/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
387/// live-out sets for the FP registers. Consistent means that the set of
388/// registers live-out from a block is identical to the live-in set of all
389/// successors. This is not enforced by the normal live-in lists since
390/// registers may be implicitly defined, or not used by all successors.
391void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
392 assert(LiveBundles.empty() && "Stale data in LiveBundles");
393 LiveBundles.resize(Bundles->getNumBundles());
394
395 // Gather the actual live-in masks for all MBBs.
396 for (MachineBasicBlock &MBB : MF) {
397 setKillFlags(MBB);
398
399 const unsigned Mask = calcLiveInMask(&MBB, false);
400 if (!Mask)
401 continue;
402 // Update MBB ingoing bundle mask.
403 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
404 }
405}
406
407/// processBasicBlock - Loop over all of the instructions in the basic block,
408/// transforming FP instructions into their stack form.
409///
410bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
411 bool Changed = false;
412 MBB = &BB;
413
414 setupBlockStack();
415
416 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
417 MachineInstr &MI = *I;
418 uint64_t Flags = MI.getDesc().TSFlags;
419
420 unsigned FPInstClass = Flags & X86II::FPTypeMask;
421 if (MI.isInlineAsm())
422 FPInstClass = X86II::SpecialFP;
423
424 if (MI.isCopy() && isFPCopy(MI))
425 FPInstClass = X86II::SpecialFP;
426
427 if (MI.isImplicitDef() &&
428 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
429 FPInstClass = X86II::SpecialFP;
430
431 if (MI.isCall())
432 FPInstClass = X86II::SpecialFP;
433
434 // A fake_use with a floating point pseudo register argument that is
435 // killed must behave like any other floating point operation and pop
436 // the floating point stack (this is done in handleSpecialFP()).
437 // Fake_use is, however, unusual, in that sometimes its operand is not
438 // killed because a later instruction (probably a return) will use it.
439 // It is this instruction that will pop the stack.
440 // In this scenario we can safely remove the fake_use's operand
441 // (it is live anyway).
442 if (MI.isFakeUse()) {
443 const MachineOperand &MO = MI.getOperand(0);
444 if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
445 if (MO.isKill())
446 FPInstClass = X86II::SpecialFP;
447 else
448 MI.removeOperand(0);
449 }
450 }
451
452 if (FPInstClass == X86II::NotFP)
453 continue; // Efficiently ignore non-fp insts!
454
455 MachineInstr *PrevMI = nullptr;
456 if (I != BB.begin())
457 PrevMI = &*std::prev(I);
458
459 ++NumFP; // Keep track of # of pseudo instrs
460 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
461
462 // Get dead variables list now because the MI pointer may be deleted as part
463 // of processing!
465 for (const MachineOperand &MO : MI.operands())
466 if (MO.isReg() && MO.isDead())
467 DeadRegs.push_back(MO.getReg());
468
469 switch (FPInstClass) {
470 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
471 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
472 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
473 case X86II::TwoArgFP: handleTwoArgFP(I); break;
474 case X86II::CompareFP: handleCompareFP(I); break;
475 case X86II::CondMovFP: handleCondMovFP(I); break;
476 case X86II::SpecialFP: handleSpecialFP(I); break;
477 default: llvm_unreachable("Unknown FP Type!");
478 }
479
480 // Check to see if any of the values defined by this instruction are dead
481 // after definition. If so, pop them.
482 for (Register Reg : DeadRegs) {
483 // Check if Reg is live on the stack. An inline-asm register operand that
484 // is in the clobber list and marked dead might not be live on the stack.
485 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
486 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
487 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
488 freeStackSlotAfter(I, Reg-X86::FP0);
489 }
490 }
491
492 // Print out all of the instructions expanded to if -debug
493 LLVM_DEBUG({
494 MachineBasicBlock::iterator PrevI = PrevMI;
495 if (I == PrevI) {
496 dbgs() << "Just deleted pseudo instruction\n";
497 } else {
499 // Rewind to first instruction newly inserted.
500 while (Start != BB.begin() && std::prev(Start) != PrevI)
501 --Start;
502 dbgs() << "Inserted instructions:\n\t";
503 Start->print(dbgs());
504 while (++Start != std::next(I)) {
505 }
506 }
507 dumpStack();
508 });
509 (void)PrevMI;
510
511 Changed = true;
512 }
513
514 finishBlockStack();
515
516 return Changed;
517}
518
519/// setupBlockStack - Use the live bundles to set up our model of the stack
520/// to match predecessors' live out stack.
521void FPS::setupBlockStack() {
522 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
523 << " derived from " << MBB->getName() << ".\n");
524 StackTop = 0;
525 // Get the live-in bundle for MBB.
526 const LiveBundle &Bundle =
527 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
528
529 if (!Bundle.Mask) {
530 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
531 return;
532 }
533
534 // Depth-first iteration should ensure that we always have an assigned stack.
535 assert(Bundle.isFixed() && "Reached block before any predecessors");
536
537 // Push the fixed live-in registers.
538 for (unsigned i = Bundle.FixCount; i > 0; --i) {
539 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
540 << unsigned(Bundle.FixStack[i - 1]) << '\n');
541 pushReg(Bundle.FixStack[i-1]);
542 }
543
544 // Kill off unwanted live-ins. This can happen with a critical edge.
545 // FIXME: We could keep these live registers around as zombies. They may need
546 // to be revived at the end of a short block. It might save a few instrs.
547 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
548 adjustLiveRegs(Mask, MBB->begin());
549 LLVM_DEBUG(MBB->dump());
550}
551
552/// finishBlockStack - Revive live-outs that are implicitly defined out of
553/// MBB. Shuffle live registers to match the expected fixed stack of any
554/// predecessors, and ensure that all predecessors are expecting the same
555/// stack.
556void FPS::finishBlockStack() {
557 // The RET handling below takes care of return blocks for us.
558 if (MBB->succ_empty())
559 return;
560
561 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
562 << " derived from " << MBB->getName() << ".\n");
563
564 // Get MBB's live-out bundle.
565 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
566 LiveBundle &Bundle = LiveBundles[BundleIdx];
567
568 // We may need to kill and define some registers to match successors.
569 // FIXME: This can probably be combined with the shuffle below.
571 adjustLiveRegs(Bundle.Mask, Term);
572
573 if (!Bundle.Mask) {
574 LLVM_DEBUG(dbgs() << "No live-outs.\n");
575 return;
576 }
577
578 // Has the stack order been fixed yet?
579 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
580 if (Bundle.isFixed()) {
581 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
582 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
583 } else {
584 // Not fixed yet, we get to choose.
585 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
586 Bundle.FixCount = StackTop;
587 for (unsigned i = 0; i < StackTop; ++i)
588 Bundle.FixStack[i] = getStackEntry(i);
589 }
590}
591
592
593//===----------------------------------------------------------------------===//
594// Efficient Lookup Table Support
595//===----------------------------------------------------------------------===//
596
597namespace {
598 struct TableEntry {
599 uint16_t from;
600 uint16_t to;
601 bool operator<(const TableEntry &TE) const { return from < TE.from; }
602 friend bool operator<(const TableEntry &TE, unsigned V) {
603 return TE.from < V;
604 }
605 [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
606 return V < TE.from;
607 }
608 };
609}
610
611static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
612 const TableEntry *I = llvm::lower_bound(Table, Opcode);
613 if (I != Table.end() && I->from == Opcode)
614 return I->to;
615 return -1;
616}
617
618#ifdef NDEBUG
619#define ASSERT_SORTED(TABLE)
620#else
621#define ASSERT_SORTED(TABLE) \
622 { \
623 static std::atomic<bool> TABLE##Checked(false); \
624 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
625 assert(is_sorted(TABLE) && \
626 "All lookup tables must be sorted for efficient access!"); \
627 TABLE##Checked.store(true, std::memory_order_relaxed); \
628 } \
629 }
630#endif
631
632//===----------------------------------------------------------------------===//
633// Register File -> Register Stack Mapping Methods
634//===----------------------------------------------------------------------===//
635
636// OpcodeTable - Sorted map of register instructions to their stack version.
637// The first element is an register file pseudo instruction, the second is the
638// concrete X86 instruction which uses the register stack.
639//
640static const TableEntry OpcodeTable[] = {
641 { X86::ABS_Fp32 , X86::ABS_F },
642 { X86::ABS_Fp64 , X86::ABS_F },
643 { X86::ABS_Fp80 , X86::ABS_F },
644 { X86::ADD_Fp32m , X86::ADD_F32m },
645 { X86::ADD_Fp64m , X86::ADD_F64m },
646 { X86::ADD_Fp64m32 , X86::ADD_F32m },
647 { X86::ADD_Fp80m32 , X86::ADD_F32m },
648 { X86::ADD_Fp80m64 , X86::ADD_F64m },
649 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
650 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
651 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
652 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
653 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
654 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
655 { X86::CHS_Fp32 , X86::CHS_F },
656 { X86::CHS_Fp64 , X86::CHS_F },
657 { X86::CHS_Fp80 , X86::CHS_F },
658 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
659 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
660 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
661 { X86::CMOVB_Fp32 , X86::CMOVB_F },
662 { X86::CMOVB_Fp64 , X86::CMOVB_F },
663 { X86::CMOVB_Fp80 , X86::CMOVB_F },
664 { X86::CMOVE_Fp32 , X86::CMOVE_F },
665 { X86::CMOVE_Fp64 , X86::CMOVE_F },
666 { X86::CMOVE_Fp80 , X86::CMOVE_F },
667 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
668 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
669 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
670 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
671 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
672 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
673 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
674 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
675 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
676 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
677 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
678 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
679 { X86::CMOVP_Fp32 , X86::CMOVP_F },
680 { X86::CMOVP_Fp64 , X86::CMOVP_F },
681 { X86::CMOVP_Fp80 , X86::CMOVP_F },
682 { X86::COM_FpIr32 , X86::COM_FIr },
683 { X86::COM_FpIr64 , X86::COM_FIr },
684 { X86::COM_FpIr80 , X86::COM_FIr },
685 { X86::COM_Fpr32 , X86::COM_FST0r },
686 { X86::COM_Fpr64 , X86::COM_FST0r },
687 { X86::COM_Fpr80 , X86::COM_FST0r },
688 { X86::DIVR_Fp32m , X86::DIVR_F32m },
689 { X86::DIVR_Fp64m , X86::DIVR_F64m },
690 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
691 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
692 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
693 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
694 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
695 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
696 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
697 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
698 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
699 { X86::DIV_Fp32m , X86::DIV_F32m },
700 { X86::DIV_Fp64m , X86::DIV_F64m },
701 { X86::DIV_Fp64m32 , X86::DIV_F32m },
702 { X86::DIV_Fp80m32 , X86::DIV_F32m },
703 { X86::DIV_Fp80m64 , X86::DIV_F64m },
704 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
705 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
706 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
707 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
708 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
709 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
710 { X86::ILD_Fp16m32 , X86::ILD_F16m },
711 { X86::ILD_Fp16m64 , X86::ILD_F16m },
712 { X86::ILD_Fp16m80 , X86::ILD_F16m },
713 { X86::ILD_Fp32m32 , X86::ILD_F32m },
714 { X86::ILD_Fp32m64 , X86::ILD_F32m },
715 { X86::ILD_Fp32m80 , X86::ILD_F32m },
716 { X86::ILD_Fp64m32 , X86::ILD_F64m },
717 { X86::ILD_Fp64m64 , X86::ILD_F64m },
718 { X86::ILD_Fp64m80 , X86::ILD_F64m },
719 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
720 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
721 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
722 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
723 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
724 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
725 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
726 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
727 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
728 { X86::IST_Fp16m32 , X86::IST_F16m },
729 { X86::IST_Fp16m64 , X86::IST_F16m },
730 { X86::IST_Fp16m80 , X86::IST_F16m },
731 { X86::IST_Fp32m32 , X86::IST_F32m },
732 { X86::IST_Fp32m64 , X86::IST_F32m },
733 { X86::IST_Fp32m80 , X86::IST_F32m },
734 { X86::IST_Fp64m32 , X86::IST_FP64m },
735 { X86::IST_Fp64m64 , X86::IST_FP64m },
736 { X86::IST_Fp64m80 , X86::IST_FP64m },
737 { X86::LD_Fp032 , X86::LD_F0 },
738 { X86::LD_Fp064 , X86::LD_F0 },
739 { X86::LD_Fp080 , X86::LD_F0 },
740 { X86::LD_Fp132 , X86::LD_F1 },
741 { X86::LD_Fp164 , X86::LD_F1 },
742 { X86::LD_Fp180 , X86::LD_F1 },
743 { X86::LD_Fp32m , X86::LD_F32m },
744 { X86::LD_Fp32m64 , X86::LD_F32m },
745 { X86::LD_Fp32m80 , X86::LD_F32m },
746 { X86::LD_Fp64m , X86::LD_F64m },
747 { X86::LD_Fp64m80 , X86::LD_F64m },
748 { X86::LD_Fp80m , X86::LD_F80m },
749 { X86::MUL_Fp32m , X86::MUL_F32m },
750 { X86::MUL_Fp64m , X86::MUL_F64m },
751 { X86::MUL_Fp64m32 , X86::MUL_F32m },
752 { X86::MUL_Fp80m32 , X86::MUL_F32m },
753 { X86::MUL_Fp80m64 , X86::MUL_F64m },
754 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
755 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
756 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
757 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
758 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
759 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
760 { X86::SQRT_Fp32 , X86::SQRT_F },
761 { X86::SQRT_Fp64 , X86::SQRT_F },
762 { X86::SQRT_Fp80 , X86::SQRT_F },
763 { X86::ST_Fp32m , X86::ST_F32m },
764 { X86::ST_Fp64m , X86::ST_F64m },
765 { X86::ST_Fp64m32 , X86::ST_F32m },
766 { X86::ST_Fp80m32 , X86::ST_F32m },
767 { X86::ST_Fp80m64 , X86::ST_F64m },
768 { X86::ST_FpP80m , X86::ST_FP80m },
769 { X86::SUBR_Fp32m , X86::SUBR_F32m },
770 { X86::SUBR_Fp64m , X86::SUBR_F64m },
771 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
772 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
773 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
774 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
775 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
776 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
777 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
778 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
779 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
780 { X86::SUB_Fp32m , X86::SUB_F32m },
781 { X86::SUB_Fp64m , X86::SUB_F64m },
782 { X86::SUB_Fp64m32 , X86::SUB_F32m },
783 { X86::SUB_Fp80m32 , X86::SUB_F32m },
784 { X86::SUB_Fp80m64 , X86::SUB_F64m },
785 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
786 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
787 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
788 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
789 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
790 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
791 { X86::TST_Fp32 , X86::TST_F },
792 { X86::TST_Fp64 , X86::TST_F },
793 { X86::TST_Fp80 , X86::TST_F },
794 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
795 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
796 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
797 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
798 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
799 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
800 { X86::XAM_Fp32 , X86::XAM_F },
801 { X86::XAM_Fp64 , X86::XAM_F },
802 { X86::XAM_Fp80 , X86::XAM_F },
803};
804
805static unsigned getConcreteOpcode(unsigned Opcode) {
807 int Opc = Lookup(OpcodeTable, Opcode);
808 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
809 return Opc;
810}
811
812//===----------------------------------------------------------------------===//
813// Helper Methods
814//===----------------------------------------------------------------------===//
815
816// PopTable - Sorted map of instructions to their popping version. The first
817// element is an instruction, the second is the version which pops.
818//
819static const TableEntry PopTable[] = {
820 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
821
822 { X86::COMP_FST0r, X86::FCOMPP },
823 { X86::COM_FIr , X86::COM_FIPr },
824 { X86::COM_FST0r , X86::COMP_FST0r },
825
826 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
827 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
828
829 { X86::IST_F16m , X86::IST_FP16m },
830 { X86::IST_F32m , X86::IST_FP32m },
831
832 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
833
834 { X86::ST_F32m , X86::ST_FP32m },
835 { X86::ST_F64m , X86::ST_FP64m },
836 { X86::ST_Frr , X86::ST_FPrr },
837
838 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
839 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
840
841 { X86::UCOM_FIr , X86::UCOM_FIPr },
842
843 { X86::UCOM_FPr , X86::UCOM_FPPr },
844 { X86::UCOM_Fr , X86::UCOM_FPr },
845};
846
848 if (const MachineOperand *MO =
849 MI.findRegisterDefOperand(X86::FPSW, /*TRI=*/nullptr))
850 if (!MO->isDead())
851 return true;
852 return false;
853}
854
857 MachineBasicBlock &MBB = *I->getParent();
858 while (++I != MBB.end()) {
859 MachineInstr &MI = *I;
861 return I;
862 }
863 return MBB.end();
864}
865
866/// popStackAfter - Pop the current value off of the top of the FP stack after
867/// the specified instruction. This attempts to be sneaky and combine the pop
868/// into the instruction itself if possible. The iterator is left pointing to
869/// the last instruction, be it a new pop instruction inserted, or the old
870/// instruction if it was modified in place.
871///
872void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
873 MachineInstr &MI = *I;
874 const DebugLoc &dl = MI.getDebugLoc();
876
877 popReg();
878
879 // Check to see if there is a popping version of this instruction...
880 int Opcode = Lookup(PopTable, I->getOpcode());
881 if (Opcode != -1) {
882 I->setDesc(TII->get(Opcode));
883 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
884 I->removeOperand(0);
885 MI.dropDebugNumber();
886 } else { // Insert an explicit pop
887 // If this instruction sets FPSW, which is read in following instruction,
888 // insert pop after that reader.
890 MachineBasicBlock &MBB = *MI.getParent();
892 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, /*TRI=*/nullptr))
893 I = Next;
894 }
895 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
896 }
897}
898
899/// freeStackSlotAfter - Free the specified register from the register stack, so
900/// that it is no longer in a register. If the register is currently at the top
901/// of the stack, we just pop the current instruction, otherwise we store the
902/// current top-of-stack into the specified slot, then pop the top of stack.
903void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
904 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
905 popStackAfter(I);
906 return;
907 }
908
909 // Otherwise, store the top of stack into the dead slot, killing the operand
910 // without having to add in an explicit xchg then pop.
911 //
912 I = freeStackSlotBefore(++I, FPRegNo);
913}
914
915/// freeStackSlotBefore - Free the specified register without trying any
916/// folding.
918FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
919 unsigned STReg = getSTReg(FPRegNo);
920 unsigned OldSlot = getSlot(FPRegNo);
921 unsigned TopReg = Stack[StackTop-1];
922 Stack[OldSlot] = TopReg;
923 RegMap[TopReg] = OldSlot;
924 RegMap[FPRegNo] = ~0;
925 Stack[--StackTop] = ~0;
926 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
927 .addReg(STReg)
928 .getInstr();
929}
930
931/// adjustLiveRegs - Kill and revive registers such that exactly the FP
932/// registers with a bit in Mask are live.
933void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
934 unsigned Defs = Mask;
935 unsigned Kills = 0;
936 for (unsigned i = 0; i < StackTop; ++i) {
937 unsigned RegNo = Stack[i];
938 if (!(Defs & (1 << RegNo)))
939 // This register is live, but we don't want it.
940 Kills |= (1 << RegNo);
941 else
942 // We don't need to imp-def this live register.
943 Defs &= ~(1 << RegNo);
944 }
945 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
946
947 // Produce implicit-defs for free by using killed registers.
948 while (Kills && Defs) {
949 unsigned KReg = llvm::countr_zero(Kills);
950 unsigned DReg = llvm::countr_zero(Defs);
951 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
952 << "\n");
953 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
954 std::swap(RegMap[KReg], RegMap[DReg]);
955 Kills &= ~(1 << KReg);
956 Defs &= ~(1 << DReg);
957 }
958
959 // Kill registers by popping.
960 if (Kills && I != MBB->begin()) {
961 MachineBasicBlock::iterator I2 = std::prev(I);
962 while (StackTop) {
963 unsigned KReg = getStackEntry(0);
964 if (!(Kills & (1 << KReg)))
965 break;
966 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
967 popStackAfter(I2);
968 Kills &= ~(1 << KReg);
969 }
970 }
971
972 // Manually kill the rest.
973 while (Kills) {
974 unsigned KReg = llvm::countr_zero(Kills);
975 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
976 freeStackSlotBefore(I, KReg);
977 Kills &= ~(1 << KReg);
978 }
979
980 // Load zeros for all the imp-defs.
981 while(Defs) {
982 unsigned DReg = llvm::countr_zero(Defs);
983 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
984 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
985 pushReg(DReg);
986 Defs &= ~(1 << DReg);
987 }
988
989 // Now we should have the correct registers live.
990 LLVM_DEBUG(dumpStack());
991 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
992}
993
994/// shuffleStackTop - emit fxch instructions before I to shuffle the top
995/// FixCount entries into the order given by FixStack.
996/// FIXME: Is there a better algorithm than insertion sort?
997void FPS::shuffleStackTop(const unsigned char *FixStack,
998 unsigned FixCount,
1000 // Move items into place, starting from the desired stack bottom.
1001 while (FixCount--) {
1002 // Old register at position FixCount.
1003 unsigned OldReg = getStackEntry(FixCount);
1004 // Desired register at position FixCount.
1005 unsigned Reg = FixStack[FixCount];
1006 if (Reg == OldReg)
1007 continue;
1008 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
1009 moveToTop(Reg, I);
1010 if (FixCount > 0)
1011 moveToTop(OldReg, I);
1012 }
1013 LLVM_DEBUG(dumpStack());
1014}
1015
1016
1017//===----------------------------------------------------------------------===//
1018// Instruction transformation implementation
1019//===----------------------------------------------------------------------===//
1020
1021void FPS::handleCall(MachineBasicBlock::iterator &I) {
1022 MachineInstr &MI = *I;
1023 unsigned STReturns = 0;
1024
1025 bool ClobbersFPStack = false;
1026 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1027 MachineOperand &Op = MI.getOperand(i);
1028 // Check if this call clobbers the FP stack.
1029 // is sufficient.
1030 if (Op.isRegMask()) {
1031 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1032#ifndef NDEBUG
1033 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1034 for (unsigned i = 1; i != 8; ++i)
1035 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1036 "Inconsistent FP register clobber");
1037#endif
1038
1039 if (ClobbersFP0)
1040 ClobbersFPStack = true;
1041 }
1042
1043 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1044 continue;
1045
1046 assert(Op.isImplicit() && "Expected implicit def/use");
1047
1048 if (Op.isDef())
1049 STReturns |= 1 << getFPReg(Op);
1050
1051 // Remove the operand so that later passes don't see it.
1052 MI.removeOperand(i);
1053 --i;
1054 --e;
1055 }
1056
1057 // Most calls should have a regmask that clobbers the FP registers. If it
1058 // isn't present then the register allocator didn't spill the FP registers
1059 // so they are still on the stack.
1060 assert((ClobbersFPStack || STReturns == 0) &&
1061 "ST returns without FP stack clobber");
1062 if (!ClobbersFPStack)
1063 return;
1064
1065 unsigned N = llvm::countr_one(STReturns);
1066
1067 // FP registers used for function return must be consecutive starting at
1068 // FP0
1069 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1070
1071 // Reset the FP Stack - It is required because of possible leftovers from
1072 // passed arguments. The caller should assume that the FP stack is
1073 // returned empty (unless the callee returns values on FP stack).
1074 while (StackTop > 0)
1075 popReg();
1076
1077 for (unsigned I = 0; I < N; ++I)
1078 pushReg(N - I - 1);
1079
1080 // If this call has been modified, drop all variable values defined by it.
1081 // We can't track them once they've been stackified.
1082 if (STReturns)
1083 I->dropDebugNumber();
1084}
1085
1086/// If RET has an FP register use operand, pass the first one in ST(0) and
1087/// the second one in ST(1).
1088void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1089 MachineInstr &MI = *I;
1090
1091 // Find the register operands.
1092 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1093 unsigned LiveMask = 0;
1094
1095 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1096 MachineOperand &Op = MI.getOperand(i);
1097 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1098 continue;
1099 // FP Register uses must be kills unless there are two uses of the same
1100 // register, in which case only one will be a kill.
1101 assert(Op.isUse() &&
1102 (Op.isKill() || // Marked kill.
1103 getFPReg(Op) == FirstFPRegOp || // Second instance.
1104 MI.killsRegister(Op.getReg(),
1105 /*TRI=*/nullptr)) && // Later use is marked kill.
1106 "Ret only defs operands, and values aren't live beyond it");
1107
1108 if (FirstFPRegOp == ~0U)
1109 FirstFPRegOp = getFPReg(Op);
1110 else {
1111 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1112 SecondFPRegOp = getFPReg(Op);
1113 }
1114 LiveMask |= (1 << getFPReg(Op));
1115
1116 // Remove the operand so that later passes don't see it.
1117 MI.removeOperand(i);
1118 --i;
1119 --e;
1120 }
1121
1122 // We may have been carrying spurious live-ins, so make sure only the
1123 // returned registers are left live.
1124 adjustLiveRegs(LiveMask, MI);
1125 if (!LiveMask) return; // Quick check to see if any are possible.
1126
1127 // There are only four possibilities here:
1128 // 1) we are returning a single FP value. In this case, it has to be in
1129 // ST(0) already, so just declare success by removing the value from the
1130 // FP Stack.
1131 if (SecondFPRegOp == ~0U) {
1132 // Assert that the top of stack contains the right FP register.
1133 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1134 "Top of stack not the right register for RET!");
1135
1136 // Ok, everything is good, mark the value as not being on the stack
1137 // anymore so that our assertion about the stack being empty at end of
1138 // block doesn't fire.
1139 StackTop = 0;
1140 return;
1141 }
1142
1143 // Otherwise, we are returning two values:
1144 // 2) If returning the same value for both, we only have one thing in the FP
1145 // stack. Consider: RET FP1, FP1
1146 if (StackTop == 1) {
1147 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1148 "Stack misconfiguration for RET!");
1149
1150 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1151 // register to hold it.
1152 unsigned NewReg = ScratchFPReg;
1153 duplicateToTop(FirstFPRegOp, NewReg, MI);
1154 FirstFPRegOp = NewReg;
1155 }
1156
1157 /// Okay we know we have two different FPx operands now:
1158 assert(StackTop == 2 && "Must have two values live!");
1159
1160 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1161 /// in ST(1). In this case, emit an fxch.
1162 if (getStackEntry(0) == SecondFPRegOp) {
1163 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1164 moveToTop(FirstFPRegOp, MI);
1165 }
1166
1167 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1168 /// ST(1). Just remove both from our understanding of the stack and return.
1169 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1170 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1171 StackTop = 0;
1172}
1173
1174/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1175///
1176void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1177 MachineInstr &MI = *I;
1178 unsigned DestReg = getFPReg(MI.getOperand(0));
1179
1180 // Change from the pseudo instruction to the concrete instruction.
1181 MI.removeOperand(0); // Remove the explicit ST(0) operand
1182 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1183 MI.addOperand(
1184 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1185
1186 // Result gets pushed on the stack.
1187 pushReg(DestReg);
1188
1189 MI.dropDebugNumber();
1190}
1191
1192/// handleOneArgFP - fst <mem>, ST(0)
1193///
1194void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1195 MachineInstr &MI = *I;
1196 unsigned NumOps = MI.getDesc().getNumOperands();
1197 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1198 "Can only handle fst* & ftst instructions!");
1199
1200 // Is this the last use of the source register?
1201 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1202 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1203
1204 // FISTP64m is strange because there isn't a non-popping versions.
1205 // If we have one _and_ we don't want to pop the operand, duplicate the value
1206 // on the stack instead of moving it. This ensure that popping the value is
1207 // always ok.
1208 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1209 //
1210 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1211 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1212 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1213 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1214 MI.getOpcode() == X86::IST_Fp64m64 ||
1215 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1216 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1217 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1218 MI.getOpcode() == X86::IST_Fp64m80 ||
1219 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1220 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1221 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1222 MI.getOpcode() == X86::ST_FpP80m)) {
1223 duplicateToTop(Reg, ScratchFPReg, I);
1224 } else {
1225 moveToTop(Reg, I); // Move to the top of the stack...
1226 }
1227
1228 // Convert from the pseudo instruction to the concrete instruction.
1229 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1230 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1231 MI.addOperand(
1232 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1233
1234 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1235 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1236 MI.getOpcode() == X86::ST_FP80m) {
1237 if (StackTop == 0)
1238 report_fatal_error("Stack empty??");
1239 --StackTop;
1240 } else if (KillsSrc) { // Last use of operand?
1241 popStackAfter(I);
1242 }
1243
1244 MI.dropDebugNumber();
1245}
1246
1247
1248/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1249/// replace the value with a newly computed value. These instructions may have
1250/// non-fp operands after their FP operands.
1251///
1252/// Examples:
1253/// R1 = fchs R2
1254/// R1 = fadd R2, [mem]
1255///
1256void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1257 MachineInstr &MI = *I;
1258#ifndef NDEBUG
1259 unsigned NumOps = MI.getDesc().getNumOperands();
1260 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1261#endif
1262
1263 // Is this the last use of the source register?
1264 unsigned Reg = getFPReg(MI.getOperand(1));
1265 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1266
1267 if (KillsSrc) {
1268 // If this is the last use of the source register, just make sure it's on
1269 // the top of the stack.
1270 moveToTop(Reg, I);
1271 if (StackTop == 0)
1272 report_fatal_error("Stack cannot be empty!");
1273 --StackTop;
1274 pushReg(getFPReg(MI.getOperand(0)));
1275 } else {
1276 // If this is not the last use of the source register, _copy_ it to the top
1277 // of the stack.
1278 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1279 }
1280
1281 // Change from the pseudo instruction to the concrete instruction.
1282 MI.removeOperand(1); // Drop the source operand.
1283 MI.removeOperand(0); // Drop the destination operand.
1284 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1285 MI.dropDebugNumber();
1286}
1287
1288
1289//===----------------------------------------------------------------------===//
1290// Define tables of various ways to map pseudo instructions
1291//
1292
1293// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1294static const TableEntry ForwardST0Table[] = {
1295 { X86::ADD_Fp32 , X86::ADD_FST0r },
1296 { X86::ADD_Fp64 , X86::ADD_FST0r },
1297 { X86::ADD_Fp80 , X86::ADD_FST0r },
1298 { X86::DIV_Fp32 , X86::DIV_FST0r },
1299 { X86::DIV_Fp64 , X86::DIV_FST0r },
1300 { X86::DIV_Fp80 , X86::DIV_FST0r },
1301 { X86::MUL_Fp32 , X86::MUL_FST0r },
1302 { X86::MUL_Fp64 , X86::MUL_FST0r },
1303 { X86::MUL_Fp80 , X86::MUL_FST0r },
1304 { X86::SUB_Fp32 , X86::SUB_FST0r },
1305 { X86::SUB_Fp64 , X86::SUB_FST0r },
1306 { X86::SUB_Fp80 , X86::SUB_FST0r },
1307};
1308
1309// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1310static const TableEntry ReverseST0Table[] = {
1311 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1312 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1313 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1314 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1315 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1316 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1317 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1318 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1319 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1320 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1321 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1322 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1323};
1324
1325// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1326static const TableEntry ForwardSTiTable[] = {
1327 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1328 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1329 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1330 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1331 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1332 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1333 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1334 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1335 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1336 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1337 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1338 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1339};
1340
1341// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1342static const TableEntry ReverseSTiTable[] = {
1343 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1344 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1345 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1346 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1347 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1348 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1349 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1350 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1351 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1352 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1353 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1354 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1355};
1356
1357
1358/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1359/// instructions which need to be simplified and possibly transformed.
1360///
1361/// Result: ST(0) = fsub ST(0), ST(i)
1362/// ST(i) = fsub ST(0), ST(i)
1363/// ST(0) = fsubr ST(0), ST(i)
1364/// ST(i) = fsubr ST(0), ST(i)
1365///
1366void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1369 MachineInstr &MI = *I;
1370
1371 unsigned NumOperands = MI.getDesc().getNumOperands();
1372 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1373 unsigned Dest = getFPReg(MI.getOperand(0));
1374 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1375 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1376 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1377 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1378 const DebugLoc &dl = MI.getDebugLoc();
1379
1380 unsigned TOS = getStackEntry(0);
1381
1382 // One of our operands must be on the top of the stack. If neither is yet, we
1383 // need to move one.
1384 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1385 // We can choose to move either operand to the top of the stack. If one of
1386 // the operands is killed by this instruction, we want that one so that we
1387 // can update right on top of the old version.
1388 if (KillsOp0) {
1389 moveToTop(Op0, I); // Move dead operand to TOS.
1390 TOS = Op0;
1391 } else if (KillsOp1) {
1392 moveToTop(Op1, I);
1393 TOS = Op1;
1394 } else {
1395 // All of the operands are live after this instruction executes, so we
1396 // cannot update on top of any operand. Because of this, we must
1397 // duplicate one of the stack elements to the top. It doesn't matter
1398 // which one we pick.
1399 //
1400 duplicateToTop(Op0, Dest, I);
1401 Op0 = TOS = Dest;
1402 KillsOp0 = true;
1403 }
1404 } else if (!KillsOp0 && !KillsOp1) {
1405 // If we DO have one of our operands at the top of the stack, but we don't
1406 // have a dead operand, we must duplicate one of the operands to a new slot
1407 // on the stack.
1408 duplicateToTop(Op0, Dest, I);
1409 Op0 = TOS = Dest;
1410 KillsOp0 = true;
1411 }
1412
1413 // Now we know that one of our operands is on the top of the stack, and at
1414 // least one of our operands is killed by this instruction.
1415 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1416 "Stack conditions not set up right!");
1417
1418 // We decide which form to use based on what is on the top of the stack, and
1419 // which operand is killed by this instruction.
1420 ArrayRef<TableEntry> InstTable;
1421 bool isForward = TOS == Op0;
1422 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1423 if (updateST0) {
1424 if (isForward)
1425 InstTable = ForwardST0Table;
1426 else
1427 InstTable = ReverseST0Table;
1428 } else {
1429 if (isForward)
1430 InstTable = ForwardSTiTable;
1431 else
1432 InstTable = ReverseSTiTable;
1433 }
1434
1435 int Opcode = Lookup(InstTable, MI.getOpcode());
1436 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1437
1438 // NotTOS - The register which is not on the top of stack...
1439 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1440
1441 // Replace the old instruction with a new instruction
1442 MBB->remove(&*I++);
1443 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1444
1445 if (!MI.mayRaiseFPException())
1446 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1447
1448 // If both operands are killed, pop one off of the stack in addition to
1449 // overwriting the other one.
1450 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1451 assert(!updateST0 && "Should have updated other operand!");
1452 popStackAfter(I); // Pop the top of stack
1453 }
1454
1455 // Update stack information so that we know the destination register is now on
1456 // the stack.
1457 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1458 assert(UpdatedSlot < StackTop && Dest < 7);
1459 Stack[UpdatedSlot] = Dest;
1460 RegMap[Dest] = UpdatedSlot;
1461 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1462}
1463
1464/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1465/// register arguments and no explicit destinations.
1466///
1467void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1468 MachineInstr &MI = *I;
1469
1470 unsigned NumOperands = MI.getDesc().getNumOperands();
1471 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1472 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1473 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1474 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1475 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1476
1477 // Make sure the first operand is on the top of stack, the other one can be
1478 // anywhere.
1479 moveToTop(Op0, I);
1480
1481 // Change from the pseudo instruction to the concrete instruction.
1482 MI.getOperand(0).setReg(getSTReg(Op1));
1483 MI.removeOperand(1);
1484 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1485 MI.dropDebugNumber();
1486
1487 // If any of the operands are killed by this instruction, free them.
1488 if (KillsOp0) freeStackSlotAfter(I, Op0);
1489 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1490}
1491
1492/// handleCondMovFP - Handle two address conditional move instructions. These
1493/// instructions move a st(i) register to st(0) iff a condition is true. These
1494/// instructions require that the first operand is at the top of the stack, but
1495/// otherwise don't modify the stack at all.
1496void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1497 MachineInstr &MI = *I;
1498
1499 unsigned Op0 = getFPReg(MI.getOperand(0));
1500 unsigned Op1 = getFPReg(MI.getOperand(2));
1501 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1502
1503 // The first operand *must* be on the top of the stack.
1504 moveToTop(Op0, I);
1505
1506 // Change the second operand to the stack register that the operand is in.
1507 // Change from the pseudo instruction to the concrete instruction.
1508 MI.removeOperand(0);
1509 MI.removeOperand(1);
1510 MI.getOperand(0).setReg(getSTReg(Op1));
1511 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1512 MI.dropDebugNumber();
1513
1514 // If we kill the second operand, make sure to pop it from the stack.
1515 if (Op0 != Op1 && KillsOp1) {
1516 // Get this value off of the register stack.
1517 freeStackSlotAfter(I, Op1);
1518 }
1519}
1520
1521
1522/// handleSpecialFP - Handle special instructions which behave unlike other
1523/// floating point instructions. This is primarily intended for use by pseudo
1524/// instructions.
1525///
1526void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1527 MachineInstr &MI = *Inst;
1528
1529 if (MI.isCall()) {
1530 handleCall(Inst);
1531 return;
1532 }
1533
1534 if (MI.isReturn()) {
1535 handleReturn(Inst);
1536 return;
1537 }
1538
1539 switch (MI.getOpcode()) {
1540 default: llvm_unreachable("Unknown SpecialFP instruction!");
1541 case TargetOpcode::COPY: {
1542 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1543 const MachineOperand &MO1 = MI.getOperand(1);
1544 const MachineOperand &MO0 = MI.getOperand(0);
1545 bool KillsSrc = MI.killsRegister(MO1.getReg(), /*TRI=*/nullptr);
1546
1547 // FP <- FP copy.
1548 unsigned DstFP = getFPReg(MO0);
1549 unsigned SrcFP = getFPReg(MO1);
1550 assert(isLive(SrcFP) && "Cannot copy dead register");
1551 if (KillsSrc) {
1552 // If the input operand is killed, we can just change the owner of the
1553 // incoming stack slot into the result.
1554 unsigned Slot = getSlot(SrcFP);
1555 Stack[Slot] = DstFP;
1556 RegMap[DstFP] = Slot;
1557 } else {
1558 // For COPY we just duplicate the specified value to a new stack slot.
1559 // This could be made better, but would require substantial changes.
1560 duplicateToTop(SrcFP, DstFP, Inst);
1561 }
1562 break;
1563 }
1564
1565 case TargetOpcode::IMPLICIT_DEF: {
1566 // All FP registers must be explicitly defined, so load a 0 instead.
1567 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1568 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1569 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1570 pushReg(Reg);
1571 break;
1572 }
1573
1574 case TargetOpcode::INLINEASM:
1575 case TargetOpcode::INLINEASM_BR: {
1576 // The inline asm MachineInstr currently only *uses* FP registers for the
1577 // 'f' constraint. These should be turned into the current ST(x) register
1578 // in the machine instr.
1579 //
1580 // There are special rules for x87 inline assembly. The compiler must know
1581 // exactly how many registers are popped and pushed implicitly by the asm.
1582 // Otherwise it is not possible to restore the stack state after the inline
1583 // asm.
1584 //
1585 // There are 3 kinds of input operands:
1586 //
1587 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1588 // popped input operand must be in a fixed stack slot, and it is either
1589 // tied to an output operand, or in the clobber list. The MI has ST use
1590 // and def operands for these inputs.
1591 //
1592 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1593 // preserved by the inline asm. The fixed stack slots must be STn-STm
1594 // following the popped inputs. A fixed input operand cannot be tied to
1595 // an output or appear in the clobber list. The MI has ST use operands
1596 // and no defs for these inputs.
1597 //
1598 // 3. Preserved inputs. These inputs use the "f" constraint which is
1599 // represented as an FP register. The inline asm won't change these
1600 // stack slots.
1601 //
1602 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1603 // registers do not count as output operands. The inline asm changes the
1604 // stack as if it popped all the popped inputs and then pushed all the
1605 // output operands.
1606
1607 // Scan the assembly for ST registers used, defined and clobbered. We can
1608 // only tell clobbers from defs by looking at the asm descriptor.
1609 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1610 unsigned NumOps = 0;
1611 SmallSet<unsigned, 1> FRegIdx;
1612 unsigned RCID;
1613
1614 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1615 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1616 unsigned Flags = MI.getOperand(i).getImm();
1617 const InlineAsm::Flag F(Flags);
1618
1619 NumOps = F.getNumOperandRegisters();
1620 if (NumOps != 1)
1621 continue;
1622 const MachineOperand &MO = MI.getOperand(i + 1);
1623 if (!MO.isReg())
1624 continue;
1625 unsigned STReg = MO.getReg() - X86::FP0;
1626 if (STReg >= 8)
1627 continue;
1628
1629 // If the flag has a register class constraint, this must be an operand
1630 // with constraint "f". Record its index and continue.
1631 if (F.hasRegClassConstraint(RCID)) {
1632 FRegIdx.insert(i + 1);
1633 continue;
1634 }
1635
1636 switch (F.getKind()) {
1637 case InlineAsm::Kind::RegUse:
1638 STUses |= (1u << STReg);
1639 break;
1640 case InlineAsm::Kind::RegDef:
1641 case InlineAsm::Kind::RegDefEarlyClobber:
1642 STDefs |= (1u << STReg);
1643 break;
1644 case InlineAsm::Kind::Clobber:
1645 STClobbers |= (1u << STReg);
1646 break;
1647 default:
1648 break;
1649 }
1650 }
1651
1652 if (STUses && !isMask_32(STUses))
1653 MI.emitGenericError("fixed input regs must be last on the x87 stack");
1654 unsigned NumSTUses = llvm::countr_one(STUses);
1655
1656 // Defs must be contiguous from the stack top. ST0-STn.
1657 if (STDefs && !isMask_32(STDefs)) {
1658 MI.emitGenericError("output regs must be last on the x87 stack");
1659 STDefs = NextPowerOf2(STDefs) - 1;
1660 }
1661 unsigned NumSTDefs = llvm::countr_one(STDefs);
1662
1663 // So must the clobbered stack slots. ST0-STm, m >= n.
1664 if (STClobbers && !isMask_32(STDefs | STClobbers))
1665 MI.emitGenericError("clobbers must be last on the x87 stack");
1666
1667 // Popped inputs are the ones that are also clobbered or defined.
1668 unsigned STPopped = STUses & (STDefs | STClobbers);
1669 if (STPopped && !isMask_32(STPopped))
1670 MI.emitGenericError(
1671 "implicitly popped regs must be last on the x87 stack");
1672 unsigned NumSTPopped = llvm::countr_one(STPopped);
1673
1674 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1675 << NumSTPopped << ", and defines " << NumSTDefs
1676 << " regs.\n");
1677
1678#ifndef NDEBUG
1679 // If any input operand uses constraint "f", all output register
1680 // constraints must be early-clobber defs.
1681 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1682 if (FRegIdx.count(I)) {
1683 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1684 "Operands with constraint \"f\" cannot overlap with defs");
1685 }
1686#endif
1687
1688 // Collect all FP registers (register operands with constraints "t", "u",
1689 // and "f") to kill afer the instruction.
1690 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1691 for (const MachineOperand &Op : MI.operands()) {
1692 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1693 continue;
1694 unsigned FPReg = getFPReg(Op);
1695
1696 // If we kill this operand, make sure to pop it from the stack after the
1697 // asm. We just remember it for now, and pop them all off at the end in
1698 // a batch.
1699 if (Op.isUse() && Op.isKill())
1700 FPKills |= 1U << FPReg;
1701 }
1702
1703 // Do not include registers that are implicitly popped by defs/clobbers.
1704 FPKills &= ~(STDefs | STClobbers);
1705
1706 // Now we can rearrange the live registers to match what was requested.
1707 unsigned char STUsesArray[8];
1708
1709 for (unsigned I = 0; I < NumSTUses; ++I)
1710 STUsesArray[I] = I;
1711
1712 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1713 LLVM_DEBUG({
1714 dbgs() << "Before asm: ";
1715 dumpStack();
1716 });
1717
1718 // With the stack layout fixed, rewrite the FP registers.
1719 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1720 MachineOperand &Op = MI.getOperand(i);
1721 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1722 continue;
1723
1724 unsigned FPReg = getFPReg(Op);
1725
1726 if (FRegIdx.count(i))
1727 // Operand with constraint "f".
1728 Op.setReg(getSTReg(FPReg));
1729 else
1730 // Operand with a single register class constraint ("t" or "u").
1731 Op.setReg(X86::ST0 + FPReg);
1732 }
1733
1734 // Simulate the inline asm popping its inputs and pushing its outputs.
1735 StackTop -= NumSTPopped;
1736
1737 for (unsigned i = 0; i < NumSTDefs; ++i)
1738 pushReg(NumSTDefs - i - 1);
1739
1740 // If this asm kills any FP registers (is the last use of them) we must
1741 // explicitly emit pop instructions for them. Do this now after the asm has
1742 // executed so that the ST(x) numbers are not off (which would happen if we
1743 // did this inline with operand rewriting).
1744 //
1745 // Note: this might be a non-optimal pop sequence. We might be able to do
1746 // better by trying to pop in stack order or something.
1747 while (FPKills) {
1748 unsigned FPReg = llvm::countr_zero(FPKills);
1749 if (isLive(FPReg))
1750 freeStackSlotAfter(Inst, FPReg);
1751 FPKills &= ~(1U << FPReg);
1752 }
1753
1754 // Don't delete the inline asm!
1755 return;
1756 }
1757
1758 // FAKE_USE must pop its register operand off the stack if it is killed,
1759 // because this constitutes the register's last use. If the operand
1760 // is not killed, it will have its last use later, so we leave it alone.
1761 // In either case we remove the operand so later passes don't see it.
1762 case TargetOpcode::FAKE_USE: {
1763 assert(MI.getNumExplicitOperands() == 1 &&
1764 "FAKE_USE must have exactly one operand");
1765 if (MI.getOperand(0).isKill()) {
1766 freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
1767 }
1768 MI.removeOperand(0);
1769 return;
1770 }
1771 }
1772
1773 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1774
1775 // We want to leave I pointing to the previous instruction, but what if we
1776 // just erased the first instruction?
1777 if (Inst == MBB->begin()) {
1778 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1779 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1780 } else
1781 --Inst;
1782}
1783
1784void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1785 const TargetRegisterInfo &TRI =
1787 LiveRegUnits LPR(TRI);
1788
1789 LPR.addLiveOuts(MBB);
1790
1791 for (MachineInstr &MI : llvm::reverse(MBB)) {
1792 if (MI.isDebugInstr())
1793 continue;
1794
1795 std::bitset<8> Defs;
1797
1798 for (auto &MO : MI.operands()) {
1799 if (!MO.isReg())
1800 continue;
1801
1802 unsigned Reg = MO.getReg() - X86::FP0;
1803
1804 if (Reg >= 8)
1805 continue;
1806
1807 if (MO.isDef()) {
1808 Defs.set(Reg);
1809 if (LPR.available(MO.getReg()))
1810 MO.setIsDead();
1811 } else
1812 Uses.push_back(&MO);
1813 }
1814
1815 for (auto *MO : Uses)
1816 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))
1817 MO->setIsKill();
1818
1819 LPR.stepBackward(MI);
1820 }
1821}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static Register getFPReg(const CSKYSubtarget &STI)
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
A set of register units.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static constexpr MCPhysReg FPReg
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void setFlag(MIFlag Flag)
Set a MI flag.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Wrapper class representing virtual and physical registers.
Definition Register.h:19
size_type size() const
Definition SmallPtrSet.h:99
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
void resize(size_type N)
void push_back(const T &Elt)
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
@ AddrNumOperands
Definition X86BaseInfo.h:36
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
constexpr double e
Definition MathExtras.h:47
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:264
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1994
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
std::pair< iterator, bool > insert(NodeRef N)