41 #include "llvm/Config/llvm-config.h"
// Debug category string for this file.
// NOTE(review): presumably picked up by the LLVM_DEBUG and STATISTIC macros
// used below - confirm against the LLVM support headers.
52 #define DEBUG_TYPE "x86-codegen"
// Counter described as "Number of fxch instructions inserted".
54 STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
// Counter described as "Number of floating point instructions".
55 STATISTIC(NumFP ,
"Number of floating point instructions");
// Register number handed to duplicateToTop() as the "AsReg" name whenever a
// temporary copy of a value is needed (the return handling and the
// non-killing store handling in this file both pass it). The value 7 lies
// outside the 0..6 range produced from the FP0..FP6 checks used elsewhere in
// this file.
58 const unsigned ScratchFPReg = 7;
65 memset(Stack, 0,
sizeof(Stack));
66 memset(RegMap, 0,
sizeof(RegMap));
/// Overrides the base-class hook to return this pass's fixed display name,
/// "X86 FP Stackifier".
84 StringRef getPassName()
const override {
return "X86 FP Stackifier"; }
// Recorded register order for this bundle. finishBlockStack() stores
// getStackEntry(i) into FixStack[i], so index 0 holds the register that was
// on top of the stack; setupBlockStack() pushes the entries back starting
// from index FixCount-1 down to 0, which recreates the same order.
110 unsigned char FixStack[8];
// Creates a bundle with a zero Mask and a zero FixCount. Because isFixed()
// treats a zero Mask as fixed, a freshly constructed bundle already reports
// itself as fixed.
112 LiveBundle() :
Mask(0), FixCount(0) {}
// Returns true when this bundle's layout needs no further decision: either
// the Mask is zero, or a FixStack order has already been recorded
// (FixCount is non-zero).
115 bool isFixed()
const {
return !
Mask || FixCount; }
131 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
132 if (
Reg >= X86::FP0 &&
Reg <= X86::FP6) {
// Number of entries currently held in Stack. pushReg() post-increments it
// and the top-of-stack entry is Stack[StackTop-1].
154 unsigned StackTop = 0;
// Indexed by FP register number; holds that register's slot index in Stack
// (this is what getSlot() returns). The slot is overwritten with ~0 when the
// register is popped or freed.
164 unsigned RegMap[NumFPRegs];
// Rebuilds the stack model at block entry from the incoming bundle's
// FixStack entries (asserts that the bundle is already fixed).
167 void setupBlockStack();
// Reconciles the stack model with the outgoing bundle at block exit: adjusts
// live registers to the bundle's Mask and, for a fixed bundle, shuffles the
// stack into the recorded FixStack order.
// NOTE(review): the code that records FixCount/FixStack from the current
// stack appears to be the not-yet-fixed path - confirm in the definition.
170 void finishBlockStack();
172 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
173 void dumpStack()
const {
174 dbgs() <<
"Stack contents:";
175 for (
unsigned i = 0;
i != StackTop; ++
i) {
177 assert(RegMap[Stack[
i]] ==
i &&
"Stack[] doesn't match RegMap[]!");
184 unsigned getSlot(
unsigned RegNo)
const {
185 assert(RegNo < NumFPRegs &&
"Regno out of range!");
186 return RegMap[RegNo];
190 bool isLive(
unsigned RegNo)
const {
191 unsigned Slot = getSlot(RegNo);
196 unsigned getStackEntry(
unsigned STi)
const {
199 return Stack[StackTop-1-STi];
204 unsigned getSTReg(
unsigned RegNo)
const {
205 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
209 void pushReg(
unsigned Reg) {
210 assert(
Reg < NumFPRegs &&
"Register number out of range!");
214 RegMap[
Reg] = StackTop++;
221 RegMap[
Stack[--StackTop]] = ~0;
// Returns true when RegNo occupies the last used stack slot (StackTop-1),
// i.e. it is the current top of the modeled stack.
// NOTE(review): both sides are unsigned, so with StackTop == 0 the right-hand
// side wraps to ~0u, which is also the value written into RegMap for a
// removed register. Callers presumably only ask about live registers -
// confirm.
224 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
227 if (isAtTop(RegNo))
return;
229 unsigned STReg = getSTReg(RegNo);
230 unsigned RegOnTop = getStackEntry(0);
233 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
236 if (RegMap[RegOnTop] >= StackTop)
238 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
245 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
248 unsigned STReg = getSTReg(RegNo);
275 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
295 return X86::RFP80RegClass.contains(DstReg) ||
296 X86::RFP80RegClass.contains(SrcReg);
318 assert(
Reg >= X86::FP0 &&
Reg <= X86::FP6 &&
"Expected FP register!");
319 return Reg - X86::FP0;
328 bool FPIsUsed =
false;
330 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
332 for (
unsigned i = 0;
i <= 6; ++
i)
339 if (!FPIsUsed)
return false;
341 Bundles = &getAnalysis<EdgeBundles>();
345 bundleCFGRecomputeKillFlags(MF);
355 LiveBundles[Bundles->
getBundle(Entry->getNumber(),
false)];
359 if ((Entry->getParent()->getFunction().getCallingConv() ==
367 assert((Bundle.Mask & 0xFE) == 0 &&
368 "Only FP0 could be passed as an argument");
370 Bundle.FixStack[0] = 0;
373 bool Changed =
false;
375 Changed |= processBasicBlock(MF, *
BB);
381 Changed |= processBasicBlock(MF,
BB);
394 assert(LiveBundles.empty() &&
"Stale data in LiveBundles");
401 const unsigned Mask = calcLiveInMask(&
MBB,
false);
413 bool Changed =
false;
420 uint64_t Flags =
MI.getDesc().TSFlags;
423 if (
MI.isInlineAsm())
426 if (
MI.isCopy() && isFPCopy(
MI))
429 if (
MI.isImplicitDef() &&
430 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
441 PrevMI = &*std::prev(
I);
449 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
452 DeadRegs.push_back(MO.
getReg());
455 switch (FPInstClass) {
468 for (
unsigned i = 0,
e = DeadRegs.size();
i !=
e; ++
i) {
469 unsigned Reg = DeadRegs[
i];
472 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
473 if (
Reg >= X86::FP0 &&
Reg <= X86::FP6 && isLive(
Reg-X86::FP0)) {
475 freeStackSlotAfter(
I,
Reg-X86::FP0);
483 dbgs() <<
"Just deleted pseudo instruction\n";
487 while (Start !=
BB.begin() && std::prev(Start) != PrevI)
489 dbgs() <<
"Inserted instructions:\n\t";
490 Start->print(
dbgs());
491 while (++Start != std::next(
I)) {
508 void FPS::setupBlockStack() {
510 <<
" derived from " <<
MBB->
getName() <<
".\n");
513 const LiveBundle &Bundle =
522 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
525 for (
unsigned i = Bundle.FixCount;
i > 0; --
i) {
527 <<
unsigned(Bundle.FixStack[
i - 1]) <<
'\n');
528 pushReg(Bundle.FixStack[
i-1]);
534 unsigned Mask = calcLiveInMask(
MBB,
true);
543 void FPS::finishBlockStack() {
549 <<
" derived from " <<
MBB->
getName() <<
".\n");
553 LiveBundle &Bundle = LiveBundles[BundleIdx];
558 adjustLiveRegs(Bundle.Mask, Term);
567 if (Bundle.isFixed()) {
569 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
573 Bundle.FixCount = StackTop;
574 for (
unsigned i = 0;
i < StackTop; ++
i)
575 Bundle.FixStack[
i] = getStackEntry(
i);
// Orders table entries by their `from` field only; the mapped-to value does
// not take part in the comparison. The sortedness assertion and the lookup
// (which compares I->from against the requested opcode) work on this key.
588 bool operator<(
const TableEntry &TE)
const {
return from <
TE.from; }
589 friend bool operator<(
const TableEntry &TE,
unsigned V) {
593 const TableEntry &TE) {
601 if (
I != Table.
end() &&
I->from == Opcode)
607 #define ASSERT_SORTED(TABLE)
609 #define ASSERT_SORTED(TABLE) \
611 static std::atomic<bool> TABLE##Checked(false); \
612 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
613 assert(is_sorted(TABLE) && \
614 "All lookup tables must be sorted for efficient access!"); \
615 TABLE##Checked.store(true, std::memory_order_relaxed); \
629 { X86::ABS_Fp32 , X86::ABS_F },
630 { X86::ABS_Fp64 , X86::ABS_F },
631 { X86::ABS_Fp80 , X86::ABS_F },
632 { X86::ADD_Fp32m , X86::ADD_F32m },
633 { X86::ADD_Fp64m , X86::ADD_F64m },
634 { X86::ADD_Fp64m32 , X86::ADD_F32m },
635 { X86::ADD_Fp80m32 , X86::ADD_F32m },
636 { X86::ADD_Fp80m64 , X86::ADD_F64m },
637 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
638 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
639 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
640 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
641 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
642 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
643 { X86::CHS_Fp32 , X86::CHS_F },
644 { X86::CHS_Fp64 , X86::CHS_F },
645 { X86::CHS_Fp80 , X86::CHS_F },
646 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
647 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
648 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
649 { X86::CMOVB_Fp32 , X86::CMOVB_F },
650 { X86::CMOVB_Fp64 , X86::CMOVB_F },
651 { X86::CMOVB_Fp80 , X86::CMOVB_F },
652 { X86::CMOVE_Fp32 , X86::CMOVE_F },
653 { X86::CMOVE_Fp64 , X86::CMOVE_F },
654 { X86::CMOVE_Fp80 , X86::CMOVE_F },
655 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
656 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
657 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
658 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
659 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
660 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
661 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
662 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
663 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
664 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
665 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
666 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
667 { X86::CMOVP_Fp32 , X86::CMOVP_F },
668 { X86::CMOVP_Fp64 , X86::CMOVP_F },
669 { X86::CMOVP_Fp80 , X86::CMOVP_F },
670 { X86::COM_FpIr32 , X86::COM_FIr },
671 { X86::COM_FpIr64 , X86::COM_FIr },
672 { X86::COM_FpIr80 , X86::COM_FIr },
673 { X86::COM_Fpr32 , X86::COM_FST0r },
674 { X86::COM_Fpr64 , X86::COM_FST0r },
675 { X86::COM_Fpr80 , X86::COM_FST0r },
676 { X86::DIVR_Fp32m , X86::DIVR_F32m },
677 { X86::DIVR_Fp64m , X86::DIVR_F64m },
678 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
679 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
680 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
681 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
682 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
683 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
684 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
685 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
686 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
687 { X86::DIV_Fp32m , X86::DIV_F32m },
688 { X86::DIV_Fp64m , X86::DIV_F64m },
689 { X86::DIV_Fp64m32 , X86::DIV_F32m },
690 { X86::DIV_Fp80m32 , X86::DIV_F32m },
691 { X86::DIV_Fp80m64 , X86::DIV_F64m },
692 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
693 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
694 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
695 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
696 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
697 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
698 { X86::ILD_Fp16m32 , X86::ILD_F16m },
699 { X86::ILD_Fp16m64 , X86::ILD_F16m },
700 { X86::ILD_Fp16m80 , X86::ILD_F16m },
701 { X86::ILD_Fp32m32 , X86::ILD_F32m },
702 { X86::ILD_Fp32m64 , X86::ILD_F32m },
703 { X86::ILD_Fp32m80 , X86::ILD_F32m },
704 { X86::ILD_Fp64m32 , X86::ILD_F64m },
705 { X86::ILD_Fp64m64 , X86::ILD_F64m },
706 { X86::ILD_Fp64m80 , X86::ILD_F64m },
707 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
708 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
709 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
710 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
711 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
712 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
713 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
714 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
715 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
716 { X86::IST_Fp16m32 , X86::IST_F16m },
717 { X86::IST_Fp16m64 , X86::IST_F16m },
718 { X86::IST_Fp16m80 , X86::IST_F16m },
719 { X86::IST_Fp32m32 , X86::IST_F32m },
720 { X86::IST_Fp32m64 , X86::IST_F32m },
721 { X86::IST_Fp32m80 , X86::IST_F32m },
722 { X86::IST_Fp64m32 , X86::IST_FP64m },
723 { X86::IST_Fp64m64 , X86::IST_FP64m },
724 { X86::IST_Fp64m80 , X86::IST_FP64m },
725 { X86::LD_Fp032 , X86::LD_F0 },
726 { X86::LD_Fp064 , X86::LD_F0 },
727 { X86::LD_Fp080 , X86::LD_F0 },
728 { X86::LD_Fp132 , X86::LD_F1 },
729 { X86::LD_Fp164 , X86::LD_F1 },
730 { X86::LD_Fp180 , X86::LD_F1 },
731 { X86::LD_Fp32m , X86::LD_F32m },
732 { X86::LD_Fp32m64 , X86::LD_F32m },
733 { X86::LD_Fp32m80 , X86::LD_F32m },
734 { X86::LD_Fp64m , X86::LD_F64m },
735 { X86::LD_Fp64m80 , X86::LD_F64m },
736 { X86::LD_Fp80m , X86::LD_F80m },
737 { X86::MUL_Fp32m , X86::MUL_F32m },
738 { X86::MUL_Fp64m , X86::MUL_F64m },
739 { X86::MUL_Fp64m32 , X86::MUL_F32m },
740 { X86::MUL_Fp80m32 , X86::MUL_F32m },
741 { X86::MUL_Fp80m64 , X86::MUL_F64m },
742 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
743 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
744 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
745 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
746 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
747 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
748 { X86::SQRT_Fp32 , X86::SQRT_F },
749 { X86::SQRT_Fp64 , X86::SQRT_F },
750 { X86::SQRT_Fp80 , X86::SQRT_F },
751 { X86::ST_Fp32m , X86::ST_F32m },
752 { X86::ST_Fp64m , X86::ST_F64m },
753 { X86::ST_Fp64m32 , X86::ST_F32m },
754 { X86::ST_Fp80m32 , X86::ST_F32m },
755 { X86::ST_Fp80m64 , X86::ST_F64m },
756 { X86::ST_FpP80m , X86::ST_FP80m },
757 { X86::SUBR_Fp32m , X86::SUBR_F32m },
758 { X86::SUBR_Fp64m , X86::SUBR_F64m },
759 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
760 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
761 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
762 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
763 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
764 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
765 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
766 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
767 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
768 { X86::SUB_Fp32m , X86::SUB_F32m },
769 { X86::SUB_Fp64m , X86::SUB_F64m },
770 { X86::SUB_Fp64m32 , X86::SUB_F32m },
771 { X86::SUB_Fp80m32 , X86::SUB_F32m },
772 { X86::SUB_Fp80m64 , X86::SUB_F64m },
773 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
774 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
775 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
776 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
777 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
778 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
779 { X86::TST_Fp32 , X86::TST_F },
780 { X86::TST_Fp64 , X86::TST_F },
781 { X86::TST_Fp80 , X86::TST_F },
782 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
783 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
784 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
785 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
786 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
787 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
793 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
805 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
807 { X86::COMP_FST0r, X86::FCOMPP },
808 { X86::COM_FIr , X86::COM_FIPr },
809 { X86::COM_FST0r , X86::COMP_FST0r },
811 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
812 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
814 { X86::IST_F16m , X86::IST_FP16m },
815 { X86::IST_F32m , X86::IST_FP32m },
817 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
819 { X86::ST_F32m , X86::ST_FP32m },
820 { X86::ST_F64m , X86::ST_FP64m },
821 { X86::ST_Frr , X86::ST_FPrr },
823 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
824 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
826 { X86::UCOM_FIr , X86::UCOM_FIPr },
828 { X86::UCOM_FPr , X86::UCOM_FPPr },
829 { X86::UCOM_Fr , X86::UCOM_FPr },
848 I->setDesc(
TII->get(Opcode));
849 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
861 if (getStackEntry(0) == FPRegNo) {
869 I = freeStackSlotBefore(++
I, FPRegNo);
876 unsigned STReg = getSTReg(FPRegNo);
877 unsigned OldSlot = getSlot(FPRegNo);
878 unsigned TopReg =
Stack[StackTop-1];
879 Stack[OldSlot] = TopReg;
880 RegMap[TopReg] = OldSlot;
881 RegMap[FPRegNo] = ~0;
882 Stack[--StackTop] = ~0;
891 unsigned Defs =
Mask;
893 for (
unsigned i = 0;
i < StackTop; ++
i) {
894 unsigned RegNo =
Stack[
i];
895 if (!(Defs & (1 << RegNo)))
897 Kills |= (1 << RegNo);
900 Defs &= ~(1 << RegNo);
902 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
905 while (Kills && Defs) {
908 LLVM_DEBUG(
dbgs() <<
"Renaming %fp" << KReg <<
" as imp %fp" << DReg
910 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
912 Kills &= ~(1 << KReg);
913 Defs &= ~(1 << DReg);
920 unsigned KReg = getStackEntry(0);
921 if (!(Kills & (1 << KReg)))
925 Kills &= ~(1 << KReg);
933 freeStackSlotBefore(
I, KReg);
934 Kills &= ~(1 << KReg);
943 Defs &= ~(1 << DReg);
954 void FPS::shuffleStackTop(
const unsigned char *FixStack,
960 unsigned OldReg = getStackEntry(FixCount);
962 unsigned Reg = FixStack[FixCount];
968 moveToTop(OldReg,
I);
980 unsigned STReturns = 0;
982 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
984 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
987 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1007 while (StackTop > 0)
1010 for (
unsigned I = 0;
I <
N; ++
I)
1020 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1021 unsigned LiveMask = 0;
1023 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
1025 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1032 MI.killsRegister(
Op.getReg())) &&
1033 "Ret only defs operands, and values aren't live beyond it");
1035 if (FirstFPRegOp == ~0U)
1038 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1044 MI.RemoveOperand(
i);
1051 adjustLiveRegs(LiveMask,
MI);
1052 if (!LiveMask)
return;
1058 if (SecondFPRegOp == ~0U) {
1060 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1061 "Top of stack not the right register for RET!");
1073 if (StackTop == 1) {
1074 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1075 "Stack misconfiguration for RET!");
1079 unsigned NewReg = ScratchFPReg;
1080 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1081 FirstFPRegOp = NewReg;
1085 assert(StackTop == 2 &&
"Must have two values live!");
1089 if (getStackEntry(0) == SecondFPRegOp) {
1090 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1091 moveToTop(FirstFPRegOp,
MI);
1096 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1097 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1105 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1108 MI.RemoveOperand(0);
1121 unsigned NumOps =
MI.getDesc().getNumOperands();
1123 "Can only handle fst* & ftst instructions!");
1127 bool KillsSrc =
MI.killsRegister(X86::FP0 +
Reg);
1135 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1136 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1137 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1138 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1139 MI.getOpcode() == X86::IST_Fp64m64 ||
1140 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1141 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1142 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1143 MI.getOpcode() == X86::IST_Fp64m80 ||
1144 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1145 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1146 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1147 MI.getOpcode() == X86::ST_FpP80m)) {
1148 duplicateToTop(
Reg, ScratchFPReg,
I);
1154 MI.RemoveOperand(NumOps - 1);
1159 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1160 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1161 MI.getOpcode() == X86::ST_FP80m) {
1165 }
else if (KillsSrc) {
1182 unsigned NumOps =
MI.getDesc().getNumOperands();
1183 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1188 bool KillsSrc =
MI.killsRegister(X86::FP0 +
Reg);
1205 MI.RemoveOperand(1);
1206 MI.RemoveOperand(0);
1217 { X86::ADD_Fp32 , X86::ADD_FST0r },
1218 { X86::ADD_Fp64 , X86::ADD_FST0r },
1219 { X86::ADD_Fp80 , X86::ADD_FST0r },
1220 { X86::DIV_Fp32 , X86::DIV_FST0r },
1221 { X86::DIV_Fp64 , X86::DIV_FST0r },
1222 { X86::DIV_Fp80 , X86::DIV_FST0r },
1223 { X86::MUL_Fp32 , X86::MUL_FST0r },
1224 { X86::MUL_Fp64 , X86::MUL_FST0r },
1225 { X86::MUL_Fp80 , X86::MUL_FST0r },
1226 { X86::SUB_Fp32 , X86::SUB_FST0r },
1227 { X86::SUB_Fp64 , X86::SUB_FST0r },
1228 { X86::SUB_Fp80 , X86::SUB_FST0r },
1233 { X86::ADD_Fp32 , X86::ADD_FST0r },
1234 { X86::ADD_Fp64 , X86::ADD_FST0r },
1235 { X86::ADD_Fp80 , X86::ADD_FST0r },
1236 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1237 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1238 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1239 { X86::MUL_Fp32 , X86::MUL_FST0r },
1240 { X86::MUL_Fp64 , X86::MUL_FST0r },
1241 { X86::MUL_Fp80 , X86::MUL_FST0r },
1242 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1243 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1244 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1249 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1250 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1251 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1252 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1253 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1254 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1255 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1256 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1257 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1258 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1259 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1260 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1265 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1266 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1267 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1268 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1269 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1270 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1271 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1272 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1273 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1274 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1275 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1276 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1293 unsigned NumOperands =
MI.getDesc().getNumOperands();
1294 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1296 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1297 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1298 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1299 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1302 unsigned TOS = getStackEntry(0);
1306 if (Op0 != TOS && Op1 != TOS) {
1313 }
else if (KillsOp1) {
1322 duplicateToTop(Op0, Dest,
I);
1326 }
else if (!KillsOp0 && !KillsOp1) {
1330 duplicateToTop(Op0, Dest,
I);
1337 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1338 "Stack conditions not set up right!");
1343 bool isForward = TOS == Op0;
1344 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1357 int Opcode =
Lookup(InstTable,
MI.getOpcode());
1358 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1361 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1367 if (!
MI.mayRaiseFPException())
1368 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1372 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1373 assert(!updateST0 &&
"Should have updated other operand!");
1379 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1380 assert(UpdatedSlot < StackTop && Dest < 7);
1381 Stack[UpdatedSlot] = Dest;
1382 RegMap[Dest] = UpdatedSlot;
1392 unsigned NumOperands =
MI.getDesc().getNumOperands();
1393 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1394 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1395 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1396 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1397 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1404 MI.getOperand(0).setReg(getSTReg(Op1));
1405 MI.RemoveOperand(1);
1409 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1410 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1422 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1429 MI.RemoveOperand(0);
1430 MI.RemoveOperand(1);
1431 MI.getOperand(0).setReg(getSTReg(Op1));
1435 if (Op0 != Op1 && KillsOp1) {
1437 freeStackSlotAfter(
I, Op1);
1454 if (
MI.isReturn()) {
1459 switch (
MI.getOpcode()) {
1461 case TargetOpcode::COPY: {
1465 bool KillsSrc =
MI.killsRegister(MO1.
getReg());
1470 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1474 unsigned Slot = getSlot(SrcFP);
1476 RegMap[DstFP] =
Slot;
1480 duplicateToTop(SrcFP, DstFP, Inst);
1485 case TargetOpcode::IMPLICIT_DEF: {
1487 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1529 unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1530 unsigned NumOps = 0;
1535 i !=
e &&
MI.getOperand(
i).isImm();
i += 1 + NumOps) {
1536 unsigned Flags =
MI.getOperand(
i).getImm();
1544 unsigned STReg = MO.
getReg() - X86::FP0;
1557 STUses |= (1u << STReg);
1561 STDefs |= (1u << STReg);
1563 STDeadDefs |= (1u << STReg);
1566 STClobbers |= (1u << STReg);
1574 MI.emitError(
"fixed input regs must be last on the x87 stack");
1579 MI.emitError(
"output regs must be last on the x87 stack");
1585 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1586 MI.emitError(
"clobbers must be last on the x87 stack");
1589 unsigned STPopped = STUses & (STDefs | STClobbers);
1591 MI.emitError(
"implicitly popped regs must be last on the x87 stack");
1594 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1595 << NumSTPopped <<
", and defines " << NumSTDefs
1601 for (
unsigned I = 0,
E =
MI.getNumOperands();
I <
E; ++
I)
1604 "Operands with constraint \"f\" cannot overlap with defs");
1610 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1611 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
1613 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1620 if (
Op.isUse() &&
Op.isKill())
1621 FPKills |= 1U << FPReg;
1625 FPKills &= ~(STDefs | STClobbers);
1628 unsigned char STUsesArray[8];
1630 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1633 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1635 dbgs() <<
"Before asm: ";
1640 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
1642 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1649 Op.setReg(getSTReg(FPReg));
1652 Op.setReg(X86::ST0 + FPReg);
1656 StackTop -= NumSTPopped;
1658 for (
unsigned i = 0;
i < NumSTDefs; ++
i)
1659 pushReg(NumSTDefs -
i - 1);
1671 freeStackSlotAfter(Inst, FPReg);
1672 FPKills &= ~(1U << FPReg);
1696 LPR.addLiveOuts(
MBB);
1700 if (
I->isDebugInstr())
1703 std::bitset<8> Defs;
1707 for (
auto &MO :
I->operands()) {
1718 if (!LPR.contains(MO.
getReg()))
1721 Uses.push_back(&MO);
1724 for (
auto *MO :
Uses)
1728 LPR.stepBackward(
MI);