41 #include "llvm/Config/llvm-config.h"
// Tag string for this file.
// NOTE(review): presumably the category consumed by the LLVM_DEBUG and
// STATISTIC macros used in this file -- confirm.
52 #define DEBUG_TYPE "x86-codegen"
// Counter described by its own text: fxch instructions inserted.
54 STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
// Counter described by its own text: floating point instructions.
55 STATISTIC(NumFP ,
"Number of floating point instructions");
// Register number passed to duplicateToTop() as the destination whenever a
// temporary copy of a value is pushed (used by the return handling and by the
// store handling further down in this file).
58 const unsigned ScratchFPReg = 7;
65 memset(Stack, 0,
sizeof(Stack));
66 memset(RegMap, 0,
sizeof(RegMap));
/// Returns the fixed name string "X86 FP Stackifier". Declared `override`, so
/// it replaces a virtual from a base class that is not visible in this view.
84 StringRef getPassName()
const override {
return "X86 FP Stackifier"; }
// Number of valid entries in FixStack. Starts at 0; finishBlockStack() stores
// the current StackTop here when it records a layout, and isFixed() treats any
// non-zero value as "layout already recorded".
106 unsigned FixCount = 0;
// Recorded stack layout. finishBlockStack() fills entry i with
// getStackEntry(i), so index 0 holds the register on top of the stack;
// setupBlockStack() pushes the entries back in reverse order (FixCount-1 down
// to 0) to rebuild that layout.
110 unsigned char FixStack[8];
// Defaulted constructor: members take their in-class initializers
// (FixCount starts at 0).
112 LiveBundle() =
default;
// Returns true when Mask is zero or FixCount is non-zero.
// NOTE(review): Mask is declared outside this view; its uses elsewhere
// (tested against 0xFE, passed to adjustLiveRegs) suggest a per-register
// bitmask -- confirm against the declaration.
115 bool isFixed()
const {
return !
Mask || FixCount; }
131 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
132 if (
Reg >= X86::FP0 &&
Reg <= X86::FP6) {
// Number of occupied slots in Stack[]. pushReg() records the current value as
// the pushed register's slot and then increments it; Stack[StackTop-1] is the
// top-of-stack entry (see getStackEntry()).
154 unsigned StackTop = 0;
// Maps a register number to its slot index in Stack[] (getSlot() returns
// RegMap[RegNo]). dumpStack() asserts RegMap[Stack[i]] == i for every occupied
// slot; entries are set to ~0 when a register is removed from the stack.
164 unsigned RegMap[NumFPRegs];
// Prepares the stack model for a block. From the parts of the definition
// visible in this file: it asserts the block's LiveBundle isFixed() and pushes
// the bundle's FixStack entries from index FixCount-1 down to 0.
167 void setupBlockStack();
// Reconciles the stack model with a LiveBundle via adjustLiveRegs(). From the
// parts of the definition visible in this file: a bundle that isFixed() gets
// the stack shuffled to its FixStack order; the definition also contains code
// that records the current stack into FixCount/FixStack.
// NOTE(review): the branch structure between those two is not visible -- confirm.
170 void finishBlockStack();
172 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
173 void dumpStack()
const {
174 dbgs() <<
"Stack contents:";
175 for (
unsigned i = 0;
i != StackTop; ++
i) {
177 assert(RegMap[Stack[
i]] ==
i &&
"Stack[] doesn't match RegMap[]!");
184 unsigned getSlot(
unsigned RegNo)
const {
185 assert(RegNo < NumFPRegs &&
"Regno out of range!");
186 return RegMap[RegNo];
190 bool isLive(
unsigned RegNo)
const {
191 unsigned Slot = getSlot(RegNo);
196 unsigned getStackEntry(
unsigned STi)
const {
199 return Stack[StackTop-1-STi];
204 unsigned getSTReg(
unsigned RegNo)
const {
205 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
209 void pushReg(
unsigned Reg) {
210 assert(
Reg < NumFPRegs &&
"Register number out of range!");
214 RegMap[
Reg] = StackTop++;
221 RegMap[
Stack[--StackTop]] = ~0;
// Returns true when RegNo's slot (as reported by getSlot) equals StackTop-1,
// i.e. the register occupies the top-of-stack slot.
224 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
227 if (isAtTop(RegNo))
return;
229 unsigned STReg = getSTReg(RegNo);
230 unsigned RegOnTop = getStackEntry(0);
233 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
236 if (RegMap[RegOnTop] >= StackTop)
238 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
245 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
248 unsigned STReg = getSTReg(RegNo);
275 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
295 return X86::RFP80RegClass.contains(DstReg) ||
296 X86::RFP80RegClass.contains(SrcReg);
318 assert(
Reg >= X86::FP0 &&
Reg <= X86::FP6 &&
"Expected FP register!");
319 return Reg - X86::FP0;
328 bool FPIsUsed =
false;
330 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
332 for (
unsigned i = 0;
i <= 6; ++
i)
339 if (!FPIsUsed)
return false;
341 Bundles = &getAnalysis<EdgeBundles>();
345 bundleCFGRecomputeKillFlags(MF);
355 LiveBundles[Bundles->
getBundle(Entry->getNumber(),
false)];
359 if ((Entry->getParent()->getFunction().getCallingConv() ==
367 assert((Bundle.Mask & 0xFE) == 0 &&
368 "Only FP0 could be passed as an argument");
370 Bundle.FixStack[0] = 0;
373 bool Changed =
false;
375 Changed |= processBasicBlock(MF, *
BB);
381 Changed |= processBasicBlock(MF,
BB);
394 assert(LiveBundles.empty() &&
"Stale data in LiveBundles");
401 const unsigned Mask = calcLiveInMask(&
MBB,
false);
413 bool Changed =
false;
423 if (
MI.isInlineAsm())
426 if (
MI.isCopy() && isFPCopy(
MI))
429 if (
MI.isImplicitDef() &&
430 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
441 PrevMI = &*std::prev(
I);
450 if (MO.isReg() && MO.isDead())
451 DeadRegs.push_back(MO.getReg());
453 switch (FPInstClass) {
466 for (
unsigned i = 0,
e = DeadRegs.size();
i !=
e; ++
i) {
467 unsigned Reg = DeadRegs[
i];
470 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
471 if (
Reg >= X86::FP0 &&
Reg <= X86::FP6 && isLive(
Reg-X86::FP0)) {
473 freeStackSlotAfter(
I,
Reg-X86::FP0);
481 dbgs() <<
"Just deleted pseudo instruction\n";
485 while (Start !=
BB.begin() && std::prev(Start) != PrevI)
487 dbgs() <<
"Inserted instructions:\n\t";
488 Start->print(
dbgs());
489 while (++Start != std::next(
I)) {
506 void FPS::setupBlockStack() {
508 <<
" derived from " <<
MBB->
getName() <<
".\n");
511 const LiveBundle &Bundle =
520 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
523 for (
unsigned i = Bundle.FixCount;
i > 0; --
i) {
525 <<
unsigned(Bundle.FixStack[
i - 1]) <<
'\n');
526 pushReg(Bundle.FixStack[
i-1]);
532 unsigned Mask = calcLiveInMask(
MBB,
true);
541 void FPS::finishBlockStack() {
547 <<
" derived from " <<
MBB->
getName() <<
".\n");
551 LiveBundle &Bundle = LiveBundles[BundleIdx];
556 adjustLiveRegs(Bundle.Mask,
Term);
565 if (Bundle.isFixed()) {
567 shuffleStackTop(Bundle.FixStack, Bundle.FixCount,
Term);
571 Bundle.FixCount = StackTop;
572 for (
unsigned i = 0;
i < StackTop; ++
i)
573 Bundle.FixStack[
i] = getStackEntry(
i);
// Orders two TableEntry objects by their `from` field only.
586 bool operator<(
const TableEntry &TE)
const {
return from <
TE.from; }
587 friend bool operator<(
const TableEntry &TE,
unsigned V) {
591 const TableEntry &TE) {
599 if (
I != Table.
end() &&
I->from == Opcode)
// Variant of ASSERT_SORTED that expands to nothing.
// NOTE(review): presumably selected when assertions are compiled out; the
// enclosing preprocessor conditional is not visible here -- confirm.
605 #define ASSERT_SORTED(TABLE)
607 #define ASSERT_SORTED(TABLE) \
609 static std::atomic<bool> TABLE##Checked(false); \
610 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
611 assert(is_sorted(TABLE) && \
612 "All lookup tables must be sorted for efficient access!"); \
613 TABLE##Checked.store(true, std::memory_order_relaxed); \
627 { X86::ABS_Fp32 , X86::ABS_F },
628 { X86::ABS_Fp64 , X86::ABS_F },
629 { X86::ABS_Fp80 , X86::ABS_F },
630 { X86::ADD_Fp32m , X86::ADD_F32m },
631 { X86::ADD_Fp64m , X86::ADD_F64m },
632 { X86::ADD_Fp64m32 , X86::ADD_F32m },
633 { X86::ADD_Fp80m32 , X86::ADD_F32m },
634 { X86::ADD_Fp80m64 , X86::ADD_F64m },
635 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
636 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
637 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
638 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
639 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
640 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
641 { X86::CHS_Fp32 , X86::CHS_F },
642 { X86::CHS_Fp64 , X86::CHS_F },
643 { X86::CHS_Fp80 , X86::CHS_F },
644 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
645 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
646 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
647 { X86::CMOVB_Fp32 , X86::CMOVB_F },
648 { X86::CMOVB_Fp64 , X86::CMOVB_F },
649 { X86::CMOVB_Fp80 , X86::CMOVB_F },
650 { X86::CMOVE_Fp32 , X86::CMOVE_F },
651 { X86::CMOVE_Fp64 , X86::CMOVE_F },
652 { X86::CMOVE_Fp80 , X86::CMOVE_F },
653 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
654 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
655 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
656 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
657 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
658 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
659 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
660 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
661 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
662 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
663 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
664 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
665 { X86::CMOVP_Fp32 , X86::CMOVP_F },
666 { X86::CMOVP_Fp64 , X86::CMOVP_F },
667 { X86::CMOVP_Fp80 , X86::CMOVP_F },
668 { X86::COM_FpIr32 , X86::COM_FIr },
669 { X86::COM_FpIr64 , X86::COM_FIr },
670 { X86::COM_FpIr80 , X86::COM_FIr },
671 { X86::COM_Fpr32 , X86::COM_FST0r },
672 { X86::COM_Fpr64 , X86::COM_FST0r },
673 { X86::COM_Fpr80 , X86::COM_FST0r },
674 { X86::DIVR_Fp32m , X86::DIVR_F32m },
675 { X86::DIVR_Fp64m , X86::DIVR_F64m },
676 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
677 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
678 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
679 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
680 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
681 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
682 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
683 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
684 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
685 { X86::DIV_Fp32m , X86::DIV_F32m },
686 { X86::DIV_Fp64m , X86::DIV_F64m },
687 { X86::DIV_Fp64m32 , X86::DIV_F32m },
688 { X86::DIV_Fp80m32 , X86::DIV_F32m },
689 { X86::DIV_Fp80m64 , X86::DIV_F64m },
690 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
691 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
692 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
693 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
694 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
695 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
696 { X86::ILD_Fp16m32 , X86::ILD_F16m },
697 { X86::ILD_Fp16m64 , X86::ILD_F16m },
698 { X86::ILD_Fp16m80 , X86::ILD_F16m },
699 { X86::ILD_Fp32m32 , X86::ILD_F32m },
700 { X86::ILD_Fp32m64 , X86::ILD_F32m },
701 { X86::ILD_Fp32m80 , X86::ILD_F32m },
702 { X86::ILD_Fp64m32 , X86::ILD_F64m },
703 { X86::ILD_Fp64m64 , X86::ILD_F64m },
704 { X86::ILD_Fp64m80 , X86::ILD_F64m },
705 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
707 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
708 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
710 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
711 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
712 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
713 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
714 { X86::IST_Fp16m32 , X86::IST_F16m },
715 { X86::IST_Fp16m64 , X86::IST_F16m },
716 { X86::IST_Fp16m80 , X86::IST_F16m },
717 { X86::IST_Fp32m32 , X86::IST_F32m },
718 { X86::IST_Fp32m64 , X86::IST_F32m },
719 { X86::IST_Fp32m80 , X86::IST_F32m },
720 { X86::IST_Fp64m32 , X86::IST_FP64m },
721 { X86::IST_Fp64m64 , X86::IST_FP64m },
722 { X86::IST_Fp64m80 , X86::IST_FP64m },
723 { X86::LD_Fp032 , X86::LD_F0 },
724 { X86::LD_Fp064 , X86::LD_F0 },
725 { X86::LD_Fp080 , X86::LD_F0 },
726 { X86::LD_Fp132 , X86::LD_F1 },
727 { X86::LD_Fp164 , X86::LD_F1 },
728 { X86::LD_Fp180 , X86::LD_F1 },
729 { X86::LD_Fp32m , X86::LD_F32m },
730 { X86::LD_Fp32m64 , X86::LD_F32m },
731 { X86::LD_Fp32m80 , X86::LD_F32m },
732 { X86::LD_Fp64m , X86::LD_F64m },
733 { X86::LD_Fp64m80 , X86::LD_F64m },
734 { X86::LD_Fp80m , X86::LD_F80m },
735 { X86::MUL_Fp32m , X86::MUL_F32m },
736 { X86::MUL_Fp64m , X86::MUL_F64m },
737 { X86::MUL_Fp64m32 , X86::MUL_F32m },
738 { X86::MUL_Fp80m32 , X86::MUL_F32m },
739 { X86::MUL_Fp80m64 , X86::MUL_F64m },
740 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
741 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
742 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
743 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
744 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
745 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
746 { X86::SQRT_Fp32 , X86::SQRT_F },
747 { X86::SQRT_Fp64 , X86::SQRT_F },
748 { X86::SQRT_Fp80 , X86::SQRT_F },
749 { X86::ST_Fp32m , X86::ST_F32m },
750 { X86::ST_Fp64m , X86::ST_F64m },
751 { X86::ST_Fp64m32 , X86::ST_F32m },
752 { X86::ST_Fp80m32 , X86::ST_F32m },
753 { X86::ST_Fp80m64 , X86::ST_F64m },
754 { X86::ST_FpP80m , X86::ST_FP80m },
755 { X86::SUBR_Fp32m , X86::SUBR_F32m },
756 { X86::SUBR_Fp64m , X86::SUBR_F64m },
757 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
758 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
759 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
760 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
761 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
762 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
763 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
764 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
765 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
766 { X86::SUB_Fp32m , X86::SUB_F32m },
767 { X86::SUB_Fp64m , X86::SUB_F64m },
768 { X86::SUB_Fp64m32 , X86::SUB_F32m },
769 { X86::SUB_Fp80m32 , X86::SUB_F32m },
770 { X86::SUB_Fp80m64 , X86::SUB_F64m },
771 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
772 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
773 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
774 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
775 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
776 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
777 { X86::TST_Fp32 , X86::TST_F },
778 { X86::TST_Fp64 , X86::TST_F },
779 { X86::TST_Fp80 , X86::TST_F },
780 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
781 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
782 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
783 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
784 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
785 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
786 { X86::XAM_Fp32 , X86::XAM_F },
787 { X86::XAM_Fp64 , X86::XAM_F },
788 { X86::XAM_Fp80 , X86::XAM_F },
794 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
806 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
808 { X86::COMP_FST0r, X86::FCOMPP },
809 { X86::COM_FIr , X86::COM_FIPr },
810 { X86::COM_FST0r , X86::COMP_FST0r },
812 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
813 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
815 { X86::IST_F16m , X86::IST_FP16m },
816 { X86::IST_F32m , X86::IST_FP32m },
818 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
820 { X86::ST_F32m , X86::ST_FP32m },
821 { X86::ST_F64m , X86::ST_FP64m },
822 { X86::ST_Frr , X86::ST_FPrr },
824 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
825 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
827 { X86::UCOM_FIr , X86::UCOM_FIPr },
829 { X86::UCOM_FPr , X86::UCOM_FPPr },
830 { X86::UCOM_Fr , X86::UCOM_FPr },
867 I->setDesc(
TII->get(Opcode));
868 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
870 MI.dropDebugNumber();
877 if (Next !=
MBB.
end() && Next->readsRegister(X86::FPSW))
889 if (getStackEntry(0) == FPRegNo) {
897 I = freeStackSlotBefore(++
I, FPRegNo);
904 unsigned STReg = getSTReg(FPRegNo);
905 unsigned OldSlot = getSlot(FPRegNo);
906 unsigned TopReg =
Stack[StackTop-1];
907 Stack[OldSlot] = TopReg;
908 RegMap[TopReg] = OldSlot;
909 RegMap[FPRegNo] = ~0;
910 Stack[--StackTop] = ~0;
919 unsigned Defs =
Mask;
921 for (
unsigned i = 0;
i < StackTop; ++
i) {
922 unsigned RegNo =
Stack[
i];
923 if (!(Defs & (1 << RegNo)))
925 Kills |= (1 << RegNo);
928 Defs &= ~(1 << RegNo);
930 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
933 while (Kills && Defs) {
940 Kills &= ~(1 << KReg);
941 Defs &= ~(1 <<
DReg);
948 unsigned KReg = getStackEntry(0);
949 if (!(Kills & (1 << KReg)))
953 Kills &= ~(1 << KReg);
961 freeStackSlotBefore(
I, KReg);
962 Kills &= ~(1 << KReg);
971 Defs &= ~(1 <<
DReg);
982 void FPS::shuffleStackTop(
const unsigned char *FixStack,
988 unsigned OldReg = getStackEntry(FixCount);
990 unsigned Reg = FixStack[FixCount];
996 moveToTop(OldReg,
I);
1008 unsigned STReturns = 0;
1010 bool ClobbersFPStack =
false;
1011 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
1015 if (
Op.isRegMask()) {
1016 bool ClobbersFP0 =
Op.clobbersPhysReg(X86::FP0);
1018 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
1019 for (
unsigned i = 1;
i != 8; ++
i)
1020 assert(
Op.clobbersPhysReg(X86::FP0 +
i) == ClobbersFP0 &&
1021 "Inconsistent FP register clobber");
1025 ClobbersFPStack =
true;
1028 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1031 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1037 MI.removeOperand(
i);
1045 assert((ClobbersFPStack || STReturns == 0) &&
1046 "ST returns without FP stack clobber");
1047 if (!ClobbersFPStack)
1059 while (StackTop > 0)
1062 for (
unsigned I = 0;
I <
N; ++
I)
1068 I->dropDebugNumber();
1077 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1078 unsigned LiveMask = 0;
1080 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
1082 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1089 MI.killsRegister(
Op.getReg())) &&
1090 "Ret only defs operands, and values aren't live beyond it");
1092 if (FirstFPRegOp == ~0U)
1095 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1101 MI.removeOperand(
i);
1108 adjustLiveRegs(LiveMask,
MI);
1109 if (!LiveMask)
return;
1115 if (SecondFPRegOp == ~0U) {
1117 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1118 "Top of stack not the right register for RET!");
1130 if (StackTop == 1) {
1131 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1132 "Stack misconfiguration for RET!");
1136 unsigned NewReg = ScratchFPReg;
1137 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1138 FirstFPRegOp = NewReg;
1142 assert(StackTop == 2 &&
"Must have two values live!");
1146 if (getStackEntry(0) == SecondFPRegOp) {
1147 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1148 moveToTop(FirstFPRegOp,
MI);
1153 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1154 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1162 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1165 MI.removeOperand(0);
1173 MI.dropDebugNumber();
1180 unsigned NumOps =
MI.getDesc().getNumOperands();
1182 "Can only handle fst* & ftst instructions!");
1186 bool KillsSrc =
MI.killsRegister(X86::FP0 +
Reg);
1194 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1196 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1197 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1198 MI.getOpcode() == X86::IST_Fp64m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1200 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1201 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1202 MI.getOpcode() == X86::IST_Fp64m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1204 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1205 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1206 MI.getOpcode() == X86::ST_FpP80m)) {
1207 duplicateToTop(
Reg, ScratchFPReg,
I);
1213 MI.removeOperand(NumOps - 1);
1218 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1219 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1220 MI.getOpcode() == X86::ST_FP80m) {
1224 }
else if (KillsSrc) {
1228 MI.dropDebugNumber();
1243 unsigned NumOps =
MI.getDesc().getNumOperands();
1244 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1249 bool KillsSrc =
MI.killsRegister(X86::FP0 +
Reg);
1266 MI.removeOperand(1);
1267 MI.removeOperand(0);
1269 MI.dropDebugNumber();
1279 { X86::ADD_Fp32 , X86::ADD_FST0r },
1280 { X86::ADD_Fp64 , X86::ADD_FST0r },
1281 { X86::ADD_Fp80 , X86::ADD_FST0r },
1282 { X86::DIV_Fp32 , X86::DIV_FST0r },
1283 { X86::DIV_Fp64 , X86::DIV_FST0r },
1284 { X86::DIV_Fp80 , X86::DIV_FST0r },
1285 { X86::MUL_Fp32 , X86::MUL_FST0r },
1286 { X86::MUL_Fp64 , X86::MUL_FST0r },
1287 { X86::MUL_Fp80 , X86::MUL_FST0r },
1288 { X86::SUB_Fp32 , X86::SUB_FST0r },
1289 { X86::SUB_Fp64 , X86::SUB_FST0r },
1290 { X86::SUB_Fp80 , X86::SUB_FST0r },
1295 { X86::ADD_Fp32 , X86::ADD_FST0r },
1296 { X86::ADD_Fp64 , X86::ADD_FST0r },
1297 { X86::ADD_Fp80 , X86::ADD_FST0r },
1298 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1299 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1300 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1301 { X86::MUL_Fp32 , X86::MUL_FST0r },
1302 { X86::MUL_Fp64 , X86::MUL_FST0r },
1303 { X86::MUL_Fp80 , X86::MUL_FST0r },
1304 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1305 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1306 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1311 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1312 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1313 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1314 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1315 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1316 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1317 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1318 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1319 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1320 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1321 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1322 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1327 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1328 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1329 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1330 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1331 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1332 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1333 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1334 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1335 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1336 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1337 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1338 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1355 unsigned NumOperands =
MI.getDesc().getNumOperands();
1356 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1358 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1359 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1360 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1361 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1364 unsigned TOS = getStackEntry(0);
1368 if (Op0 != TOS && Op1 != TOS) {
1375 }
else if (KillsOp1) {
1384 duplicateToTop(Op0, Dest,
I);
1388 }
else if (!KillsOp0 && !KillsOp1) {
1392 duplicateToTop(Op0, Dest,
I);
1399 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1400 "Stack conditions not set up right!");
1405 bool isForward = TOS == Op0;
1406 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1419 int Opcode =
Lookup(InstTable,
MI.getOpcode());
1420 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1423 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1429 if (!
MI.mayRaiseFPException())
1430 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1434 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1435 assert(!updateST0 &&
"Should have updated other operand!");
1441 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1442 assert(UpdatedSlot < StackTop && Dest < 7);
1443 Stack[UpdatedSlot] = Dest;
1444 RegMap[Dest] = UpdatedSlot;
1454 unsigned NumOperands =
MI.getDesc().getNumOperands();
1455 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1456 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1457 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1458 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1459 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1466 MI.getOperand(0).setReg(getSTReg(Op1));
1467 MI.removeOperand(1);
1469 MI.dropDebugNumber();
1472 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1473 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1485 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1492 MI.removeOperand(0);
1493 MI.removeOperand(1);
1494 MI.getOperand(0).setReg(getSTReg(Op1));
1496 MI.dropDebugNumber();
1499 if (Op0 != Op1 && KillsOp1) {
1501 freeStackSlotAfter(
I, Op1);
1518 if (
MI.isReturn()) {
1523 switch (
MI.getOpcode()) {
1525 case TargetOpcode::COPY: {
1529 bool KillsSrc =
MI.killsRegister(MO1.
getReg());
1534 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1538 unsigned Slot = getSlot(SrcFP);
1540 RegMap[DstFP] =
Slot;
1544 duplicateToTop(SrcFP, DstFP, Inst);
1549 case TargetOpcode::IMPLICIT_DEF: {
1551 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1593 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1594 unsigned NumOps = 0;
1599 i !=
e &&
MI.getOperand(
i).isImm();
i += 1 + NumOps) {
1600 unsigned Flags =
MI.getOperand(
i).getImm();
1608 unsigned STReg = MO.
getReg() - X86::FP0;
1621 STUses |= (1u << STReg);
1625 STDefs |= (1u << STReg);
1628 STClobbers |= (1u << STReg);
1636 MI.emitError(
"fixed input regs must be last on the x87 stack");
1641 MI.emitError(
"output regs must be last on the x87 stack");
1647 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1648 MI.emitError(
"clobbers must be last on the x87 stack");
1651 unsigned STPopped = STUses & (STDefs | STClobbers);
1653 MI.emitError(
"implicitly popped regs must be last on the x87 stack");
1656 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1657 << NumSTPopped <<
", and defines " << NumSTDefs
1663 for (
unsigned I = 0,
E =
MI.getNumOperands();
I <
E; ++
I)
1666 "Operands with constraint \"f\" cannot overlap with defs");
1672 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1674 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1681 if (
Op.isUse() &&
Op.isKill())
1682 FPKills |= 1U << FPReg;
1686 FPKills &= ~(STDefs | STClobbers);
1689 unsigned char STUsesArray[8];
1691 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1694 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1696 dbgs() <<
"Before asm: ";
1701 for (
unsigned i = 0,
e =
MI.getNumOperands();
i !=
e; ++
i) {
1703 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1710 Op.setReg(getSTReg(FPReg));
1713 Op.setReg(X86::ST0 + FPReg);
1717 StackTop -= NumSTPopped;
1719 for (
unsigned i = 0;
i < NumSTDefs; ++
i)
1720 pushReg(NumSTDefs -
i - 1);
1732 freeStackSlotAfter(Inst, FPReg);
1733 FPKills &= ~(1U << FPReg);
1757 LPR.addLiveOuts(
MBB);
1760 if (
MI.isDebugInstr())
1763 std::bitset<8> Defs;
1766 for (
auto &MO :
MI.operands()) {
1777 if (!LPR.contains(MO.
getReg()))
1780 Uses.push_back(&MO);
1783 for (
auto *MO :
Uses)
1787 LPR.stepBackward(
MI);