40#include "llvm/Config/llvm-config.h"
51#define DEBUG_TYPE "x86-codegen"
53STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
54STATISTIC(NumFP ,
"Number of floating point instructions");
57 const unsigned ScratchFPReg = 7;
64 memset(Stack, 0,
sizeof(Stack));
65 memset(RegMap, 0,
sizeof(RegMap));
80 MachineFunctionProperties::Property::NoVRegs);
105 unsigned FixCount = 0;
109 unsigned char FixStack[8];
111 LiveBundle() =
default;
114 bool isFixed()
const {
return !
Mask || FixCount; }
130 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
153 unsigned StackTop = 0;
163 unsigned RegMap[NumFPRegs];
166 void setupBlockStack();
169 void finishBlockStack();
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack()
const {
173 dbgs() <<
"Stack contents:";
174 for (
unsigned i = 0; i != StackTop; ++i) {
176 assert(RegMap[Stack[i]] == i &&
"Stack[] doesn't match RegMap[]!");
183 unsigned getSlot(
unsigned RegNo)
const {
184 assert(RegNo < NumFPRegs &&
"Regno out of range!");
185 return RegMap[RegNo];
189 bool isLive(
unsigned RegNo)
const {
190 unsigned Slot = getSlot(RegNo);
195 unsigned getStackEntry(
unsigned STi)
const {
198 return Stack[StackTop-1-STi];
203 unsigned getSTReg(
unsigned RegNo)
const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
208 void pushReg(
unsigned Reg) {
209 assert(Reg < NumFPRegs &&
"Register number out of range!");
213 RegMap[
Reg] = StackTop++;
220 RegMap[
Stack[--StackTop]] = ~0;
223 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
226 if (isAtTop(RegNo))
return;
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
235 if (RegMap[RegOnTop] >= StackTop)
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
244 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
247 unsigned STReg = getSTReg(RegNo);
274 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 &&
"Expected FP register!");
318 return Reg - X86::FP0;
327 bool FPIsUsed =
false;
329 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
331 for (
unsigned i = 0; i <= 6; ++i)
332 if (!
MRI.reg_nodbg_empty(X86::FP0 + i)) {
338 if (!FPIsUsed)
return false;
340 Bundles = &getAnalysis<EdgeBundles>();
344 bundleCFGRecomputeKillFlags(MF);
358 if ((
Entry->getParent()->getFunction().getCallingConv() ==
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
369 Bundle.FixStack[0] = 0;
372 bool Changed =
false;
374 Changed |= processBasicBlock(MF, *BB);
379 if (Processed.
insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
393 assert(LiveBundles.
empty() &&
"Stale data in LiveBundles");
400 const unsigned Mask = calcLiveInMask(&
MBB,
false);
412 bool Changed =
false;
422 if (
MI.isInlineAsm())
425 if (
MI.isCopy() && isFPCopy(
MI))
428 if (
MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
440 PrevMI = &*std::prev(
I);
449 if (MO.isReg() && MO.isDead())
452 switch (FPInstClass) {
465 for (
unsigned Reg : DeadRegs) {
468 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
469 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
470 LLVM_DEBUG(
dbgs() <<
"Register FP#" << Reg - X86::FP0 <<
" is dead!\n");
471 freeStackSlotAfter(
I, Reg-X86::FP0);
479 dbgs() <<
"Just deleted pseudo instruction\n";
483 while (Start != BB.
begin() && std::prev(Start) != PrevI)
485 dbgs() <<
"Inserted instructions:\n\t";
486 Start->print(
dbgs());
487 while (++Start != std::next(
I)) {
504void FPS::setupBlockStack() {
506 <<
" derived from " <<
MBB->
getName() <<
".\n");
509 const LiveBundle &Bundle =
518 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
521 for (
unsigned i = Bundle.FixCount; i > 0; --i) {
523 <<
unsigned(Bundle.FixStack[i - 1]) <<
'\n');
524 pushReg(Bundle.FixStack[i-1]);
530 unsigned Mask = calcLiveInMask(
MBB,
true);
539void FPS::finishBlockStack() {
545 <<
" derived from " <<
MBB->
getName() <<
".\n");
549 LiveBundle &Bundle = LiveBundles[BundleIdx];
554 adjustLiveRegs(Bundle.Mask, Term);
563 if (Bundle.isFixed()) {
565 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
569 Bundle.FixCount = StackTop;
570 for (
unsigned i = 0; i < StackTop; ++i)
571 Bundle.FixStack[i] = getStackEntry(i);
597 if (
I != Table.
end() &&
I->from == Opcode)
603#define ASSERT_SORTED(TABLE)
605#define ASSERT_SORTED(TABLE) \
607 static std::atomic<bool> TABLE##Checked(false); \
608 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
609 assert(is_sorted(TABLE) && \
610 "All lookup tables must be sorted for efficient access!"); \
611 TABLE##Checked.store(true, std::memory_order_relaxed); \
625 { X86::ABS_Fp32 , X86::ABS_F },
626 { X86::ABS_Fp64 , X86::ABS_F },
627 { X86::ABS_Fp80 , X86::ABS_F },
628 { X86::ADD_Fp32m , X86::ADD_F32m },
629 { X86::ADD_Fp64m , X86::ADD_F64m },
630 { X86::ADD_Fp64m32 , X86::ADD_F32m },
631 { X86::ADD_Fp80m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m64 , X86::ADD_F64m },
633 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
634 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
636 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
637 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
639 { X86::CHS_Fp32 , X86::CHS_F },
640 { X86::CHS_Fp64 , X86::CHS_F },
641 { X86::CHS_Fp80 , X86::CHS_F },
642 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
643 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
645 { X86::CMOVB_Fp32 , X86::CMOVB_F },
646 { X86::CMOVB_Fp64 , X86::CMOVB_F },
647 { X86::CMOVB_Fp80 , X86::CMOVB_F },
648 { X86::CMOVE_Fp32 , X86::CMOVE_F },
649 { X86::CMOVE_Fp64 , X86::CMOVE_F },
650 { X86::CMOVE_Fp80 , X86::CMOVE_F },
651 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
652 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
654 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
655 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
657 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
658 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
660 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
661 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
663 { X86::CMOVP_Fp32 , X86::CMOVP_F },
664 { X86::CMOVP_Fp64 , X86::CMOVP_F },
665 { X86::CMOVP_Fp80 , X86::CMOVP_F },
666 { X86::COM_FpIr32 , X86::COM_FIr },
667 { X86::COM_FpIr64 , X86::COM_FIr },
668 { X86::COM_FpIr80 , X86::COM_FIr },
669 { X86::COM_Fpr32 , X86::COM_FST0r },
670 { X86::COM_Fpr64 , X86::COM_FST0r },
671 { X86::COM_Fpr80 , X86::COM_FST0r },
672 { X86::DIVR_Fp32m , X86::DIVR_F32m },
673 { X86::DIVR_Fp64m , X86::DIVR_F64m },
674 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
675 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
676 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
677 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
678 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
679 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
680 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
681 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
682 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
683 { X86::DIV_Fp32m , X86::DIV_F32m },
684 { X86::DIV_Fp64m , X86::DIV_F64m },
685 { X86::DIV_Fp64m32 , X86::DIV_F32m },
686 { X86::DIV_Fp80m32 , X86::DIV_F32m },
687 { X86::DIV_Fp80m64 , X86::DIV_F64m },
688 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
689 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
690 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
691 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
692 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
693 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
694 { X86::ILD_Fp16m32 , X86::ILD_F16m },
695 { X86::ILD_Fp16m64 , X86::ILD_F16m },
696 { X86::ILD_Fp16m80 , X86::ILD_F16m },
697 { X86::ILD_Fp32m32 , X86::ILD_F32m },
698 { X86::ILD_Fp32m64 , X86::ILD_F32m },
699 { X86::ILD_Fp32m80 , X86::ILD_F32m },
700 { X86::ILD_Fp64m32 , X86::ILD_F64m },
701 { X86::ILD_Fp64m64 , X86::ILD_F64m },
702 { X86::ILD_Fp64m80 , X86::ILD_F64m },
703 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
704 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
705 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
707 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
708 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
710 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
711 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
712 { X86::IST_Fp16m32 , X86::IST_F16m },
713 { X86::IST_Fp16m64 , X86::IST_F16m },
714 { X86::IST_Fp16m80 , X86::IST_F16m },
715 { X86::IST_Fp32m32 , X86::IST_F32m },
716 { X86::IST_Fp32m64 , X86::IST_F32m },
717 { X86::IST_Fp32m80 , X86::IST_F32m },
718 { X86::IST_Fp64m32 , X86::IST_FP64m },
719 { X86::IST_Fp64m64 , X86::IST_FP64m },
720 { X86::IST_Fp64m80 , X86::IST_FP64m },
721 { X86::LD_Fp032 , X86::LD_F0 },
722 { X86::LD_Fp064 , X86::LD_F0 },
723 { X86::LD_Fp080 , X86::LD_F0 },
724 { X86::LD_Fp132 , X86::LD_F1 },
725 { X86::LD_Fp164 , X86::LD_F1 },
726 { X86::LD_Fp180 , X86::LD_F1 },
727 { X86::LD_Fp32m , X86::LD_F32m },
728 { X86::LD_Fp32m64 , X86::LD_F32m },
729 { X86::LD_Fp32m80 , X86::LD_F32m },
730 { X86::LD_Fp64m , X86::LD_F64m },
731 { X86::LD_Fp64m80 , X86::LD_F64m },
732 { X86::LD_Fp80m , X86::LD_F80m },
733 { X86::MUL_Fp32m , X86::MUL_F32m },
734 { X86::MUL_Fp64m , X86::MUL_F64m },
735 { X86::MUL_Fp64m32 , X86::MUL_F32m },
736 { X86::MUL_Fp80m32 , X86::MUL_F32m },
737 { X86::MUL_Fp80m64 , X86::MUL_F64m },
738 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
739 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
740 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
741 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
742 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
743 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
744 { X86::SQRT_Fp32 , X86::SQRT_F },
745 { X86::SQRT_Fp64 , X86::SQRT_F },
746 { X86::SQRT_Fp80 , X86::SQRT_F },
747 { X86::ST_Fp32m , X86::ST_F32m },
748 { X86::ST_Fp64m , X86::ST_F64m },
749 { X86::ST_Fp64m32 , X86::ST_F32m },
750 { X86::ST_Fp80m32 , X86::ST_F32m },
751 { X86::ST_Fp80m64 , X86::ST_F64m },
752 { X86::ST_FpP80m , X86::ST_FP80m },
753 { X86::SUBR_Fp32m , X86::SUBR_F32m },
754 { X86::SUBR_Fp64m , X86::SUBR_F64m },
755 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
756 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
758 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
759 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
761 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
762 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
764 { X86::SUB_Fp32m , X86::SUB_F32m },
765 { X86::SUB_Fp64m , X86::SUB_F64m },
766 { X86::SUB_Fp64m32 , X86::SUB_F32m },
767 { X86::SUB_Fp80m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m64 , X86::SUB_F64m },
769 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
770 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
772 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
773 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
775 { X86::TST_Fp32 , X86::TST_F },
776 { X86::TST_Fp64 , X86::TST_F },
777 { X86::TST_Fp80 , X86::TST_F },
778 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
779 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
781 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
782 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
784 { X86::XAM_Fp32 , X86::XAM_F },
785 { X86::XAM_Fp64 , X86::XAM_F },
786 { X86::XAM_Fp80 , X86::XAM_F },
792 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
804 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
806 { X86::COMP_FST0r, X86::FCOMPP },
807 { X86::COM_FIr , X86::COM_FIPr },
808 { X86::COM_FST0r , X86::COMP_FST0r },
810 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
811 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
813 { X86::IST_F16m , X86::IST_FP16m },
814 { X86::IST_F32m , X86::IST_FP32m },
816 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
818 { X86::ST_F32m , X86::ST_FP32m },
819 { X86::ST_F64m , X86::ST_FP64m },
820 { X86::ST_Frr , X86::ST_FPrr },
822 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
823 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
825 { X86::UCOM_FIr , X86::UCOM_FIPr },
827 { X86::UCOM_FPr , X86::UCOM_FPPr },
828 { X86::UCOM_Fr , X86::UCOM_FPr },
833 MI.findRegisterDefOperand(X86::FPSW,
nullptr))
866 I->setDesc(
TII->get(Opcode));
867 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
869 MI.dropDebugNumber();
876 if (Next !=
MBB.
end() && Next->readsRegister(X86::FPSW,
nullptr))
888 if (getStackEntry(0) == FPRegNo) {
896 I = freeStackSlotBefore(++
I, FPRegNo);
903 unsigned STReg = getSTReg(FPRegNo);
904 unsigned OldSlot = getSlot(FPRegNo);
905 unsigned TopReg =
Stack[StackTop-1];
906 Stack[OldSlot] = TopReg;
907 RegMap[TopReg] = OldSlot;
908 RegMap[FPRegNo] = ~0;
909 Stack[--StackTop] = ~0;
918 unsigned Defs =
Mask;
920 for (
unsigned i = 0; i < StackTop; ++i) {
921 unsigned RegNo =
Stack[i];
922 if (!(Defs & (1 << RegNo)))
924 Kills |= (1 << RegNo);
927 Defs &= ~(1 << RegNo);
929 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
932 while (Kills && Defs) {
935 LLVM_DEBUG(
dbgs() <<
"Renaming %fp" << KReg <<
" as imp %fp" << DReg
937 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
939 Kills &= ~(1 << KReg);
940 Defs &= ~(1 <<
DReg);
947 unsigned KReg = getStackEntry(0);
948 if (!(Kills & (1 << KReg)))
952 Kills &= ~(1 << KReg);
960 freeStackSlotBefore(
I, KReg);
961 Kills &= ~(1 << KReg);
970 Defs &= ~(1 <<
DReg);
981void FPS::shuffleStackTop(
const unsigned char *FixStack,
987 unsigned OldReg = getStackEntry(FixCount);
989 unsigned Reg = FixStack[FixCount];
995 moveToTop(OldReg,
I);
1007 unsigned STReturns = 0;
1009 bool ClobbersFPStack =
false;
1010 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1014 if (
Op.isRegMask()) {
1015 bool ClobbersFP0 =
Op.clobbersPhysReg(X86::FP0);
1017 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
1018 for (
unsigned i = 1; i != 8; ++i)
1019 assert(
Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1020 "Inconsistent FP register clobber");
1024 ClobbersFPStack =
true;
1027 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1030 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1036 MI.removeOperand(i);
1044 assert((ClobbersFPStack || STReturns == 0) &&
1045 "ST returns without FP stack clobber");
1046 if (!ClobbersFPStack)
1058 while (StackTop > 0)
1061 for (
unsigned I = 0;
I <
N; ++
I)
1067 I->dropDebugNumber();
1076 unsigned FirstFPRegOp = ~0
U, SecondFPRegOp = ~0
U;
1077 unsigned LiveMask = 0;
1079 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1081 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1088 MI.killsRegister(
Op.getReg(),
1090 "Ret only defs operands, and values aren't live beyond it");
1092 if (FirstFPRegOp == ~0U)
1095 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1101 MI.removeOperand(i);
1108 adjustLiveRegs(LiveMask,
MI);
1109 if (!LiveMask)
return;
1115 if (SecondFPRegOp == ~0U) {
1117 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1118 "Top of stack not the right register for RET!");
1130 if (StackTop == 1) {
1131 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1132 "Stack misconfiguration for RET!");
1136 unsigned NewReg = ScratchFPReg;
1137 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1138 FirstFPRegOp = NewReg;
1142 assert(StackTop == 2 &&
"Must have two values live!");
1146 if (getStackEntry(0) == SecondFPRegOp) {
1147 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1148 moveToTop(FirstFPRegOp,
MI);
1153 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1154 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1162 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1165 MI.removeOperand(0);
1173 MI.dropDebugNumber();
1180 unsigned NumOps =
MI.getDesc().getNumOperands();
1182 "Can only handle fst* & ftst instructions!");
1186 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg,
nullptr);
1194 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1196 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1197 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1198 MI.getOpcode() == X86::IST_Fp64m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1200 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1201 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1202 MI.getOpcode() == X86::IST_Fp64m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1204 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1205 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1206 MI.getOpcode() == X86::ST_FpP80m)) {
1207 duplicateToTop(Reg, ScratchFPReg,
I);
1213 MI.removeOperand(NumOps - 1);
1218 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1219 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1220 MI.getOpcode() == X86::ST_FP80m) {
1224 }
else if (KillsSrc) {
1228 MI.dropDebugNumber();
1243 unsigned NumOps =
MI.getDesc().getNumOperands();
1244 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1249 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg,
nullptr);
1262 duplicateToTop(Reg,
getFPReg(
MI.getOperand(0)),
I);
1266 MI.removeOperand(1);
1267 MI.removeOperand(0);
1269 MI.dropDebugNumber();
1279 { X86::ADD_Fp32 , X86::ADD_FST0r },
1280 { X86::ADD_Fp64 , X86::ADD_FST0r },
1281 { X86::ADD_Fp80 , X86::ADD_FST0r },
1282 { X86::DIV_Fp32 , X86::DIV_FST0r },
1283 { X86::DIV_Fp64 , X86::DIV_FST0r },
1284 { X86::DIV_Fp80 , X86::DIV_FST0r },
1285 { X86::MUL_Fp32 , X86::MUL_FST0r },
1286 { X86::MUL_Fp64 , X86::MUL_FST0r },
1287 { X86::MUL_Fp80 , X86::MUL_FST0r },
1288 { X86::SUB_Fp32 , X86::SUB_FST0r },
1289 { X86::SUB_Fp64 , X86::SUB_FST0r },
1290 { X86::SUB_Fp80 , X86::SUB_FST0r },
1295 { X86::ADD_Fp32 , X86::ADD_FST0r },
1296 { X86::ADD_Fp64 , X86::ADD_FST0r },
1297 { X86::ADD_Fp80 , X86::ADD_FST0r },
1298 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1299 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1300 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1301 { X86::MUL_Fp32 , X86::MUL_FST0r },
1302 { X86::MUL_Fp64 , X86::MUL_FST0r },
1303 { X86::MUL_Fp80 , X86::MUL_FST0r },
1304 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1305 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1306 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1311 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1312 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1313 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1314 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1315 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1316 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1317 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1318 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1319 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1320 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1321 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1322 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1327 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1328 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1329 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1330 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1331 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1332 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1333 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1334 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1335 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1336 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1337 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1338 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1355 unsigned NumOperands =
MI.getDesc().getNumOperands();
1356 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1358 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1359 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1360 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0,
nullptr);
1361 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1364 unsigned TOS = getStackEntry(0);
1368 if (Op0 != TOS && Op1 != TOS) {
1375 }
else if (KillsOp1) {
1384 duplicateToTop(Op0, Dest,
I);
1388 }
else if (!KillsOp0 && !KillsOp1) {
1392 duplicateToTop(Op0, Dest,
I);
1399 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1400 "Stack conditions not set up right!");
1405 bool isForward = TOS == Op0;
1406 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1419 int Opcode =
Lookup(InstTable,
MI.getOpcode());
1420 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1423 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1429 if (!
MI.mayRaiseFPException())
1430 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1434 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1435 assert(!updateST0 &&
"Should have updated other operand!");
1441 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1442 assert(UpdatedSlot < StackTop && Dest < 7);
1443 Stack[UpdatedSlot] = Dest;
1444 RegMap[Dest] = UpdatedSlot;
1454 unsigned NumOperands =
MI.getDesc().getNumOperands();
1455 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1456 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1457 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1458 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0,
nullptr);
1459 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1466 MI.getOperand(0).setReg(getSTReg(Op1));
1467 MI.removeOperand(1);
1469 MI.dropDebugNumber();
1472 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1473 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1485 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1492 MI.removeOperand(0);
1493 MI.removeOperand(1);
1494 MI.getOperand(0).setReg(getSTReg(Op1));
1496 MI.dropDebugNumber();
1499 if (Op0 != Op1 && KillsOp1) {
1501 freeStackSlotAfter(
I, Op1);
1518 if (
MI.isReturn()) {
1523 switch (
MI.getOpcode()) {
1525 case TargetOpcode::COPY: {
1529 bool KillsSrc =
MI.killsRegister(MO1.
getReg(),
nullptr);
1534 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1538 unsigned Slot = getSlot(SrcFP);
1540 RegMap[DstFP] =
Slot;
1544 duplicateToTop(SrcFP, DstFP, Inst);
1549 case TargetOpcode::IMPLICIT_DEF: {
1551 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1552 LLVM_DEBUG(
dbgs() <<
"Emitting LD_F0 for implicit FP" << Reg <<
'\n');
1558 case TargetOpcode::INLINEASM:
1559 case TargetOpcode::INLINEASM_BR: {
1593 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1594 unsigned NumOps = 0;
1599 i != e &&
MI.getOperand(i).isImm(); i += 1 + NumOps) {
1600 unsigned Flags =
MI.getOperand(i).getImm();
1603 NumOps =
F.getNumOperandRegisters();
1609 unsigned STReg = MO.
getReg() - X86::FP0;
1615 if (
F.hasRegClassConstraint(RCID)) {
1620 switch (
F.getKind()) {
1621 case InlineAsm::Kind::RegUse:
1622 STUses |= (1u << STReg);
1624 case InlineAsm::Kind::RegDef:
1625 case InlineAsm::Kind::RegDefEarlyClobber:
1626 STDefs |= (1u << STReg);
1628 case InlineAsm::Kind::Clobber:
1629 STClobbers |= (1u << STReg);
1637 MI.emitError(
"fixed input regs must be last on the x87 stack");
1642 MI.emitError(
"output regs must be last on the x87 stack");
1648 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1649 MI.emitError(
"clobbers must be last on the x87 stack");
1652 unsigned STPopped = STUses & (STDefs | STClobbers);
1654 MI.emitError(
"implicitly popped regs must be last on the x87 stack");
1657 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1658 << NumSTPopped <<
", and defines " << NumSTDefs
1664 for (
unsigned I = 0, E =
MI.getNumOperands();
I < E; ++
I)
1667 "Operands with constraint \"f\" cannot overlap with defs");
1673 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1675 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1682 if (
Op.isUse() &&
Op.isKill())
1683 FPKills |= 1U << FPReg;
1687 FPKills &= ~(STDefs | STClobbers);
1690 unsigned char STUsesArray[8];
1692 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1695 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1697 dbgs() <<
"Before asm: ";
1702 for (
unsigned i = 0, e =
MI.getNumOperands(); i !=
e; ++i) {
1704 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1709 if (FRegIdx.
count(i))
1711 Op.setReg(getSTReg(FPReg));
1714 Op.setReg(X86::ST0 + FPReg);
1718 StackTop -= NumSTPopped;
1720 for (
unsigned i = 0; i < NumSTDefs; ++i)
1721 pushReg(NumSTDefs - i - 1);
1733 freeStackSlotAfter(Inst, FPReg);
1734 FPKills &= ~(1U << FPReg);
1758 LPR.addLiveOuts(
MBB);
1761 if (
MI.isDebugInstr())
1764 std::bitset<8> Defs;
1767 for (
auto &MO :
MI.operands()) {
1778 if (LPR.available(MO.
getReg()))
1781 Uses.push_back(&MO);
1784 for (
auto *MO :
Uses)
1788 LPR.stepBackward(
MI);
unsigned const MachineRegisterInfo * MRI
#define LLVM_ATTRIBUTE_UNUSED
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(NodeRef N)