40#include "llvm/Config/llvm-config.h"
51#define DEBUG_TYPE "x86-codegen"
53STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
54STATISTIC(NumFP ,
"Number of floating point instructions");
57 const unsigned ScratchFPReg = 7;
64 memset(Stack, 0,
sizeof(Stack));
65 memset(RegMap, 0,
sizeof(RegMap));
80 MachineFunctionProperties::Property::NoVRegs);
105 unsigned FixCount = 0;
109 unsigned char FixStack[8];
111 LiveBundle() =
default;
114 bool isFixed()
const {
return !
Mask || FixCount; }
130 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
153 unsigned StackTop = 0;
163 unsigned RegMap[NumFPRegs];
166 void setupBlockStack();
169 void finishBlockStack();
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack()
const {
173 dbgs() <<
"Stack contents:";
174 for (
unsigned i = 0; i != StackTop; ++i) {
176 assert(RegMap[Stack[i]] == i &&
"Stack[] doesn't match RegMap[]!");
183 unsigned getSlot(
unsigned RegNo)
const {
184 assert(RegNo < NumFPRegs &&
"Regno out of range!");
185 return RegMap[RegNo];
189 bool isLive(
unsigned RegNo)
const {
190 unsigned Slot = getSlot(RegNo);
195 unsigned getStackEntry(
unsigned STi)
const {
198 return Stack[StackTop-1-STi];
203 unsigned getSTReg(
unsigned RegNo)
const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
208 void pushReg(
unsigned Reg) {
209 assert(Reg < NumFPRegs &&
"Register number out of range!");
213 RegMap[
Reg] = StackTop++;
220 RegMap[
Stack[--StackTop]] = ~0;
223 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
226 if (isAtTop(RegNo))
return;
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
235 if (RegMap[RegOnTop] >= StackTop)
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
244 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
247 unsigned STReg = getSTReg(RegNo);
274 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 &&
"Expected FP register!");
318 return Reg - X86::FP0;
327 bool FPIsUsed =
false;
329 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
331 for (
unsigned i = 0; i <= 6; ++i)
332 if (!
MRI.reg_nodbg_empty(X86::FP0 + i)) {
338 if (!FPIsUsed)
return false;
340 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
344 bundleCFGRecomputeKillFlags(MF);
358 if ((
Entry->getParent()->getFunction().getCallingConv() ==
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
369 Bundle.FixStack[0] = 0;
372 bool Changed =
false;
374 Changed |= processBasicBlock(MF, *BB);
379 if (Processed.
insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
393 assert(LiveBundles.
empty() &&
"Stale data in LiveBundles");
400 const unsigned Mask = calcLiveInMask(&
MBB,
false);
412 bool Changed =
false;
422 if (
MI.isInlineAsm())
425 if (
MI.isCopy() && isFPCopy(
MI))
428 if (
MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
443 if (
MI.isFakeUse()) {
445 if (MO.
isReg() && X86::RFP80RegClass.contains(MO.
getReg())) {
458 PrevMI = &*std::prev(
I);
470 switch (FPInstClass) {
483 for (
unsigned Reg : DeadRegs) {
486 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
487 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
488 LLVM_DEBUG(
dbgs() <<
"Register FP#" << Reg - X86::FP0 <<
" is dead!\n");
489 freeStackSlotAfter(
I, Reg-X86::FP0);
497 dbgs() <<
"Just deleted pseudo instruction\n";
501 while (Start != BB.
begin() && std::prev(Start) != PrevI)
503 dbgs() <<
"Inserted instructions:\n\t";
504 Start->print(
dbgs());
505 while (++Start != std::next(
I)) {
522void FPS::setupBlockStack() {
524 <<
" derived from " <<
MBB->
getName() <<
".\n");
527 const LiveBundle &Bundle =
536 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
539 for (
unsigned i = Bundle.FixCount; i > 0; --i) {
541 <<
unsigned(Bundle.FixStack[i - 1]) <<
'\n');
542 pushReg(Bundle.FixStack[i-1]);
548 unsigned Mask = calcLiveInMask(
MBB,
true);
557void FPS::finishBlockStack() {
563 <<
" derived from " <<
MBB->
getName() <<
".\n");
567 LiveBundle &Bundle = LiveBundles[BundleIdx];
572 adjustLiveRegs(Bundle.Mask, Term);
581 if (Bundle.isFixed()) {
583 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
587 Bundle.FixCount = StackTop;
588 for (
unsigned i = 0; i < StackTop; ++i)
589 Bundle.FixStack[i] = getStackEntry(i);
615 if (
I != Table.
end() &&
I->from == Opcode)
621#define ASSERT_SORTED(TABLE)
623#define ASSERT_SORTED(TABLE) \
625 static std::atomic<bool> TABLE##Checked(false); \
626 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
627 assert(is_sorted(TABLE) && \
628 "All lookup tables must be sorted for efficient access!"); \
629 TABLE##Checked.store(true, std::memory_order_relaxed); \
643 { X86::ABS_Fp32 , X86::ABS_F },
644 { X86::ABS_Fp64 , X86::ABS_F },
645 { X86::ABS_Fp80 , X86::ABS_F },
646 { X86::ADD_Fp32m , X86::ADD_F32m },
647 { X86::ADD_Fp64m , X86::ADD_F64m },
648 { X86::ADD_Fp64m32 , X86::ADD_F32m },
649 { X86::ADD_Fp80m32 , X86::ADD_F32m },
650 { X86::ADD_Fp80m64 , X86::ADD_F64m },
651 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
652 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
653 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
654 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
655 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
656 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
657 { X86::CHS_Fp32 , X86::CHS_F },
658 { X86::CHS_Fp64 , X86::CHS_F },
659 { X86::CHS_Fp80 , X86::CHS_F },
660 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
661 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
662 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
663 { X86::CMOVB_Fp32 , X86::CMOVB_F },
664 { X86::CMOVB_Fp64 , X86::CMOVB_F },
665 { X86::CMOVB_Fp80 , X86::CMOVB_F },
666 { X86::CMOVE_Fp32 , X86::CMOVE_F },
667 { X86::CMOVE_Fp64 , X86::CMOVE_F },
668 { X86::CMOVE_Fp80 , X86::CMOVE_F },
669 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
670 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
671 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
672 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
673 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
674 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
675 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
676 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
677 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
678 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
679 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
680 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
681 { X86::CMOVP_Fp32 , X86::CMOVP_F },
682 { X86::CMOVP_Fp64 , X86::CMOVP_F },
683 { X86::CMOVP_Fp80 , X86::CMOVP_F },
684 { X86::COM_FpIr32 , X86::COM_FIr },
685 { X86::COM_FpIr64 , X86::COM_FIr },
686 { X86::COM_FpIr80 , X86::COM_FIr },
687 { X86::COM_Fpr32 , X86::COM_FST0r },
688 { X86::COM_Fpr64 , X86::COM_FST0r },
689 { X86::COM_Fpr80 , X86::COM_FST0r },
690 { X86::DIVR_Fp32m , X86::DIVR_F32m },
691 { X86::DIVR_Fp64m , X86::DIVR_F64m },
692 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
693 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
694 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
695 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
696 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
697 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
698 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
699 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
700 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
701 { X86::DIV_Fp32m , X86::DIV_F32m },
702 { X86::DIV_Fp64m , X86::DIV_F64m },
703 { X86::DIV_Fp64m32 , X86::DIV_F32m },
704 { X86::DIV_Fp80m32 , X86::DIV_F32m },
705 { X86::DIV_Fp80m64 , X86::DIV_F64m },
706 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
707 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
708 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
709 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
710 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
711 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
712 { X86::ILD_Fp16m32 , X86::ILD_F16m },
713 { X86::ILD_Fp16m64 , X86::ILD_F16m },
714 { X86::ILD_Fp16m80 , X86::ILD_F16m },
715 { X86::ILD_Fp32m32 , X86::ILD_F32m },
716 { X86::ILD_Fp32m64 , X86::ILD_F32m },
717 { X86::ILD_Fp32m80 , X86::ILD_F32m },
718 { X86::ILD_Fp64m32 , X86::ILD_F64m },
719 { X86::ILD_Fp64m64 , X86::ILD_F64m },
720 { X86::ILD_Fp64m80 , X86::ILD_F64m },
721 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
722 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
723 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
724 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
725 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
726 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
727 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
728 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
729 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
730 { X86::IST_Fp16m32 , X86::IST_F16m },
731 { X86::IST_Fp16m64 , X86::IST_F16m },
732 { X86::IST_Fp16m80 , X86::IST_F16m },
733 { X86::IST_Fp32m32 , X86::IST_F32m },
734 { X86::IST_Fp32m64 , X86::IST_F32m },
735 { X86::IST_Fp32m80 , X86::IST_F32m },
736 { X86::IST_Fp64m32 , X86::IST_FP64m },
737 { X86::IST_Fp64m64 , X86::IST_FP64m },
738 { X86::IST_Fp64m80 , X86::IST_FP64m },
739 { X86::LD_Fp032 , X86::LD_F0 },
740 { X86::LD_Fp064 , X86::LD_F0 },
741 { X86::LD_Fp080 , X86::LD_F0 },
742 { X86::LD_Fp132 , X86::LD_F1 },
743 { X86::LD_Fp164 , X86::LD_F1 },
744 { X86::LD_Fp180 , X86::LD_F1 },
745 { X86::LD_Fp32m , X86::LD_F32m },
746 { X86::LD_Fp32m64 , X86::LD_F32m },
747 { X86::LD_Fp32m80 , X86::LD_F32m },
748 { X86::LD_Fp64m , X86::LD_F64m },
749 { X86::LD_Fp64m80 , X86::LD_F64m },
750 { X86::LD_Fp80m , X86::LD_F80m },
751 { X86::MUL_Fp32m , X86::MUL_F32m },
752 { X86::MUL_Fp64m , X86::MUL_F64m },
753 { X86::MUL_Fp64m32 , X86::MUL_F32m },
754 { X86::MUL_Fp80m32 , X86::MUL_F32m },
755 { X86::MUL_Fp80m64 , X86::MUL_F64m },
756 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
757 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
758 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
759 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
760 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
761 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
762 { X86::SQRT_Fp32 , X86::SQRT_F },
763 { X86::SQRT_Fp64 , X86::SQRT_F },
764 { X86::SQRT_Fp80 , X86::SQRT_F },
765 { X86::ST_Fp32m , X86::ST_F32m },
766 { X86::ST_Fp64m , X86::ST_F64m },
767 { X86::ST_Fp64m32 , X86::ST_F32m },
768 { X86::ST_Fp80m32 , X86::ST_F32m },
769 { X86::ST_Fp80m64 , X86::ST_F64m },
770 { X86::ST_FpP80m , X86::ST_FP80m },
771 { X86::SUBR_Fp32m , X86::SUBR_F32m },
772 { X86::SUBR_Fp64m , X86::SUBR_F64m },
773 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
774 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
775 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
776 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
777 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
778 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
779 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
780 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
781 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
782 { X86::SUB_Fp32m , X86::SUB_F32m },
783 { X86::SUB_Fp64m , X86::SUB_F64m },
784 { X86::SUB_Fp64m32 , X86::SUB_F32m },
785 { X86::SUB_Fp80m32 , X86::SUB_F32m },
786 { X86::SUB_Fp80m64 , X86::SUB_F64m },
787 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
788 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
789 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
790 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
791 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
792 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
793 { X86::TST_Fp32 , X86::TST_F },
794 { X86::TST_Fp64 , X86::TST_F },
795 { X86::TST_Fp80 , X86::TST_F },
796 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
797 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
798 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
799 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
800 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
801 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
802 { X86::XAM_Fp32 , X86::XAM_F },
803 { X86::XAM_Fp64 , X86::XAM_F },
804 { X86::XAM_Fp80 , X86::XAM_F },
810 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
822 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
824 { X86::COMP_FST0r, X86::FCOMPP },
825 { X86::COM_FIr , X86::COM_FIPr },
826 { X86::COM_FST0r , X86::COMP_FST0r },
828 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
829 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
831 { X86::IST_F16m , X86::IST_FP16m },
832 { X86::IST_F32m , X86::IST_FP32m },
834 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
836 { X86::ST_F32m , X86::ST_FP32m },
837 { X86::ST_F64m , X86::ST_FP64m },
838 { X86::ST_Frr , X86::ST_FPrr },
840 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
841 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
843 { X86::UCOM_FIr , X86::UCOM_FIPr },
845 { X86::UCOM_FPr , X86::UCOM_FPPr },
846 { X86::UCOM_Fr , X86::UCOM_FPr },
851 MI.findRegisterDefOperand(X86::FPSW,
nullptr))
884 I->setDesc(
TII->get(Opcode));
885 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
887 MI.dropDebugNumber();
894 if (Next !=
MBB.
end() && Next->readsRegister(X86::FPSW,
nullptr))
906 if (getStackEntry(0) == FPRegNo) {
914 I = freeStackSlotBefore(++
I, FPRegNo);
921 unsigned STReg = getSTReg(FPRegNo);
922 unsigned OldSlot = getSlot(FPRegNo);
923 unsigned TopReg =
Stack[StackTop-1];
924 Stack[OldSlot] = TopReg;
925 RegMap[TopReg] = OldSlot;
926 RegMap[FPRegNo] = ~0;
927 Stack[--StackTop] = ~0;
936 unsigned Defs =
Mask;
938 for (
unsigned i = 0; i < StackTop; ++i) {
939 unsigned RegNo =
Stack[i];
940 if (!(Defs & (1 << RegNo)))
942 Kills |= (1 << RegNo);
945 Defs &= ~(1 << RegNo);
947 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
950 while (Kills && Defs) {
953 LLVM_DEBUG(
dbgs() <<
"Renaming %fp" << KReg <<
" as imp %fp" << DReg
955 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
957 Kills &= ~(1 << KReg);
958 Defs &= ~(1 <<
DReg);
965 unsigned KReg = getStackEntry(0);
966 if (!(Kills & (1 << KReg)))
970 Kills &= ~(1 << KReg);
978 freeStackSlotBefore(
I, KReg);
979 Kills &= ~(1 << KReg);
988 Defs &= ~(1 <<
DReg);
999void FPS::shuffleStackTop(
const unsigned char *FixStack,
1003 while (FixCount--) {
1005 unsigned OldReg = getStackEntry(FixCount);
1007 unsigned Reg = FixStack[FixCount];
1013 moveToTop(OldReg,
I);
1025 unsigned STReturns = 0;
1027 bool ClobbersFPStack =
false;
1028 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1032 if (
Op.isRegMask()) {
1033 bool ClobbersFP0 =
Op.clobbersPhysReg(X86::FP0);
1035 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
1036 for (
unsigned i = 1; i != 8; ++i)
1037 assert(
Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1038 "Inconsistent FP register clobber");
1042 ClobbersFPStack =
true;
1045 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1048 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1054 MI.removeOperand(i);
1062 assert((ClobbersFPStack || STReturns == 0) &&
1063 "ST returns without FP stack clobber");
1064 if (!ClobbersFPStack)
1076 while (StackTop > 0)
1079 for (
unsigned I = 0;
I <
N; ++
I)
1085 I->dropDebugNumber();
1094 unsigned FirstFPRegOp = ~0
U, SecondFPRegOp = ~0
U;
1095 unsigned LiveMask = 0;
1097 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1099 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1106 MI.killsRegister(
Op.getReg(),
1108 "Ret only defs operands, and values aren't live beyond it");
1110 if (FirstFPRegOp == ~0U)
1113 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1119 MI.removeOperand(i);
1126 adjustLiveRegs(LiveMask,
MI);
1127 if (!LiveMask)
return;
1133 if (SecondFPRegOp == ~0U) {
1135 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1136 "Top of stack not the right register for RET!");
1148 if (StackTop == 1) {
1149 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1150 "Stack misconfiguration for RET!");
1154 unsigned NewReg = ScratchFPReg;
1155 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1156 FirstFPRegOp = NewReg;
1160 assert(StackTop == 2 &&
"Must have two values live!");
1164 if (getStackEntry(0) == SecondFPRegOp) {
1165 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1166 moveToTop(FirstFPRegOp,
MI);
1171 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1172 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1180 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1183 MI.removeOperand(0);
1191 MI.dropDebugNumber();
1198 unsigned NumOps =
MI.getDesc().getNumOperands();
1200 "Can only handle fst* & ftst instructions!");
1204 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg,
nullptr);
1212 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1213 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1214 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1215 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1216 MI.getOpcode() == X86::IST_Fp64m64 ||
1217 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1218 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1219 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1220 MI.getOpcode() == X86::IST_Fp64m80 ||
1221 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1222 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1223 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1224 MI.getOpcode() == X86::ST_FpP80m)) {
1225 duplicateToTop(Reg, ScratchFPReg,
I);
1231 MI.removeOperand(NumOps - 1);
1236 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1237 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1238 MI.getOpcode() == X86::ST_FP80m) {
1242 }
else if (KillsSrc) {
1246 MI.dropDebugNumber();
1261 unsigned NumOps =
MI.getDesc().getNumOperands();
1262 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1267 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg,
nullptr);
1280 duplicateToTop(Reg,
getFPReg(
MI.getOperand(0)),
I);
1284 MI.removeOperand(1);
1285 MI.removeOperand(0);
1287 MI.dropDebugNumber();
1297 { X86::ADD_Fp32 , X86::ADD_FST0r },
1298 { X86::ADD_Fp64 , X86::ADD_FST0r },
1299 { X86::ADD_Fp80 , X86::ADD_FST0r },
1300 { X86::DIV_Fp32 , X86::DIV_FST0r },
1301 { X86::DIV_Fp64 , X86::DIV_FST0r },
1302 { X86::DIV_Fp80 , X86::DIV_FST0r },
1303 { X86::MUL_Fp32 , X86::MUL_FST0r },
1304 { X86::MUL_Fp64 , X86::MUL_FST0r },
1305 { X86::MUL_Fp80 , X86::MUL_FST0r },
1306 { X86::SUB_Fp32 , X86::SUB_FST0r },
1307 { X86::SUB_Fp64 , X86::SUB_FST0r },
1308 { X86::SUB_Fp80 , X86::SUB_FST0r },
1313 { X86::ADD_Fp32 , X86::ADD_FST0r },
1314 { X86::ADD_Fp64 , X86::ADD_FST0r },
1315 { X86::ADD_Fp80 , X86::ADD_FST0r },
1316 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1317 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1318 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1319 { X86::MUL_Fp32 , X86::MUL_FST0r },
1320 { X86::MUL_Fp64 , X86::MUL_FST0r },
1321 { X86::MUL_Fp80 , X86::MUL_FST0r },
1322 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1323 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1324 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1329 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1330 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1331 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1332 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1333 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1334 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1335 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1336 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1337 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1338 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1339 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1340 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1345 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1346 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1347 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1348 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1349 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1350 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1351 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1352 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1353 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1354 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1355 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1356 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1373 unsigned NumOperands =
MI.getDesc().getNumOperands();
1374 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1376 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1377 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1378 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0,
nullptr);
1379 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1382 unsigned TOS = getStackEntry(0);
1386 if (Op0 != TOS && Op1 != TOS) {
1393 }
else if (KillsOp1) {
1402 duplicateToTop(Op0, Dest,
I);
1406 }
else if (!KillsOp0 && !KillsOp1) {
1410 duplicateToTop(Op0, Dest,
I);
1417 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1418 "Stack conditions not set up right!");
1423 bool isForward = TOS == Op0;
1424 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1437 int Opcode =
Lookup(InstTable,
MI.getOpcode());
1438 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1441 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1447 if (!
MI.mayRaiseFPException())
1448 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1452 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1453 assert(!updateST0 &&
"Should have updated other operand!");
1459 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1460 assert(UpdatedSlot < StackTop && Dest < 7);
1461 Stack[UpdatedSlot] = Dest;
1462 RegMap[Dest] = UpdatedSlot;
1472 unsigned NumOperands =
MI.getDesc().getNumOperands();
1473 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1474 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1475 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1476 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0,
nullptr);
1477 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1484 MI.getOperand(0).setReg(getSTReg(Op1));
1485 MI.removeOperand(1);
1487 MI.dropDebugNumber();
1490 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1491 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1503 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1510 MI.removeOperand(0);
1511 MI.removeOperand(1);
1512 MI.getOperand(0).setReg(getSTReg(Op1));
1514 MI.dropDebugNumber();
1517 if (Op0 != Op1 && KillsOp1) {
1519 freeStackSlotAfter(
I, Op1);
1536 if (
MI.isReturn()) {
1541 switch (
MI.getOpcode()) {
1543 case TargetOpcode::COPY: {
1547 bool KillsSrc =
MI.killsRegister(MO1.
getReg(),
nullptr);
1552 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1556 unsigned Slot = getSlot(SrcFP);
1558 RegMap[DstFP] =
Slot;
1562 duplicateToTop(SrcFP, DstFP, Inst);
1567 case TargetOpcode::IMPLICIT_DEF: {
1569 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1570 LLVM_DEBUG(
dbgs() <<
"Emitting LD_F0 for implicit FP" << Reg <<
'\n');
1576 case TargetOpcode::INLINEASM:
1577 case TargetOpcode::INLINEASM_BR: {
1611 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1612 unsigned NumOps = 0;
1617 i != e &&
MI.getOperand(i).isImm(); i += 1 + NumOps) {
1618 unsigned Flags =
MI.getOperand(i).getImm();
1621 NumOps =
F.getNumOperandRegisters();
1627 unsigned STReg = MO.
getReg() - X86::FP0;
1633 if (
F.hasRegClassConstraint(RCID)) {
1638 switch (
F.getKind()) {
1639 case InlineAsm::Kind::RegUse:
1640 STUses |= (1u << STReg);
1642 case InlineAsm::Kind::RegDef:
1643 case InlineAsm::Kind::RegDefEarlyClobber:
1644 STDefs |= (1u << STReg);
1646 case InlineAsm::Kind::Clobber:
1647 STClobbers |= (1u << STReg);
1655 MI.emitGenericError(
"fixed input regs must be last on the x87 stack");
1660 MI.emitGenericError(
"output regs must be last on the x87 stack");
1666 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1667 MI.emitGenericError(
"clobbers must be last on the x87 stack");
1670 unsigned STPopped = STUses & (STDefs | STClobbers);
1672 MI.emitGenericError(
1673 "implicitly popped regs must be last on the x87 stack");
1676 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1677 << NumSTPopped <<
", and defines " << NumSTDefs
1683 for (
unsigned I = 0, E =
MI.getNumOperands();
I < E; ++
I)
1686 "Operands with constraint \"f\" cannot overlap with defs");
1692 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1694 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1701 if (
Op.isUse() &&
Op.isKill())
1702 FPKills |= 1U <<
FPReg;
1706 FPKills &= ~(STDefs | STClobbers);
1709 unsigned char STUsesArray[8];
1711 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1714 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1716 dbgs() <<
"Before asm: ";
1721 for (
unsigned i = 0, e =
MI.getNumOperands(); i !=
e; ++i) {
1723 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1728 if (FRegIdx.
count(i))
1737 StackTop -= NumSTPopped;
1739 for (
unsigned i = 0; i < NumSTDefs; ++i)
1740 pushReg(NumSTDefs - i - 1);
1752 freeStackSlotAfter(Inst,
FPReg);
1753 FPKills &= ~(1U <<
FPReg);
1764 case TargetOpcode::FAKE_USE: {
1765 assert(
MI.getNumExplicitOperands() == 1 &&
1766 "FAKE_USE must have exactly one operand");
1767 if (
MI.getOperand(0).isKill()) {
1768 freeStackSlotBefore(Inst,
getFPReg(
MI.getOperand(0)));
1770 MI.removeOperand(0);
1791 LPR.addLiveOuts(
MBB);
1794 if (
MI.isDebugInstr())
1797 std::bitset<8> Defs;
1800 for (
auto &MO :
MI.operands()) {
1811 if (LPR.available(MO.
getReg()))
1814 Uses.push_back(&MO);
1817 for (
auto *MO :
Uses)
1821 LPR.stepBackward(
MI);
unsigned const MachineRegisterInfo * MRI
#define LLVM_ATTRIBUTE_UNUSED
This file builds on the ADT/GraphTraits.h file to build a generic depth-first graph iterator.
const HexagonInstrInfo * TII
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static constexpr Register FPReg
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void removeLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ X86_RegCall
Register calling convention used for parameter transfer optimization.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), e.g. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(NodeRef N)