40#include "llvm/Config/llvm-config.h"
51#define DEBUG_TYPE "x86-codegen"
53STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
54STATISTIC(NumFP ,
"Number of floating point instructions");
57 const unsigned ScratchFPReg = 7;
64 memset(Stack, 0,
sizeof(Stack));
65 memset(RegMap, 0,
sizeof(RegMap));
80 MachineFunctionProperties::Property::NoVRegs);
105 unsigned FixCount = 0;
109 unsigned char FixStack[8];
111 LiveBundle() =
default;
114 bool isFixed()
const {
return !
Mask || FixCount; }
130 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
153 unsigned StackTop = 0;
163 unsigned RegMap[NumFPRegs];
166 void setupBlockStack();
169 void finishBlockStack();
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack()
const {
173 dbgs() <<
"Stack contents:";
174 for (
unsigned i = 0; i != StackTop; ++i) {
176 assert(RegMap[Stack[i]] == i &&
"Stack[] doesn't match RegMap[]!");
183 unsigned getSlot(
unsigned RegNo)
const {
184 assert(RegNo < NumFPRegs &&
"Regno out of range!");
185 return RegMap[RegNo];
189 bool isLive(
unsigned RegNo)
const {
190 unsigned Slot = getSlot(RegNo);
195 unsigned getStackEntry(
unsigned STi)
const {
198 return Stack[StackTop-1-STi];
203 unsigned getSTReg(
unsigned RegNo)
const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
208 void pushReg(
unsigned Reg) {
209 assert(Reg < NumFPRegs &&
"Register number out of range!");
213 RegMap[
Reg] = StackTop++;
220 RegMap[
Stack[--StackTop]] = ~0;
223 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
226 if (isAtTop(RegNo))
return;
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
235 if (RegMap[RegOnTop] >= StackTop)
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
244 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
247 unsigned STReg = getSTReg(RegNo);
274 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 &&
"Expected FP register!");
318 return Reg - X86::FP0;
327 bool FPIsUsed =
false;
329 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
331 for (
unsigned i = 0; i <= 6; ++i)
332 if (!
MRI.reg_nodbg_empty(X86::FP0 + i)) {
338 if (!FPIsUsed)
return false;
340 Bundles = &getAnalysis<EdgeBundles>();
344 bundleCFGRecomputeKillFlags(MF);
354 LiveBundles[Bundles->
getBundle(Entry->getNumber(),
false)];
358 if ((Entry->getParent()->getFunction().getCallingConv() ==
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
369 Bundle.FixStack[0] = 0;
372 bool Changed =
false;
374 Changed |= processBasicBlock(MF, *BB);
379 if (Processed.
insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
393 assert(LiveBundles.
empty() &&
"Stale data in LiveBundles");
400 const unsigned Mask = calcLiveInMask(&
MBB,
false);
412 bool Changed =
false;
422 if (
MI.isInlineAsm())
425 if (
MI.isCopy() && isFPCopy(
MI))
428 if (
MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
440 PrevMI = &*std::prev(
I);
449 if (MO.isReg() && MO.isDead())
452 switch (FPInstClass) {
465 for (
unsigned Reg : DeadRegs) {
468 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
469 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
470 LLVM_DEBUG(
dbgs() <<
"Register FP#" << Reg - X86::FP0 <<
" is dead!\n");
471 freeStackSlotAfter(
I, Reg-X86::FP0);
479 dbgs() <<
"Just deleted pseudo instruction\n";
483 while (Start != BB.
begin() && std::prev(Start) != PrevI)
485 dbgs() <<
"Inserted instructions:\n\t";
486 Start->print(
dbgs());
487 while (++Start != std::next(
I)) {
504void FPS::setupBlockStack() {
506 <<
" derived from " <<
MBB->
getName() <<
".\n");
509 const LiveBundle &Bundle =
518 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
521 for (
unsigned i = Bundle.FixCount; i > 0; --i) {
523 <<
unsigned(Bundle.FixStack[i - 1]) <<
'\n');
524 pushReg(Bundle.FixStack[i-1]);
530 unsigned Mask = calcLiveInMask(
MBB,
true);
539void FPS::finishBlockStack() {
545 <<
" derived from " <<
MBB->
getName() <<
".\n");
549 LiveBundle &Bundle = LiveBundles[BundleIdx];
554 adjustLiveRegs(Bundle.Mask, Term);
563 if (Bundle.isFixed()) {
565 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
569 Bundle.FixCount = StackTop;
570 for (
unsigned i = 0; i < StackTop; ++i)
571 Bundle.FixStack[i] = getStackEntry(i);
597 if (
I != Table.
end() &&
I->from == Opcode)
603#define ASSERT_SORTED(TABLE)
605#define ASSERT_SORTED(TABLE) \
607 static std::atomic<bool> TABLE##Checked(false); \
608 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
609 assert(is_sorted(TABLE) && \
610 "All lookup tables must be sorted for efficient access!"); \
611 TABLE##Checked.store(true, std::memory_order_relaxed); \
625 { X86::ABS_Fp32 , X86::ABS_F },
626 { X86::ABS_Fp64 , X86::ABS_F },
627 { X86::ABS_Fp80 , X86::ABS_F },
628 { X86::ADD_Fp32m , X86::ADD_F32m },
629 { X86::ADD_Fp64m , X86::ADD_F64m },
630 { X86::ADD_Fp64m32 , X86::ADD_F32m },
631 { X86::ADD_Fp80m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m64 , X86::ADD_F64m },
633 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
634 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
636 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
637 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
639 { X86::CHS_Fp32 , X86::CHS_F },
640 { X86::CHS_Fp64 , X86::CHS_F },
641 { X86::CHS_Fp80 , X86::CHS_F },
642 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
643 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
645 { X86::CMOVB_Fp32 , X86::CMOVB_F },
646 { X86::CMOVB_Fp64 , X86::CMOVB_F },
647 { X86::CMOVB_Fp80 , X86::CMOVB_F },
648 { X86::CMOVE_Fp32 , X86::CMOVE_F },
649 { X86::CMOVE_Fp64 , X86::CMOVE_F },
650 { X86::CMOVE_Fp80 , X86::CMOVE_F },
651 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
652 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
654 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
655 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
657 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
658 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
660 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
661 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
663 { X86::CMOVP_Fp32 , X86::CMOVP_F },
664 { X86::CMOVP_Fp64 , X86::CMOVP_F },
665 { X86::CMOVP_Fp80 , X86::CMOVP_F },
666 { X86::COM_FpIr32 , X86::COM_FIr },
667 { X86::COM_FpIr64 , X86::COM_FIr },
668 { X86::COM_FpIr80 , X86::COM_FIr },
669 { X86::COM_Fpr32 , X86::COM_FST0r },
670 { X86::COM_Fpr64 , X86::COM_FST0r },
671 { X86::COM_Fpr80 , X86::COM_FST0r },
672 { X86::DIVR_Fp32m , X86::DIVR_F32m },
673 { X86::DIVR_Fp64m , X86::DIVR_F64m },
674 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
675 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
676 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
677 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
678 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
679 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
680 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
681 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
682 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
683 { X86::DIV_Fp32m , X86::DIV_F32m },
684 { X86::DIV_Fp64m , X86::DIV_F64m },
685 { X86::DIV_Fp64m32 , X86::DIV_F32m },
686 { X86::DIV_Fp80m32 , X86::DIV_F32m },
687 { X86::DIV_Fp80m64 , X86::DIV_F64m },
688 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
689 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
690 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
691 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
692 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
693 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
694 { X86::ILD_Fp16m32 , X86::ILD_F16m },
695 { X86::ILD_Fp16m64 , X86::ILD_F16m },
696 { X86::ILD_Fp16m80 , X86::ILD_F16m },
697 { X86::ILD_Fp32m32 , X86::ILD_F32m },
698 { X86::ILD_Fp32m64 , X86::ILD_F32m },
699 { X86::ILD_Fp32m80 , X86::ILD_F32m },
700 { X86::ILD_Fp64m32 , X86::ILD_F64m },
701 { X86::ILD_Fp64m64 , X86::ILD_F64m },
702 { X86::ILD_Fp64m80 , X86::ILD_F64m },
703 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
704 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
705 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
707 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
708 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
710 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
711 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
712 { X86::IST_Fp16m32 , X86::IST_F16m },
713 { X86::IST_Fp16m64 , X86::IST_F16m },
714 { X86::IST_Fp16m80 , X86::IST_F16m },
715 { X86::IST_Fp32m32 , X86::IST_F32m },
716 { X86::IST_Fp32m64 , X86::IST_F32m },
717 { X86::IST_Fp32m80 , X86::IST_F32m },
718 { X86::IST_Fp64m32 , X86::IST_FP64m },
719 { X86::IST_Fp64m64 , X86::IST_FP64m },
720 { X86::IST_Fp64m80 , X86::IST_FP64m },
721 { X86::LD_Fp032 , X86::LD_F0 },
722 { X86::LD_Fp064 , X86::LD_F0 },
723 { X86::LD_Fp080 , X86::LD_F0 },
724 { X86::LD_Fp132 , X86::LD_F1 },
725 { X86::LD_Fp164 , X86::LD_F1 },
726 { X86::LD_Fp180 , X86::LD_F1 },
727 { X86::LD_Fp32m , X86::LD_F32m },
728 { X86::LD_Fp32m64 , X86::LD_F32m },
729 { X86::LD_Fp32m80 , X86::LD_F32m },
730 { X86::LD_Fp64m , X86::LD_F64m },
731 { X86::LD_Fp64m80 , X86::LD_F64m },
732 { X86::LD_Fp80m , X86::LD_F80m },
733 { X86::MUL_Fp32m , X86::MUL_F32m },
734 { X86::MUL_Fp64m , X86::MUL_F64m },
735 { X86::MUL_Fp64m32 , X86::MUL_F32m },
736 { X86::MUL_Fp80m32 , X86::MUL_F32m },
737 { X86::MUL_Fp80m64 , X86::MUL_F64m },
738 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
739 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
740 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
741 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
742 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
743 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
744 { X86::SQRT_Fp32 , X86::SQRT_F },
745 { X86::SQRT_Fp64 , X86::SQRT_F },
746 { X86::SQRT_Fp80 , X86::SQRT_F },
747 { X86::ST_Fp32m , X86::ST_F32m },
748 { X86::ST_Fp64m , X86::ST_F64m },
749 { X86::ST_Fp64m32 , X86::ST_F32m },
750 { X86::ST_Fp80m32 , X86::ST_F32m },
751 { X86::ST_Fp80m64 , X86::ST_F64m },
752 { X86::ST_FpP80m , X86::ST_FP80m },
753 { X86::SUBR_Fp32m , X86::SUBR_F32m },
754 { X86::SUBR_Fp64m , X86::SUBR_F64m },
755 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
756 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
758 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
759 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
761 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
762 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
764 { X86::SUB_Fp32m , X86::SUB_F32m },
765 { X86::SUB_Fp64m , X86::SUB_F64m },
766 { X86::SUB_Fp64m32 , X86::SUB_F32m },
767 { X86::SUB_Fp80m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m64 , X86::SUB_F64m },
769 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
770 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
772 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
773 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
775 { X86::TST_Fp32 , X86::TST_F },
776 { X86::TST_Fp64 , X86::TST_F },
777 { X86::TST_Fp80 , X86::TST_F },
778 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
779 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
781 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
782 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
784 { X86::XAM_Fp32 , X86::XAM_F },
785 { X86::XAM_Fp64 , X86::XAM_F },
786 { X86::XAM_Fp80 , X86::XAM_F },
792 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
804 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
806 { X86::COMP_FST0r, X86::FCOMPP },
807 { X86::COM_FIr , X86::COM_FIPr },
808 { X86::COM_FST0r , X86::COMP_FST0r },
810 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
811 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
813 { X86::IST_F16m , X86::IST_FP16m },
814 { X86::IST_F32m , X86::IST_FP32m },
816 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
818 { X86::ST_F32m , X86::ST_FP32m },
819 { X86::ST_F64m , X86::ST_FP64m },
820 { X86::ST_Frr , X86::ST_FPrr },
822 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
823 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
825 { X86::UCOM_FIr , X86::UCOM_FIPr },
827 { X86::UCOM_FPr , X86::UCOM_FPPr },
828 { X86::UCOM_Fr , X86::UCOM_FPr },
865 I->setDesc(
TII->get(Opcode));
866 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
868 MI.dropDebugNumber();
875 if (Next !=
MBB.
end() && Next->readsRegister(X86::FPSW))
887 if (getStackEntry(0) == FPRegNo) {
895 I = freeStackSlotBefore(++
I, FPRegNo);
902 unsigned STReg = getSTReg(FPRegNo);
903 unsigned OldSlot = getSlot(FPRegNo);
904 unsigned TopReg =
Stack[StackTop-1];
905 Stack[OldSlot] = TopReg;
906 RegMap[TopReg] = OldSlot;
907 RegMap[FPRegNo] = ~0;
908 Stack[--StackTop] = ~0;
917 unsigned Defs =
Mask;
919 for (
unsigned i = 0; i < StackTop; ++i) {
920 unsigned RegNo =
Stack[i];
921 if (!(Defs & (1 << RegNo)))
923 Kills |= (1 << RegNo);
926 Defs &= ~(1 << RegNo);
928 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
931 while (Kills && Defs) {
934 LLVM_DEBUG(
dbgs() <<
"Renaming %fp" << KReg <<
" as imp %fp" << DReg
936 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
938 Kills &= ~(1 << KReg);
939 Defs &= ~(1 <<
DReg);
946 unsigned KReg = getStackEntry(0);
947 if (!(Kills & (1 << KReg)))
951 Kills &= ~(1 << KReg);
959 freeStackSlotBefore(
I, KReg);
960 Kills &= ~(1 << KReg);
969 Defs &= ~(1 <<
DReg);
980void FPS::shuffleStackTop(
const unsigned char *FixStack,
986 unsigned OldReg = getStackEntry(FixCount);
988 unsigned Reg = FixStack[FixCount];
994 moveToTop(OldReg,
I);
1006 unsigned STReturns = 0;
1008 bool ClobbersFPStack =
false;
1009 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1013 if (
Op.isRegMask()) {
1014 bool ClobbersFP0 =
Op.clobbersPhysReg(X86::FP0);
1016 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
1017 for (
unsigned i = 1; i != 8; ++i)
1018 assert(
Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1019 "Inconsistent FP register clobber");
1023 ClobbersFPStack =
true;
1026 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1029 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1035 MI.removeOperand(i);
1043 assert((ClobbersFPStack || STReturns == 0) &&
1044 "ST returns without FP stack clobber");
1045 if (!ClobbersFPStack)
1057 while (StackTop > 0)
1060 for (
unsigned I = 0;
I <
N; ++
I)
1066 I->dropDebugNumber();
1075 unsigned FirstFPRegOp = ~0
U, SecondFPRegOp = ~0
U;
1076 unsigned LiveMask = 0;
1078 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1080 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1087 MI.killsRegister(
Op.getReg())) &&
1088 "Ret only defs operands, and values aren't live beyond it");
1090 if (FirstFPRegOp == ~0U)
1093 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1099 MI.removeOperand(i);
1106 adjustLiveRegs(LiveMask,
MI);
1107 if (!LiveMask)
return;
1113 if (SecondFPRegOp == ~0U) {
1115 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1116 "Top of stack not the right register for RET!");
1128 if (StackTop == 1) {
1129 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1130 "Stack misconfiguration for RET!");
1134 unsigned NewReg = ScratchFPReg;
1135 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1136 FirstFPRegOp = NewReg;
1140 assert(StackTop == 2 &&
"Must have two values live!");
1144 if (getStackEntry(0) == SecondFPRegOp) {
1145 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1146 moveToTop(FirstFPRegOp,
MI);
1151 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1152 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1160 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1163 MI.removeOperand(0);
1171 MI.dropDebugNumber();
1178 unsigned NumOps =
MI.getDesc().getNumOperands();
1180 "Can only handle fst* & ftst instructions!");
1184 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg);
1192 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1193 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1194 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1196 MI.getOpcode() == X86::IST_Fp64m64 ||
1197 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1198 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1200 MI.getOpcode() == X86::IST_Fp64m80 ||
1201 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1202 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1204 MI.getOpcode() == X86::ST_FpP80m)) {
1205 duplicateToTop(Reg, ScratchFPReg,
I);
1211 MI.removeOperand(NumOps - 1);
1216 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1217 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1218 MI.getOpcode() == X86::ST_FP80m) {
1222 }
else if (KillsSrc) {
1226 MI.dropDebugNumber();
1241 unsigned NumOps =
MI.getDesc().getNumOperands();
1242 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1247 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg);
1260 duplicateToTop(Reg,
getFPReg(
MI.getOperand(0)),
I);
1264 MI.removeOperand(1);
1265 MI.removeOperand(0);
1267 MI.dropDebugNumber();
1277 { X86::ADD_Fp32 , X86::ADD_FST0r },
1278 { X86::ADD_Fp64 , X86::ADD_FST0r },
1279 { X86::ADD_Fp80 , X86::ADD_FST0r },
1280 { X86::DIV_Fp32 , X86::DIV_FST0r },
1281 { X86::DIV_Fp64 , X86::DIV_FST0r },
1282 { X86::DIV_Fp80 , X86::DIV_FST0r },
1283 { X86::MUL_Fp32 , X86::MUL_FST0r },
1284 { X86::MUL_Fp64 , X86::MUL_FST0r },
1285 { X86::MUL_Fp80 , X86::MUL_FST0r },
1286 { X86::SUB_Fp32 , X86::SUB_FST0r },
1287 { X86::SUB_Fp64 , X86::SUB_FST0r },
1288 { X86::SUB_Fp80 , X86::SUB_FST0r },
1293 { X86::ADD_Fp32 , X86::ADD_FST0r },
1294 { X86::ADD_Fp64 , X86::ADD_FST0r },
1295 { X86::ADD_Fp80 , X86::ADD_FST0r },
1296 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1297 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1298 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1299 { X86::MUL_Fp32 , X86::MUL_FST0r },
1300 { X86::MUL_Fp64 , X86::MUL_FST0r },
1301 { X86::MUL_Fp80 , X86::MUL_FST0r },
1302 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1303 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1304 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1309 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1310 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1311 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1312 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1313 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1314 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1315 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1316 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1317 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1318 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1319 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1320 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1325 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1326 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1327 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1328 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1329 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1330 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1331 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1332 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1333 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1334 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1335 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1336 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1353 unsigned NumOperands =
MI.getDesc().getNumOperands();
1354 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1356 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1357 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1358 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1359 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1362 unsigned TOS = getStackEntry(0);
1366 if (Op0 != TOS && Op1 != TOS) {
1373 }
else if (KillsOp1) {
1382 duplicateToTop(Op0, Dest,
I);
1386 }
else if (!KillsOp0 && !KillsOp1) {
1390 duplicateToTop(Op0, Dest,
I);
1397 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1398 "Stack conditions not set up right!");
1403 bool isForward = TOS == Op0;
1404 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1417 int Opcode =
Lookup(InstTable,
MI.getOpcode());
1418 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1421 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1427 if (!
MI.mayRaiseFPException())
1428 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1432 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1433 assert(!updateST0 &&
"Should have updated other operand!");
1439 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1440 assert(UpdatedSlot < StackTop && Dest < 7);
1441 Stack[UpdatedSlot] = Dest;
1442 RegMap[Dest] = UpdatedSlot;
1452 unsigned NumOperands =
MI.getDesc().getNumOperands();
1453 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1454 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1455 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1456 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1457 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1464 MI.getOperand(0).setReg(getSTReg(Op1));
1465 MI.removeOperand(1);
1467 MI.dropDebugNumber();
1470 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1471 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1483 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1490 MI.removeOperand(0);
1491 MI.removeOperand(1);
1492 MI.getOperand(0).setReg(getSTReg(Op1));
1494 MI.dropDebugNumber();
1497 if (Op0 != Op1 && KillsOp1) {
1499 freeStackSlotAfter(
I, Op1);
1516 if (
MI.isReturn()) {
1521 switch (
MI.getOpcode()) {
1523 case TargetOpcode::COPY: {
1527 bool KillsSrc =
MI.killsRegister(MO1.
getReg());
1532 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1536 unsigned Slot = getSlot(SrcFP);
1538 RegMap[DstFP] =
Slot;
1542 duplicateToTop(SrcFP, DstFP, Inst);
1547 case TargetOpcode::IMPLICIT_DEF: {
1549 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1550 LLVM_DEBUG(
dbgs() <<
"Emitting LD_F0 for implicit FP" << Reg <<
'\n');
1556 case TargetOpcode::INLINEASM:
1557 case TargetOpcode::INLINEASM_BR: {
1591 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1592 unsigned NumOps = 0;
1597 i != e &&
MI.getOperand(i).isImm(); i += 1 + NumOps) {
1598 unsigned Flags =
MI.getOperand(i).getImm();
1601 NumOps =
F.getNumOperandRegisters();
1607 unsigned STReg = MO.
getReg() - X86::FP0;
1613 if (
F.hasRegClassConstraint(RCID)) {
1618 switch (
F.getKind()) {
1619 case InlineAsm::Kind::RegUse:
1620 STUses |= (1u << STReg);
1622 case InlineAsm::Kind::RegDef:
1623 case InlineAsm::Kind::RegDefEarlyClobber:
1624 STDefs |= (1u << STReg);
1626 case InlineAsm::Kind::Clobber:
1627 STClobbers |= (1u << STReg);
1635 MI.emitError(
"fixed input regs must be last on the x87 stack");
1640 MI.emitError(
"output regs must be last on the x87 stack");
1646 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1647 MI.emitError(
"clobbers must be last on the x87 stack");
1650 unsigned STPopped = STUses & (STDefs | STClobbers);
1652 MI.emitError(
"implicitly popped regs must be last on the x87 stack");
1655 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1656 << NumSTPopped <<
", and defines " << NumSTDefs
1662 for (
unsigned I = 0,
E =
MI.getNumOperands();
I <
E; ++
I)
1665 "Operands with constraint \"f\" cannot overlap with defs");
1671 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1673 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1680 if (
Op.isUse() &&
Op.isKill())
1681 FPKills |= 1U << FPReg;
1685 FPKills &= ~(STDefs | STClobbers);
1688 unsigned char STUsesArray[8];
1690 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1693 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1695 dbgs() <<
"Before asm: ";
1700 for (
unsigned i = 0, e =
MI.getNumOperands(); i !=
e; ++i) {
1702 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1707 if (FRegIdx.
count(i))
1709 Op.setReg(getSTReg(FPReg));
1712 Op.setReg(X86::ST0 + FPReg);
1716 StackTop -= NumSTPopped;
1718 for (
unsigned i = 0; i < NumSTDefs; ++i)
1719 pushReg(NumSTDefs - i - 1);
1731 freeStackSlotAfter(Inst, FPReg);
1732 FPKills &= ~(1U << FPReg);
1756 LPR.addLiveOuts(
MBB);
1759 if (
MI.isDebugInstr())
1762 std::bitset<8> Defs;
1765 for (
auto &MO :
MI.operands()) {
1776 if (LPR.available(MO.
getReg()))
1779 Uses.push_back(&MO);
1782 for (
auto *MO :
Uses)
1786 LPR.stepBackward(
MI);
unsigned const MachineRegisterInfo * MRI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(NodeRef N)