#define DEBUG_TYPE "x86-isel"

static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
    "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
    cl::desc(
        "Sets the preferable loop alignment for experiments (as log2 bytes) "
        "for innermost loops only. If specified, this option overrides "
        "alignment set by x86-experimental-pref-loop-alignment."),
    cl::Hidden);

static cl::opt<int> BrMergingBaseCostThresh(
    "x86-br-merging-base-cost", cl::init(2),
    cl::desc(
        "Sets the cost threshold for when multiple conditionals will be merged "
        "into one branch versus being split into multiple branches. Merging "
        "conditionals saves branches at the cost of additional instructions. "
        "This value sets the instruction cost limit, below which conditionals "
        "will be merged, and above which conditionals will be split. Set to -1 "
        "to never merge branches."),
    cl::Hidden);

static cl::opt<int> BrMergingCcmpBias(
    "x86-br-merging-ccmp-bias", cl::init(6),
    cl::desc("Increases 'x86-br-merging-base-cost' in cases where the target "
             "supports conditional compare instructions."),
    cl::Hidden);

static cl::opt<int> BrMergingLikelyBias(
    "x86-br-merging-likely-bias", cl::init(0),
    cl::desc("Increases 'x86-br-merging-base-cost' in cases where it is likely "
             "that all conditionals will be executed. For example, when merging "
             "the conditionals (a == b && c > d), if it is known that a == b is "
             "likely, then it is likely that if the conditionals are split "
             "both sides will be executed, so it may be desirable to increase "
             "the instruction cost threshold. Set to -1 to never merge likely "
             "branches."),
    cl::Hidden);

static cl::opt<int> BrMergingUnlikelyBias(
    "x86-br-merging-unlikely-bias", cl::init(-1),
    cl::desc(
        "Decreases 'x86-br-merging-base-cost' in cases where it is unlikely "
        "that all conditionals will be executed. For example, when merging "
        "the conditionals (a == b && c > d), if it is known that a == b is "
        "unlikely, then it is unlikely that if the conditionals are split "
        "both sides will be executed, so it may be desirable to decrease "
        "the instruction cost threshold. Set to -1 to never merge unlikely "
        "branches."),
    cl::Hidden);

static cl::opt<bool> MulConstantOptimization(
    "mul-constant-optimization", cl::init(true),
    cl::desc("Replace 'mul x, Const' with more effective instructions like "
             "SHIFT, LEA, etc."),
    cl::Hidden);
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();

if (Subtarget.isAtom())
  setSchedulingPreference(Sched::ILP);
else if (Subtarget.is64Bit())
  setSchedulingPreference(Sched::ILP);
else
  setSchedulingPreference(Sched::RegPressure);

if (Subtarget.hasSlowDivide32())
  addBypassSlowDiv(32, 8);
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
  addBypassSlowDiv(64, 32);
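// addBypassSlowDiv(N, M) asks CodeGenPrepare to guard an N-bit division with
// a runtime range check and use the cheaper M-bit divide when both operands
// fit; this pays off on subtargets where the wide hardware divider is slow.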
static const struct {
  const char *const Name;

for (const auto &LC : LibraryCalls) {

if (Subtarget.is64Bit())

for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {

if (Subtarget.is64Bit())

if (Subtarget.is64Bit())

if (Subtarget.is64Bit())

if (Subtarget.is64Bit())

if (!Subtarget.useSoftFloat()) {

if (!Subtarget.is64Bit()) {

for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {

if (Subtarget.is64Bit()) {

if (Subtarget.is64Bit()) {

} else if (!Subtarget.is64Bit())

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

if (Subtarget.is64Bit())

if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {

if (!Subtarget.hasBMI()) {

if (Subtarget.is64Bit()) {

if (Subtarget.hasLZCNT()) {

for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())
    continue;

(!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
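// With F16C available (and hard-float), fp16 <-> fp32 conversions can be
// selected to VCVTPH2PS/VCVTPS2PH, hence Custom; without it they are
// Expanded into runtime library calls.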
for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {

for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {

if (Subtarget.is64Bit())

if (Subtarget.hasPOPCNT()) {

if (!Subtarget.hasMOVBE())

for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())
    continue;

for (auto VT : { MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())
    continue;

for (auto VT : { MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())
    continue;

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {

if (!Subtarget.is64Bit())

if (Subtarget.is64Bit() && Subtarget.hasAVX()) {

bool Is64Bit = Subtarget.is64Bit();

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
  // f16, f32 and f64 use SSE; set up the FP register classes.
  addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
                                                   : &X86::FR16RegClass);
  addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
                                                   : &X86::FR32RegClass);
  addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
                                                   : &X86::FR64RegClass);

  for (auto VT : { MVT::f32, MVT::f64 }) {

  setF16Action(MVT::f16, Promote);

} else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
           (UseX87 || Is64Bit)) {

  for (auto VT : { MVT::f32, MVT::f64 }) {

if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
  addLegalFPImmediate(APFloat(+0.0f)); // FLD0
  addLegalFPImmediate(APFloat(+1.0f)); // FLD1
  addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
  addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
} else // SSE immediates.
  addLegalFPImmediate(APFloat(+0.0f)); // xorps

  addLegalFPImmediate(APFloat(+0.0)); // FLD0
  addLegalFPImmediate(APFloat(+1.0)); // FLD1
  addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
  addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

  addLegalFPImmediate(APFloat(+0.0)); // xorpd

addLegalFPImmediate(TmpFlt);  // FLD0
addLegalFPImmediate(TmpFlt);  // FLD0/FCHS

addLegalFPImmediate(TmpFlt2); // FLD1
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
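// The immediates registered above are exactly the values x87/SSE can
// materialize without a constant-pool load: FLDZ/FLD1 (optionally FCHS) on
// x87, and an xorps/xorpd zero in SSE registers. isFPImmLegal() later checks
// candidate constants against this list.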
if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
  addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                 : &X86::VR128RegClass);

for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
                 MVT::v4f32, MVT::v8f32, MVT::v16f32,
                 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {

if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
  addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
  addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);

  // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
  // registers cannot be used even for integer operations.
  addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);
  addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                  : &X86::VR128RegClass);

  for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {

  for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
                   MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {

  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {

  for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
    if (VT == MVT::v2i64 && !Subtarget.is64Bit())
      continue;

  setF16Action(MVT::v8f16, Expand);
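// setF16Action is a file-local helper (a lambda defined near its first use
// above) that installs a default legalization policy for an f16 vector type:
// roughly, most arithmetic and conversion ops get the given action (Expand
// here) while basic moves, loads and stores stay legal. See its definition
// for the exact op list; this gloss is a summary.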
for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {

for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
  if (VT == MVT::v2i64)
    continue;

if (Subtarget.hasGFNI()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {

  for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
  for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {

  for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

  if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                   MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })

if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
  addRegisterClass(MVT::v32i8,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v8i32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v8f32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v4i64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  addRegisterClass(MVT::v4f64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                   : &X86::VR256RegClass);
  for (auto VT : { MVT::v8f32, MVT::v4f64 }) {

  for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
    if (VT == MVT::v4i64)
      continue;

  for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

  for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

  for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                   MVT::v2f64, MVT::v4f64 }) {

  for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {

  for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {

  for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {

  for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                   MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {

  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                   MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {

  for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                  MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {

  setF16Action(MVT::v16f16, Expand);

  for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                   MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })

if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
    Subtarget.hasF16C()) {
  for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {

  for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {

  if (!Subtarget.hasDQI()) {

  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {

  for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })

  for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {

  for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })

  if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
    for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {

if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
  bool HasBWI = Subtarget.hasBWI();

  for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {

  if (Subtarget.hasDQI())

  for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {

  for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {

  if (!Subtarget.hasVLX()) {
    for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                    MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {

  for (auto VT : { MVT::v16f32, MVT::v8f64 }) {

  for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {

  for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {

  for (auto VT : { MVT::v16i32, MVT::v8i64 }) {

  for (auto VT : { MVT::v64i8, MVT::v32i16 }) {

  if (Subtarget.hasDQI()) {

  if (Subtarget.hasCDI()) {
    for (auto VT : { MVT::v16i32, MVT::v8i64 }) {

  if (Subtarget.hasVPOPCNTDQ()) {
    for (auto VT : { MVT::v16i32, MVT::v8i64 })

  for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                   MVT::v16f16, MVT::v8f32, MVT::v4f64 })

  for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
                   MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {

  setF16Action(MVT::v32f16, Expand);

  for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {

  for (auto VT : { MVT::v64i8, MVT::v32i16 }) {

  if (Subtarget.hasVBMI2()) {
    for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {

if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
  for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
                  MVT::v4i64}) {

if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {

  if (Subtarget.hasDQI()) {
           "Unexpected operation action!");

  for (auto VT : { MVT::v2i64, MVT::v4i64 }) {

  for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {

  for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                   MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })

  if (Subtarget.hasDQI()) {

  if (Subtarget.hasCDI()) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {

  if (Subtarget.hasVPOPCNTDQ()) {
    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {

  for (auto VT : { MVT::v32i1, MVT::v64i1 }) {

  for (auto VT : { MVT::v16i1, MVT::v32i1 })

  for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {

  if (Subtarget.hasBITALG()) {
    for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })

if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
  auto setGroup = [&](MVT VT) {

  setGroup(MVT::v32f16);
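  // setGroup (the lambda above) applies the same batch of operation actions
  // to each native FP16 vector type: the full 512-bit v32f16 type is handled
  // here, while the 128/256-bit variants below additionally require VLX.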
  if (Subtarget.hasVLX()) {
    setGroup(MVT::v8f16);
    setGroup(MVT::v16f16);

if (!Subtarget.useSoftFloat() &&
    (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
                              : &X86::VR128RegClass);
                              : &X86::VR256RegClass);

  for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
    setF16Action(VT, Expand);

if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
  setF16Action(MVT::v32bf16, Expand);

if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {

  if (Subtarget.hasBWI()) {

  if (Subtarget.hasFP16()) {

if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {

if (!Subtarget.is64Bit()) {

for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
  if (VT == MVT::i64 && !Subtarget.is64Bit())
    continue;

if (Subtarget.is32Bit() &&

unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;

if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
    !Subtarget.hasBWI())
  return TypeSplitVector;
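// Without BWI, AVX-512 mask registers only support mask operations up to 16
// bits wide, so v32i1/v64i1 are best handled by splitting rather than by
// widening or scalarization; hence TypeSplitVector above.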
bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                      bool AssumeSingleUse) {
  if (!AssumeSingleUse && !Op.hasOneUse())
    return false;

  // If this is an unaligned vector, make sure the target supports folding it.
  auto *Ld = cast<LoadSDNode>(Op.getNode());
  if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
      Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
    return false;
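// Pre-AVX, SSE instructions require 16-byte alignment on their memory
// operands, so an under-aligned 128-bit load cannot be folded into the using
// instruction and must stay a separate unaligned load.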
bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                          const X86Subtarget &Subtarget,
                                          bool AssumeSingleUse) {
  assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");

  auto *Ld = cast<LoadSDNode>(Op.getNode());
  return !Ld->isVolatile() ||

if (Op.hasOneUse()) {
  unsigned Opcode = Op.getNode()->use_begin()->getOpcode();

default:
  return false;

default:
  return false;

int ReturnAddrIndex = FuncInfo->getRAIndex();

if (ReturnAddrIndex == 0) {

bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model CM,
                                       bool HasSymbolicDisplacement) {
  // Without a symbolic displacement there is nothing further to check.
  if (!HasSymbolicDisplacement)
    return true;

  return Offset < 16 * 1024 * 1024;
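// In the kernel code model, code and statically-known symbols live in the
// negative 2GB of the address space, so a symbol plus an offset below 16MiB
// is assumed to remain reachable through a sign-extended 32-bit displacement.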
switch (SetCCOpcode) {

if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {

if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {

if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {

if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
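  // These special cases rewrite compares against the constants -1, 0 and 1
  // into compares against 0 that can use the sign flag directly, e.g.
  // (x > -1) becomes (x >= 0) and (x < 1) becomes (x <= 0), which map to
  // cheaper x86 condition codes.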
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);

switch (SetCCOpcode) {

switch (SetCCOpcode) {

bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {

switch (Intrinsic) {
case Intrinsic::x86_aesenc128kl:
case Intrinsic::x86_aesdec128kl:
  Info.ptrVal = I.getArgOperand(1);

case Intrinsic::x86_aesenc256kl:
case Intrinsic::x86_aesdec256kl:
  Info.ptrVal = I.getArgOperand(1);

case Intrinsic::x86_aesencwide128kl:
case Intrinsic::x86_aesdecwide128kl:
  Info.ptrVal = I.getArgOperand(0);

case Intrinsic::x86_aesencwide256kl:
case Intrinsic::x86_aesdecwide256kl:
  Info.ptrVal = I.getArgOperand(0);

case Intrinsic::x86_cmpccxadd32:
case Intrinsic::x86_cmpccxadd64:
case Intrinsic::x86_atomic_bts:
case Intrinsic::x86_atomic_btc:
case Intrinsic::x86_atomic_btr: {
  Info.ptrVal = I.getArgOperand(0);
  unsigned Size = I.getType()->getScalarSizeInBits();

case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
  Info.ptrVal = I.getArgOperand(0);
  unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
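// The bit-test intrinsics above lower to lock bts/btc/btr. Reporting the
// pointer operand and access size through IntrinsicInfo lets the DAG builder
// treat them like any other memory-touching intrinsic (aliasing, ordering).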
case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
case Intrinsic::x86_aand32:
case Intrinsic::x86_aand64:
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
  Info.ptrVal = I.getArgOperand(0);
  unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();

switch (IntrData->Type) {

  Info.ptrVal = I.getArgOperand(0);

  ScalarVT = MVT::i16;

  ScalarVT = MVT::i32;

  Info.ptrVal = nullptr;

  Info.ptrVal = nullptr;

bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  for (const APFloat &FPImm : LegalFPImmediates)
    if (Imm.bitwiseIsEqual(FPImm))
      return true;
  return false;
}
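// This is the consumer of addLegalFPImmediate() from the constructor: a
// floating-point constant is "legal" exactly when it bit-matches one of the
// registered immediates, letting selection avoid a constant-pool load.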
assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();

if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))

EVT VT = Load->getValueType(0);

for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
  // Skip uses of the chain result; only the loaded value (result 0) matters.
  if (UI.getUse().getResNo() != 0)
    continue;

if (BitSize == 0 || BitSize > 64)
  return false;
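// x86 immediates are at most 64 bits wide, so constants wider than that (or
// zero-sized types) cannot be folded into an instruction and are better left
// as constant-pool loads.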