106 std::initializer_list<LLT> PackedVectorAllTypeList = {
112 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
116 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
119 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
126 const bool HasFP16 = ST.hasFullFP16();
127 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
129 const bool HasCSSC = ST.hasCSSC();
130 const bool HasRCPC3 = ST.hasRCPC3();
131 const bool HasSVE = ST.hasSVE();
134 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
135 .legalFor({p0, s8, s16, s32, s64})
136 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
138 .widenScalarToNextPow2(0)
151 .legalFor(PackedVectorAllTypeList)
165 .widenScalarToNextPow2(0)
170 .maxScalarIf(
typeInSet(0, {s64, p0}), 1, s32);
175 .widenScalarToNextPow2(1)
180 .maxScalarIf(
typeInSet(1, {s64, p0}), 0, s32)
181 .maxScalarIf(
typeInSet(1, {s128}), 0, s64);
184 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
185 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
186 .widenScalarToNextPow2(0)
194 return Query.
Types[0].getNumElements() <= 2;
199 return Query.
Types[0].getNumElements() <= 4;
204 return Query.
Types[0].getNumElements() <= 16;
211 .
legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
212 .widenScalarToNextPow2(0)
220 return Query.
Types[0].getNumElements() <= 2;
225 return Query.
Types[0].getNumElements() <= 4;
230 return Query.
Types[0].getNumElements() <= 16;
238 const auto &SrcTy = Query.
Types[0];
239 const auto &AmtTy = Query.
Types[1];
240 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
241 AmtTy.getSizeInBits() == 32;
255 .widenScalarToNextPow2(0)
269 .
legalFor({{p0, i64}, {v2p0, v2i64}})
270 .clampScalarOrElt(1, s64, s64)
276 .legalFor({i32, i64})
278 .clampScalar(0, s32, s64)
283 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
292 .widenScalarToNextPow2(0, 32)
297 .legalFor({i64, v16i8, v8i16, v4i32})
303 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
304 .legalFor(HasCSSC, {i32, i64})
305 .minScalar(HasCSSC, 0, s32)
314 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
318 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
322 [=](
const LegalityQuery &Query) {
return std::make_pair(0, v4i16); })
325 [=](
const LegalityQuery &Query) {
return std::make_pair(0, v2i32); })
326 .clampNumElements(0, v8s8, v16s8)
334 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
335 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
339 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
340 .legalFor({{i32, i32}, {i64, i32}})
341 .clampScalar(0, s32, s64)
346 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
352 return Q.
Types[0].isScalar() && Q.
Types[1].getScalarSizeInBits() < 64;
358 .customFor({{s32, s32}, {s64, s64}});
362 .
legalFor(HasCSSC, {{i32, i32}, {i64, i64}})
363 .legalFor({{v8i8, v8i8}, {v16i8, v16i8}})
364 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
365 .customFor({{s128, s128},
371 .clampScalar(0, s32, s128)
384 .legalFor({{i32, i32},
392 .widenScalarToNextPow2(1, 32)
410 .customFor(!HasCSSC, {s32, s64});
416 .widenScalarToNextPow2(0, 32)
428 .
legalFor({i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
437 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
438 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
439 .clampNumElements(0, v8s8, v16s8)
448 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
449 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
450 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
451 .legalFor({f32, f64, v2f32, v4f32, v2f64})
452 .legalFor(HasFP16, {f16, v4f16, v8f16})
457 return (!HasFP16 && Q.
Types[0].getScalarType().isFloat16()) ||
458 Q.
Types[0].getScalarType().isBFloat16();
461 .clampNumElements(0, v4s16, v8s16)
467 .legalFor({f32, f64, v2f32, v4f32, v2f64})
468 .legalFor(HasFP16, {f16, v4f16, v8f16})
483 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
484 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
485 G_FSINH, G_FTANH, G_FMODF})
494 .
libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
497 .legalFor({{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
498 .legalFor(HasFP16, {{i32, f16}, {i64, f16}})
503 .legalFor({{i64, f32}, {i64, f64}})
504 .legalFor(HasFP16, {{i64, f16}})
522 for (
unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
525 if (
Op == G_SEXTLOAD)
530 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
538 {v2s32, p0, s64, 8}})
539 .widenScalarToNextPow2(0)
540 .clampScalar(0, s32, s64)
543 .unsupportedIfMemSizeNotPow2()
555 return HasRCPC3 && Query.
Types[0] == s128 &&
559 return Query.
Types[0] == s128 &&
562 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
569 {v16s8, p0, s128, 8},
571 {v8s16, p0, s128, 8},
573 {v4s32, p0, s128, 8},
574 {v2s64, p0, s128, 8}})
576 .legalForTypesWithMemDesc(
577 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
578 .legalForTypesWithMemDesc({
580 {nxv16s8, p0, nxv16s8, 8},
581 {nxv8s16, p0, nxv8s16, 8},
582 {nxv4s32, p0, nxv4s32, 8},
583 {nxv2s64, p0, nxv2s64, 8},
585 .widenScalarToNextPow2(0, 8)
596 return Query.
Types[0].isScalar() &&
598 Query.
Types[0].getSizeInBits() > 32;
607 .customIf(IsPtrVecPred)
613 return HasRCPC3 && Query.
Types[0] == s128 &&
617 return Query.
Types[0] == s128 &&
625 {{s8, p0, s8, 8}, {s16, p0, s8, 8},
628 {s16, p0, s16, 8}, {s32, p0, s16, 8},
630 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
631 {s64, p0, s64, 8}, {s64, p0, s32, 8},
632 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
633 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
634 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
635 .legalForTypesWithMemDesc({
640 {nxv16s8, p0, nxv16s8, 8},
641 {nxv8s16, p0, nxv8s16, 8},
642 {nxv4s32, p0, nxv4s32, 8},
643 {nxv2s64, p0, nxv2s64, 8},
645 .clampScalar(0, s8, s64)
648 return Query.
Types[0].isScalar() &&
652 .clampMaxNumElements(0, s8, 16)
661 return Query.
Types[0].getSizeInBits() ==
662 Query.
MMODescrs[0].MemoryTy.getSizeInBits();
668 .customIf(IsPtrVecPred)
686 {p0, v16s8, v16s8, 8},
687 {p0, v4s16, v4s16, 8},
688 {p0, v8s16, v8s16, 8},
689 {p0, v2s32, v2s32, 8},
690 {p0, v4s32, v4s32, 8},
691 {p0, v2s64, v2s64, 8},
697 auto IndexedLoadBasicPred = [=](
const LegalityQuery &Query) {
725 return MemTy == s8 || MemTy == s16;
727 return MemTy == s8 || MemTy == s16 || MemTy == s32;
735 .widenScalarToNextPow2(0)
739 .clampScalar(0, MinFPScalar, s128);
743 .
legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
753 return Ty.isVector() && !SrcTy.isPointerVector() &&
754 Ty.getElementType() != SrcTy.getElementType();
762 return Query.
Types[1].isPointerVector();
779 .legalFor(HasFP16, {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
784 return (!HasFP16 && Q.
Types[1].getScalarType().isFloat16()) ||
785 Q.
Types[1].getScalarType().isBFloat16();
793 return Ty.isVector() && !SrcTy.isPointerVector() &&
794 Ty.getElementType() != SrcTy.getElementType();
797 .clampNumElements(1, v4s16, v8s16)
805 unsigned DstSize = Query.
Types[0].getSizeInBits();
808 if (Query.
Types[0].isVector())
811 if (DstSize < 8 || DstSize >= 128 || !
isPowerOf2_32(DstSize))
819 unsigned SrcSize = SrcTy.getSizeInBits();
826 .legalIf(ExtLegalFunc)
827 .
legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
828 .clampScalar(0, s64, s64)
835 return (Query.
Types[0].getScalarSizeInBits() >
836 Query.
Types[1].getScalarSizeInBits() * 2) &&
837 Query.
Types[0].isVector() &&
838 (Query.
Types[1].getScalarSizeInBits() == 8 ||
839 Query.
Types[1].getScalarSizeInBits() == 16);
841 .clampMinNumElements(1, s8, 8)
846 .
legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
857 return DstTy.
isVector() && SrcTy.getSizeInBits() > 128 &&
860 .clampMinNumElements(0, s8, 8)
865 .legalFor({{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
866 .clampNumElements(0, v2s32, v2s32);
869 .
legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
880 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
881 .legalFor(ST.hasBF16(), {{bf16, f32}, {v4bf16, v4f32}})
882 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
890 .lowerFor({{bf16, f32}, {v4bf16, v4f32}})
892 .clampNumElements(1, v4s32, v4s32)
896 getActionDefinitionsBuilder(G_FPEXT)
904 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
915 .clampNumElements(0, v4s32, v4s32)
920 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
921 .legalFor({{i32, f32},
929 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
936 return Query.
Types[1] == f16 && Query.
Types[0].getSizeInBits() > 64;
945 return Query.
Types[0].getScalarSizeInBits() <= 64 &&
946 Query.
Types[0].getScalarSizeInBits() >
947 Query.
Types[1].getScalarSizeInBits();
952 return Query.
Types[1].getScalarSizeInBits() <= 64 &&
953 Query.
Types[0].getScalarSizeInBits() <
954 Query.
Types[1].getScalarSizeInBits();
957 .clampNumElements(0, v4s16, v8s16)
961 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
963 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
964 .legalFor({{i32, f32},
973 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
981 return Query.
Types[1] == f16 && Query.
Types[0].getSizeInBits() > 64;
991 unsigned ITySize = Query.
Types[0].getScalarSizeInBits();
992 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
993 ITySize > Query.
Types[1].getScalarSizeInBits();
998 unsigned FTySize = Query.
Types[1].getScalarSizeInBits();
999 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
1000 Query.
Types[0].getScalarSizeInBits() < FTySize;
1008 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
1009 .legalFor({{f32, i32},
1017 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1019 return Query.
Types[0].getScalarType().isBFloat16();
1027 return Query.
Types[1].isVector() &&
1028 Query.
Types[1].getScalarSizeInBits() == 64 &&
1029 Query.
Types[0].getScalarSizeInBits() == 16;
1031 .widenScalarOrEltToNextPow2OrMinSize(0, HasFP16 ? 16 : 32)
1035 return Query.
Types[0].getScalarSizeInBits() == 32 &&
1036 Query.
Types[1].getScalarSizeInBits() == 64;
1041 return Query.
Types[1].getScalarSizeInBits() <= 64 &&
1042 Query.
Types[0].getScalarSizeInBits() <
1043 Query.
Types[1].getScalarSizeInBits();
1048 return Query.
Types[0].getScalarSizeInBits() <= 64 &&
1049 Query.
Types[0].getScalarSizeInBits() >
1050 Query.
Types[1].getScalarSizeInBits();
1053 .clampNumElements(0, v4s16, v8s16)
1065 getActionDefinitionsBuilder(G_BRCOND)
1067 .clampScalar(0, s32, s32);
1068 getActionDefinitionsBuilder(G_BRINDIRECT).
legalFor({p0});
1070 getActionDefinitionsBuilder(G_SELECT)
1071 .
legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1072 .widenScalarToNextPow2(0)
1080 getActionDefinitionsBuilder(G_FRAME_INDEX).
legalFor({p0});
1083 getActionDefinitionsBuilder(G_GLOBAL_VALUE).
custom();
1085 getActionDefinitionsBuilder(G_GLOBAL_VALUE).
legalFor({p0});
1087 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1090 getActionDefinitionsBuilder(G_PTRTOINT)
1091 .
legalFor({{i64, p0}, {v2i64, v2p0}})
1092 .widenScalarToNextPow2(0, 64)
1096 getActionDefinitionsBuilder(G_INTTOPTR)
1098 return Query.
Types[0].getSizeInBits() != Query.
Types[1].getSizeInBits();
1100 .legalFor({{p0, i64}, {v2p0, v2i64}})
1101 .clampMaxNumElements(1, s64, 2);
1105 getActionDefinitionsBuilder(G_BITCAST)
1108 .legalForCartesianProduct({s32, v2s16, v4s8})
1109 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1110 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1119 return Query.
Types[0].isVector() != Query.
Types[1].isVector();
1128 getActionDefinitionsBuilder(G_VASTART).
legalFor({p0});
1132 getActionDefinitionsBuilder(G_VAARG)
1134 .clampScalar(0, s8, s64)
1137 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1141 bool UseOutlineAtomics =
ST.outlineAtomics() && !
ST.hasLSE();
1143 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1144 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1145 .customFor(!UseOutlineAtomics, {{s128, p0}})
1146 .libcallFor(UseOutlineAtomics,
1147 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1148 .clampScalar(0, s32, s64);
1150 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1151 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1153 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1154 .libcallFor(UseOutlineAtomics,
1155 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1156 .clampScalar(0, s32, s64);
1160 getActionDefinitionsBuilder(
1161 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1163 .clampScalar(0, s32, s64);
1165 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1168 for (
unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1169 unsigned BigTyIdx =
Op == G_MERGE_VALUES ? 0 : 1;
1170 unsigned LitTyIdx =
Op == G_MERGE_VALUES ? 1 : 0;
1171 getActionDefinitionsBuilder(
Op)
1172 .widenScalarToNextPow2(LitTyIdx, 8)
1173 .widenScalarToNextPow2(BigTyIdx, 32)
1174 .clampScalar(LitTyIdx, s8, s64)
1175 .clampScalar(BigTyIdx, s32, s128)
1177 switch (Q.
Types[BigTyIdx].getSizeInBits()) {
1185 switch (Q.
Types[LitTyIdx].getSizeInBits()) {
1198 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1199 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1200 {s16, nxv8s16, s64},
1201 {s32, nxv4s32, s64},
1202 {s64, nxv2s64, s64}})
1204 const LLT &EltTy = Query.
Types[1].getElementType();
1205 if (Query.
Types[1].isScalableVector())
1207 return Query.
Types[0] != EltTy;
1212 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1213 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1214 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1220 return Query.
Types[1].isFixedVector() &&
1221 Query.
Types[1].getNumElements() <= 2;
1226 return Query.
Types[1].isFixedVector() &&
1227 Query.
Types[1].getNumElements() <= 4;
1232 return Query.
Types[1].isFixedVector() &&
1233 Query.
Types[1].getNumElements() <= 8;
1238 return Query.
Types[1].isFixedVector() &&
1239 Query.
Types[1].getNumElements() <= 16;
1242 .minScalarOrElt(0, s8)
1243 .moreElementsToNextPow2(1)
1244 .clampMaxNumElements(1, s64, 2)
1245 .clampMaxNumElements(1, s32, 4)
1246 .clampMaxNumElements(1, s16, 8)
1247 .clampMaxNumElements(1, s8, 16)
1248 .clampMaxNumElements(1, p0, 2)
1251 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1253 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1254 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1255 {nxv8s16, s32, s64},
1256 {nxv4s32, s32, s64},
1257 {nxv2s64, s64, s64}})
1259 .widenVectorEltsToVectorMinSize(0, 64)
1260 .clampNumElements(0, v8s8, v16s8)
1261 .clampNumElements(0, v4s16, v8s16)
1262 .clampNumElements(0, v2s32, v4s32)
1263 .clampMaxNumElements(0, s64, 2)
1264 .clampMaxNumElements(0, p0, 2)
1267 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1268 .legalFor({{v8s8, s8},
1276 .clampNumElements(0, v4s32, v4s32)
1277 .clampNumElements(0, v2s64, v2s64)
1278 .minScalarOrElt(0, s8)
1279 .widenVectorEltsToVectorMinSize(0, 64)
1280 .widenScalarOrEltToNextPow2(0)
1281 .minScalarSameAs(1, 0);
1283 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1285 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1294 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1298 return Query.
Types[0].getNumElements() >
1299 Query.
Types[1].getNumElements();
1305 return Query.
Types[0].getNumElements() <
1306 Query.
Types[1].getNumElements();
1309 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1310 .clampNumElements(0, v8s8, v16s8)
1311 .clampNumElements(0, v4s16, v8s16)
1312 .clampNumElements(0, v4s32, v4s32)
1313 .clampNumElements(0, v2s64, v2s64)
1322 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1323 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1325 return Query.
Types[0].isFixedVector() &&
1326 Query.
Types[0].getScalarSizeInBits() < 8;
1330 return Query.
Types[0].isFixedVector() &&
1331 Query.
Types[1].isFixedVector() &&
1332 Query.
Types[0].getScalarSizeInBits() >= 8 &&
1334 Query.
Types[0].getSizeInBits() <= 128 &&
1335 Query.
Types[1].getSizeInBits() <= 64;
1347 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1348 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1353 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1354 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1356 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1358 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1360 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1362 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1364 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1369 getActionDefinitionsBuilder(G_BZERO).unsupported();
1371 getActionDefinitionsBuilder(G_MEMSET)
1372 .legalForCartesianProduct({p0}, {s64}, {s64})
1373 .customForCartesianProduct({p0}, {s8}, {s64})
1376 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1377 .legalForCartesianProduct({p0}, {p0}, {s64})
1381 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1382 .legalForCartesianProduct({p0}, {p0}, {s64});
1385 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1392 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1393 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1394 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1395 .minScalarOrElt(0, MinFPScalar)
1396 .clampMaxNumElements(1, s64, 2)
1397 .clampMaxNumElements(1, s32, 4)
1398 .clampMaxNumElements(1, s16, 8)
1399 .moreElementsToNextPow2(1)
1406 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1407 .minScalarOrElt(0, MinFPScalar)
1408 .clampMaxNumElements(1, s64, 2)
1409 .clampMaxNumElements(1, s32, 4)
1410 .clampMaxNumElements(1, s16, 8)
1411 .clampMaxNumElements(1, s32, 2)
1412 .clampMaxNumElements(1, s16, 4)
1416 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1420 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1421 .legalFor({{i8, v8i8},
1429 .clampMaxNumElements(1, s64, 2)
1430 .clampMaxNumElements(1, s32, 4)
1431 .clampMaxNumElements(1, s16, 8)
1432 .clampMaxNumElements(1, s8, 16)
1433 .widenVectorEltsToVectorMinSize(1, 64)
1436 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1437 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1438 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1439 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1440 .minScalarOrElt(0, MinFPScalar)
1441 .clampMaxNumElements(1, s64, 2)
1442 .clampMaxNumElements(1, s32, 4)
1443 .clampMaxNumElements(1, s16, 8)
1447 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1448 .clampMaxNumElements(1, s32, 2)
1449 .clampMaxNumElements(1, s16, 4)
1450 .clampMaxNumElements(1, s8, 8)
1454 getActionDefinitionsBuilder(
1455 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1456 .legalFor({{i8, v8i8},
1464 return Query.
Types[1].isVector() &&
1465 Query.
Types[1].getElementType() != s8 &&
1466 Query.
Types[1].getNumElements() & 1;
1469 .clampMaxNumElements(1, s64, 2)
1470 .clampMaxNumElements(1, s32, 4)
1471 .clampMaxNumElements(1, s16, 8)
1472 .clampMaxNumElements(1, s8, 16)
1476 getActionDefinitionsBuilder(
1477 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1493 return std::make_pair(1, SrcTy.
divide(2));
1499 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1502 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1503 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1506 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1508 getActionDefinitionsBuilder(G_PREFETCH).custom();
1510 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1512 getLegacyLegalizerInfo().computeTables();
1758 auto LowerUnaryOp = [&
MI, &MIB](
unsigned Opcode) {
1760 MI.eraseFromParent();
1763 auto LowerBinOp = [&
MI, &MIB](
unsigned Opcode) {
1765 {
MI.getOperand(2),
MI.getOperand(3)});
1766 MI.eraseFromParent();
1769 auto LowerTriOp = [&
MI, &MIB](
unsigned Opcode) {
1771 {
MI.getOperand(2),
MI.getOperand(3),
MI.getOperand(4)});
1772 MI.eraseFromParent();
1777 switch (IntrinsicID) {
1778 case Intrinsic::vacopy: {
1779 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1780 unsigned VaListSize =
1781 (ST->isTargetDarwin() || ST->isTargetWindows())
1783 : ST->isTargetILP32() ? 20 : 32;
1791 VaListSize,
Align(PtrSize)));
1795 VaListSize,
Align(PtrSize)));
1796 MI.eraseFromParent();
1799 case Intrinsic::get_dynamic_area_offset: {
1801 MI.eraseFromParent();
1804 case Intrinsic::aarch64_mops_memset_tag: {
1805 assert(
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1808 auto &
Value =
MI.getOperand(3);
1810 Value.setReg(ExtValueReg);
1813 case Intrinsic::aarch64_prefetch: {
1814 auto &AddrVal =
MI.getOperand(1);
1816 int64_t IsWrite =
MI.getOperand(2).getImm();
1817 int64_t
Target =
MI.getOperand(3).getImm();
1818 int64_t IsStream =
MI.getOperand(4).getImm();
1819 int64_t IsData =
MI.getOperand(5).getImm();
1821 unsigned PrfOp = (IsWrite << 4) |
1827 MI.eraseFromParent();
1830 case Intrinsic::aarch64_range_prefetch: {
1831 auto &AddrVal =
MI.getOperand(1);
1833 int64_t IsWrite =
MI.getOperand(2).getImm();
1834 int64_t IsStream =
MI.getOperand(3).getImm();
1835 unsigned PrfOp = (IsStream << 2) | IsWrite;
1837 MIB.
buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1840 .
addUse(
MI.getOperand(4).getReg());
1841 MI.eraseFromParent();
1844 case Intrinsic::aarch64_prefetch_ir: {
1845 auto &AddrVal =
MI.getOperand(1);
1847 MI.eraseFromParent();
1850 case Intrinsic::aarch64_neon_uaddv:
1851 case Intrinsic::aarch64_neon_saddv:
1852 case Intrinsic::aarch64_neon_umaxv:
1853 case Intrinsic::aarch64_neon_smaxv:
1854 case Intrinsic::aarch64_neon_uminv:
1855 case Intrinsic::aarch64_neon_sminv: {
1856 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1857 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1858 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1860 auto OldDst =
MI.getOperand(0).getReg();
1861 auto OldDstTy = MRI.
getType(OldDst);
1863 if (OldDstTy == NewDstTy)
1869 MI.getOperand(0).setReg(NewDst);
1873 MIB.
buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1878 case Intrinsic::aarch64_neon_uaddlp:
1879 case Intrinsic::aarch64_neon_saddlp: {
1880 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1882 : AArch64::G_SADDLP;
1884 MI.eraseFromParent();
1888 case Intrinsic::aarch64_neon_uaddlv:
1889 case Intrinsic::aarch64_neon_saddlv: {
1890 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1892 : AArch64::G_SADDLV;
1919 MI.eraseFromParent();
1923 case Intrinsic::aarch64_neon_smax:
1924 return LowerBinOp(TargetOpcode::G_SMAX);
1925 case Intrinsic::aarch64_neon_smin:
1926 return LowerBinOp(TargetOpcode::G_SMIN);
1927 case Intrinsic::aarch64_neon_umax:
1928 return LowerBinOp(TargetOpcode::G_UMAX);
1929 case Intrinsic::aarch64_neon_umin:
1930 return LowerBinOp(TargetOpcode::G_UMIN);
1931 case Intrinsic::aarch64_neon_fmax:
1932 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1933 case Intrinsic::aarch64_neon_fmin:
1934 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1935 case Intrinsic::aarch64_neon_fmaxnm:
1936 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1937 case Intrinsic::aarch64_neon_fminnm:
1938 return LowerBinOp(TargetOpcode::G_FMINNUM);
1939 case Intrinsic::aarch64_neon_pmull:
1940 case Intrinsic::aarch64_neon_pmull64:
1941 return LowerBinOp(AArch64::G_PMULL);
1942 case Intrinsic::aarch64_neon_smull:
1943 return LowerBinOp(AArch64::G_SMULL);
1944 case Intrinsic::aarch64_neon_umull:
1945 return LowerBinOp(AArch64::G_UMULL);
1946 case Intrinsic::aarch64_neon_sabd:
1947 return LowerBinOp(TargetOpcode::G_ABDS);
1948 case Intrinsic::aarch64_neon_uabd:
1949 return LowerBinOp(TargetOpcode::G_ABDU);
1950 case Intrinsic::aarch64_neon_uhadd:
1951 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1952 case Intrinsic::aarch64_neon_urhadd:
1953 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1954 case Intrinsic::aarch64_neon_shadd:
1955 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1956 case Intrinsic::aarch64_neon_srhadd:
1957 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1958 case Intrinsic::aarch64_neon_sqshrn: {
1963 {MRI.
getType(
MI.getOperand(2).getReg())},
1964 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1966 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {
MI.getOperand(0)}, {Shr});
1967 MI.eraseFromParent();
1970 case Intrinsic::aarch64_neon_sqshrun: {
1975 {MRI.
getType(
MI.getOperand(2).getReg())},
1976 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1978 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {
MI.getOperand(0)}, {Shr});
1979 MI.eraseFromParent();
1982 case Intrinsic::aarch64_neon_sqrshrn: {
1986 auto Shr = MIB.
buildInstr(AArch64::G_SRSHR_I,
1987 {MRI.
getType(
MI.getOperand(2).getReg())},
1988 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1990 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {
MI.getOperand(0)}, {Shr});
1991 MI.eraseFromParent();
1994 case Intrinsic::aarch64_neon_sqrshrun: {
1998 auto Shr = MIB.
buildInstr(AArch64::G_SRSHR_I,
1999 {MRI.
getType(
MI.getOperand(2).getReg())},
2000 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
2002 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {
MI.getOperand(0)}, {Shr});
2003 MI.eraseFromParent();
2006 case Intrinsic::aarch64_neon_uqrshrn: {
2010 auto Shr = MIB.
buildInstr(AArch64::G_URSHR_I,
2011 {MRI.
getType(
MI.getOperand(2).getReg())},
2012 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
2014 MIB.
buildInstr(TargetOpcode::G_TRUNC_USAT_U, {
MI.getOperand(0)}, {Shr});
2015 MI.eraseFromParent();
2018 case Intrinsic::aarch64_neon_uqshrn: {
2023 {MRI.
getType(
MI.getOperand(2).getReg())},
2024 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
2026 MIB.
buildInstr(TargetOpcode::G_TRUNC_USAT_U, {
MI.getOperand(0)}, {Shr});
2027 MI.eraseFromParent();
2030 case Intrinsic::aarch64_neon_sqshlu: {
2036 MIB.
buildInstr(AArch64::G_SQSHLU_I, {
MI.getOperand(0)},
2038 .addImm(ShiftAmount->getSExtValue());
2039 MI.eraseFromParent();
2044 case Intrinsic::aarch64_neon_vsli: {
2046 AArch64::G_SLI, {
MI.getOperand(0)},
2047 {
MI.getOperand(2),
MI.getOperand(3),
MI.getOperand(4).getImm()});
2048 MI.eraseFromParent();
2051 case Intrinsic::aarch64_neon_vsri: {
2053 AArch64::G_SRI, {
MI.getOperand(0)},
2054 {
MI.getOperand(2),
MI.getOperand(3),
MI.getOperand(4).getImm()});
2055 MI.eraseFromParent();
2058 case Intrinsic::aarch64_neon_abs: {
2060 MIB.
buildInstr(TargetOpcode::G_ABS, {
MI.getOperand(0)}, {
MI.getOperand(2)});
2061 MI.eraseFromParent();
2064 case Intrinsic::aarch64_neon_sqadd: {
2066 return LowerBinOp(TargetOpcode::G_SADDSAT);
2069 case Intrinsic::aarch64_neon_sqsub: {
2071 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2074 case Intrinsic::aarch64_neon_uqadd: {
2076 return LowerBinOp(TargetOpcode::G_UADDSAT);
2079 case Intrinsic::aarch64_neon_uqsub: {
2081 return LowerBinOp(TargetOpcode::G_USUBSAT);
2084 case Intrinsic::aarch64_neon_udot:
2085 return LowerTriOp(AArch64::G_UDOT);
2086 case Intrinsic::aarch64_neon_sdot:
2087 return LowerTriOp(AArch64::G_SDOT);
2088 case Intrinsic::aarch64_neon_usdot:
2089 return LowerTriOp(AArch64::G_USDOT);
2090 case Intrinsic::aarch64_neon_sqxtn:
2091 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2092 case Intrinsic::aarch64_neon_sqxtun:
2093 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2094 case Intrinsic::aarch64_neon_uqxtn:
2095 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2096 case Intrinsic::aarch64_neon_fcvtzu:
2097 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2098 case Intrinsic::aarch64_neon_fcvtzs:
2099 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2101 case Intrinsic::vector_reverse: