#include "llvm/IR/IntrinsicsAArch64.h"
#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;
using namespace TargetOpcode;
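// PackedVectorAllTypeList (below) collects the packed 64- and 128-bit NEON
// vector types that several of the rules accept wholesale.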
  std::initializer_list<LLT> PackedVectorAllTypeList = {

  if (!ST.hasNEON() || !ST.hasFPARMv8()) {

  const bool HasFP16 = ST.hasFullFP16();
  const LLT &MinFPScalar = HasFP16 ? s16 : s32;
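  // MinFPScalar (above) is the narrowest FP scalar the rules keep legal: s16
  // only when the subtarget has full FP16 support, otherwise s32, so f16
  // operations are promoted to f32 on older FPUs.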
      .legalFor({p0, s1, s8, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)

        return Query.Types[0].isVector() &&
               (Query.Types[0].getElementType() != s64 ||
                Query.Types[0].getNumElements() != 2);

        LLT EltTy = Query.Types[0].getElementType();
        return std::make_pair(0, EltTy);
      .legalFor(PackedVectorAllTypeList)

      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .widenScalarToNextPow2(0)

      .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})

        return Query.Opcode == G_MUL && Query.Types[0] == v2s64;

      .widenScalarToNextPow2(0)
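      // The predicate below (used with customIf) matches a 32-bit scalar
      // shift whose amount operand (type index 1) is also 32 bits wide;
      // legalizeShlAshrLshr() further down rebuilds constant amounts as
      // 64-bit constants so the imported patterns can select them.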
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;

      .widenScalarToNextPow2(0)

      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalar(1, s64, s64);
      .legalFor({s32, s64})

      .clampScalar(0, s32, s64)

      .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})

      .clampScalarOrElt(0, s32, s64)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

      .widenScalarToNextPow2(0, 32)

      .legalFor({s64, v8s16, v16s8, v4s32})

      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .clampNumElements(0, v8s8, v16s8)

      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s1}, {s64, s1}})
      .clampScalar(0, s32, s64)
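      // Add/sub with carry or overflow (listed above): the value type may be
      // s32 or s64 and the carry/overflow output is an s1; narrower value
      // types are clamped up to s32.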
      .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, MinFPScalar, s64)

       G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
       G_FNEARBYINT, G_INTRINSIC_LRINT})

        const auto &Ty = Query.Types[0];
        return Ty.isVector() && Ty.getElementType() == s16 &&

          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })

        return Query.Types[0] == s16 && !ST.hasFullFP16();

          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
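  // There are no AArch64 instructions for the transcendental operations
  // listed above, so they are lowered to runtime library calls for the
  // listed types.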
                                 {v2s32, p0, s64, 8}})
      .widenScalarToNextPow2(0)

        return Query.Types[0] == s128 &&

      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {v16s8, p0, s128, 8},
                                 {v8s16, p0, s128, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
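      // Each memory-desc entry above is {result type, pointer type, memory
      // type, minimum alignment in bits}; e.g. {s32, p0, s8, 8} is an
      // extending load of an 8-bit value into an s32 register.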
      .widenScalarToNextPow2(0, 8)

            return Query.Types[0].isScalar() &&
                   Query.Types[0].getSizeInBits() > 32;

      .clampMaxNumElements(0, s8, 16)

        return Query.Types[0] == s128 &&

      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {v16s8, p0, s128, 8},
                                 {v8s16, p0, s128, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      .clampScalar(0, s8, s64)

        return Query.Types[0].isScalar() &&

      .clampMaxNumElements(0, s8, 16)
      .widenScalarToNextPow2(0)

        const auto &Ty = Query.Types[0];
        if (HasFP16 && Ty == s16)
          return true;
        return Ty == s32 || Ty == s64 || Ty == s128;

      .clampScalar(0, MinFPScalar, s128);
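  // Net effect of the rule above: FP scalars are legal at s16 (with full
  // FP16), s32, s64 and s128; narrower scalars are widened up to MinFPScalar.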
      .legalFor({{s32, s32},

      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(

      .clampNumElements(0, v2s32, v4s32);

    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())

      .legalIf(ExtLegalFunc)

          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .clampMaxNumElements(0, s32, 2);

          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .clampMaxNumElements(0, s64, 2);

      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .widenScalarToNextPow2(0)

      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)

      .widenScalarToNextPow2(0)

      .legalFor({{v2s64, v2p0}})

        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();

      .legalFor({{p0, s64}, {v2p0, v2s64}});
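  // Pointer <-> integer conversions are only legal between 64-bit pointers
  // and s64 (scalar and 2-element vector forms); the size check above guards
  // the mismatched-width cases.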
       v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,

      .clampScalar(0, s8, s64)

        return Query.Types[0].getSizeInBits() == 128;

      .clampScalar(0, s32, s64)

      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
      .clampScalar(0, s32, s64)
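  // For G_MERGE_VALUES operand 0 is the wide (merged) value, while for
  // G_UNMERGE_VALUES it is operand 1, so the "big" and "little" type indices
  // swap between the two opcodes handled in the loop below.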
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

          switch (Q.Types[BigTyIdx].getSizeInBits()) {

          switch (Q.Types[LitTyIdx].getSizeInBits()) {

        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;

        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
               VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||

            return Query.Types[1].getNumElements() <= 2;

            return Query.Types[1].getNumElements() <= 4;

            return Query.Types[1].getNumElements() <= 8;

            return Query.Types[1].getNumElements() <= 16;
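      // These element-count checks gate how far the element type may be
      // promoted (roughly: up to s64 for <= 2 elements, s32 for <= 4, s16 for
      // <= 8, s8 for <= 16) so the whole vector stays within 128 bits.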
      .minScalarOrElt(0, s8)

      .clampNumElements(0, v4s32, v4s32)

          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})

      .widenScalarToNextPow2(0, 32)

  for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {

        return !Query.Types[1].isVector();

      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});

        return Query.Types[0] == p0 && Query.Types[1] == s64;

      .customForCartesianProduct({p0}, {s8}, {s64})

      .legalForCartesianProduct({p0}, {p0}, {s64})
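      // The *ForCartesianProduct forms accept every combination of the listed
      // per-operand types, e.g. above: a p0 destination, a p0 source (or s8
      // value for the custom rule), and an s64 size.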
      .legalFor({{s32, v2s32}, {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)

          {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)

      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})

        return std::make_pair(1, SrcTy.divide(2));

        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;

      .customFor({{s32, s32}, {s64, s64}});

      .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
      .clampScalar(0, s32, s128)

      .legalFor({MinFPScalar, s32, s64})
      .minScalar(0, MinFPScalar);

      .legalFor({MinFPScalar, s32, s64})
      .minScalar(0, MinFPScalar);

      .legalFor({{s64, s32}, {s64, s64}});

  verify(*ST.getInstrInfo());
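// legalizeCustom(): dispatch each opcode that was marked Custom above to its
// dedicated helper below.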
  switch (MI.getOpcode()) {
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_TRUNC:
    return legalizeVectorTrunc(MI, Helper);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  case TargetOpcode::G_CTPOP:
    return legalizeCTPOP(MI, MRI, Helper);
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  MI.getOperand(2).setReg(NewAmt.getReg(0));

  for (int I = 0; I < NumParts; ++I)
bool AArch64LegalizerInfo::legalizeVectorTrunc(

  for (unsigned I = 0; I < SplitSrcs.size(); ++I)

  MI.getOperand(1).setReg(Concat.getReg(0));
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(

  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);

  auto &GlobalOp = MI.getOperand(1);
  const auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())

  auto Offset = GlobalOp.getOffset();

           "Should not have folded in an offset for a tagged global!");

               .addGlobalAddress(GV, 0x100000000,

      .addGlobalAddress(GV, Offset,
  MI.eraseFromParent();
  switch (MI.getIntrinsicID()) {
  case Intrinsic::vacopy: {

    unsigned VaListSize =

        VaListSize, Align(PtrSize)));

        VaListSize, Align(PtrSize)));
    MI.eraseFromParent();

  case Intrinsic::get_dynamic_area_offset: {

    MI.eraseFromParent();

  case Intrinsic::aarch64_mops_memset_tag: {
    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);

    auto &Value = MI.getOperand(3);

    Value.setReg(ZExtValueReg);
bool AArch64LegalizerInfo::legalizeShlAshrLshr(

  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);

  int64_t Amount = VRegAndVal->Value.getSExtValue();

  MI.getOperand(2).setReg(ExtCst.getReg(0));

      isShiftedInt<7, 3>(NewOffset)) {
bool AArch64LegalizerInfo::legalizeLoadStore(

  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);

    assert((*MI.memoperands_begin())->getSuccessOrdering() ==

           (*MI.memoperands_begin())->getSuccessOrdering() ==

    assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
      NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});

          AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});

    MI.eraseFromParent();

    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");

  auto &MMO = **MI.memoperands_begin();

  if (MI.getOpcode() == TargetOpcode::G_STORE) {

  auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);

  MI.eraseFromParent();
  Register ListPtr = MI.getOperand(1).getReg();

  if (Alignment > PtrAlign) {

    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));

      ValTy, std::max(Alignment, PtrAlign)));

  MI.eraseFromParent();
bool AArch64LegalizerInfo::legalizeBitfieldExtract(

  if (!ST->hasNEON() ||
      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat))

         "Expected src and dst to have the same type!");

  assert((Size == 32 || Size == 64 || Size == 128) &&
         "Expected only 32, 64, or 128 bit scalars!");

    Opc = Intrinsic::aarch64_neon_uaddlv;

    Opc = Intrinsic::aarch64_neon_uaddlp;

    Opc = Intrinsic::aarch64_neon_uaddlp;

    Opc = Intrinsic::aarch64_neon_uaddlp;

    Opc = Intrinsic::aarch64_neon_uaddlp;

    Opc = Intrinsic::aarch64_neon_uaddlp;
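  // legalizeCTPOP: the population count is computed on a vector of byte
  // counts (CNT) and then reduced, either with a single across-vector UADDLV
  // or with a chain of pairwise widening adds (UADDLP), depending on the
  // requested result width.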
  for (LLT HTy : HAddTys) {

  if (Ty.isScalar() && (Size == 64 || Size == 128))

  MI.eraseFromParent();
bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(

  auto Addr = MI.getOperand(1).getReg();
  auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
  auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));

    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();

      Opcode = AArch64::CASPAX;

      Opcode = AArch64::CASPLX;

      Opcode = AArch64::CASPALX;

      Opcode = AArch64::CASPX;
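    // With LSE, pick the CASP (compare-and-swap pair) variant matching the
    // merged atomic ordering above: acquire, release, acq_rel/seq_cst, or the
    // relaxed form.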
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
        .addUse(DesiredI->getOperand(0).getReg())

        .addUse(DesiredI->getOperand(1).getReg())
        .addImm(AArch64::subo64);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})

        .addImm(AArch64::subo64);

    CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});

    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();

      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;

      Opcode = AArch64::CMP_SWAP_128_RELEASE;

      Opcode = AArch64::CMP_SWAP_128;

      Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
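    // Without LSE, fall back to the CMP_SWAP_128* pseudos (again chosen by
    // ordering); these are expanded later into an exclusive load/store
    // (LDXP/STXP style) loop.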
    CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
                                {Addr, DesiredI->getOperand(0),
                                 DesiredI->getOperand(1), NewI->getOperand(0),

  MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi});
  MI.eraseFromParent();
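// legalizeCTTZ: count-trailing-zeros is implemented as count-leading-zeros of
// the bit-reversed input (RBIT + CLZ), as built below.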
  MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
  MI.eraseFromParent();

  if (MI.getOpcode() == TargetOpcode::G_MEMSET) {

    auto &Value = MI.getOperand(1);

    Value.setReg(ZExtValueReg);
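    // For G_MEMSET the fill value is widened to a 64-bit register
    // (ZExtValueReg) above; only its low 8 bits are meaningful.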