#define DEBUG_TYPE "legalizer"

using namespace LegalizeActions;
using namespace MIPatternMatch;
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}
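// A minimal worked example (illustrative values, not from this file):
// breaking a 100-bit scalar into 32-bit pieces gives NumParts = 3 and
// LeftoverSize = 4, so LeftoverTy becomes s4 and the result is {3, 1}:
//
//   LLT Leftover;
//   auto [Parts, Left] = getNarrowTypeBreakDown(LLT::scalar(100),
//                                               LLT::scalar(32), Leftover);
//   // Parts == 3, Left == 1, Leftover == LLT::scalar(4)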
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Bitcast:
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    return lower(MI, Step.TypeIdx, Step.NewType);
void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs, LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs;
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
      AllRegs.push_back(Reg);
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
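// The scalar path above works in two steps: every part and leftover register
// is first unmerged down to a common GCD type, and the GCD-typed pieces are
// then re-merged up to an LCM type covering the destination, from which the
// final value is extracted. Illustrative example (not from this file):
// re-assembling an s48 result from one s32 part and one s16 leftover uses
// GCDTy = s16 — the s32 part contributes two s16 pieces, the leftover one
// more, and the three pieces merge directly into the s48 destination.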
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 32> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (!MRI.getType(Leftover).isVector())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
  if (SrcTy == GCDTy) {
    // The source already evenly divides into GCD-typed pieces.
    Parts.push_back(SrcReg);
  }

  extractGCDType(Parts, GCDTy, SrcReg);
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);
  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  // Get a value we can use to pad the source if the sources won't evenly
  // cover the result type.
  Register PadReg;
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);
      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt = MIRBuilder.buildConstant(LLT::scalar(64),
                                               GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  SmallVector<Register, 4> Remerge(NumParts);
  SmallVector<Register, 4> SubMerge(NumSubParts);
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];
      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If a whole piece is padding, materialize it once as a natural-sized
    // constant or undef rather than a merge of smaller pieces.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
    }

    if (AllPadReg) {
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign-extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
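// Padding example (illustrative, not from this file): merging an s48 value
// into an s96 LCM type via s32 parts and an s16 GCD type needs
// NumParts * NumSubParts = 3 * 2 = 6 GCD pieces, but only three real sources
// exist, so three pad pieces are appended. G_ZEXT pads with zero constants,
// G_ANYEXT with undef, and G_SEXT with copies of the sign bit taken from the
// last real piece; once an entire NarrowTy part is padding, AllPadReg lets
// later parts reuse the same value instead of emitting new instructions.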
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
    return;
  }

  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)
    UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:  return RTLIB::LibcallPrefix##32;                                 \
    case 64:  return RTLIB::LibcallPrefix##64;                                 \
    case 128: return RTLIB::LibcallPrefix##128;                                \
    default:  llvm_unreachable("unexpected size");                             \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:  return RTLIB::LibcallPrefix##32;                                 \
    case 64:  return RTLIB::LibcallPrefix##64;                                 \
    case 80:  return RTLIB::LibcallPrefix##80;                                 \
    case 128: return RTLIB::LibcallPrefix##128;                                \
    default:  llvm_unreachable("unexpected size");                             \
    }                                                                          \
  } while (0)
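// For reference, RTLIBCASE_INT(MUL_I) expands to a size dispatch of the form
// (sketch; `Size` comes from the enclosing function):
//
//   switch (Size) {
//   case 32:  return RTLIB::MUL_I32;
//   case 64:  return RTLIB::MUL_I64;
//   case 128: return RTLIB::MUL_I128;
//   default:  llvm_unreachable("unexpected size");
//   }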
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  if (MI.getOpcode() == TargetOpcode::G_BZERO)
    return false;

  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
    return false;

  Register PReg = Next->getOperand(0).getReg();
  if (Ret->getNumImplicitOperands() != 1)
    return false;

  if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
    return false;

  Info.OrigRet = Result;
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI->getNextNode());
    Args.push_back({MO.getReg(), OpType, 0});

  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args,
                       LocObserver, &MI);

  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();
    // Need to derive an IR type for the call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }
  unsigned Opc = MI.getOpcode();
  RTLIB::Libcall RTLibcall;
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  }

  const char *Name = TLI.getLibcallName(RTLibcall);

  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI.getNextNode());
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
}
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    Register Success;
    LLT SuccessLLT;
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
    }
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val =
          MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
              .getReg(0);
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val =
          MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
              .getReg(0);
    break;
  }
  }

  const char *Name = TLI.getLibcallName(RTLibcall);

  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:

  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}},
                       LocObserver, &MI);

  RTLIB::Libcall RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  }

  auto &Ctx = MF.getFunction().getContext();

  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
                       LocObserver, nullptr);

  auto &Ctx = MF.getFunction().getContext();

  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
                       LocObserver, nullptr);
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {

  const auto Cond = Cmp->getCond();

  const auto BuildLibcall =
          {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},

      Libcall != RTLIB::UNKNOWN_LIBCALL &&
  if (BuildLibcall(Libcall, ICmpPred, DstReg)) {

  const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
  const auto Uno = BuildLibcall(UnoLibcall, UnoPred);

  if (NotOeq && NotUno)

  const auto [InversedLibcall, InversedPred] =
      getFCMPLibcallDesc(CmpInst::getInversePredicate(Pred), Size);
  if (!BuildLibcall(InversedLibcall,
                    CmpInst::getInversePredicate(InversedPred), DstReg))

  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();

  unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
                         {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver,
                         &MI);
    MI.eraseFromParent();
  case TargetOpcode::G_FPOWI: {
    LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
    std::initializer_list<CallLowering::ArgInfo> Args = {
        {MI.getOperand(1).getReg(), HLTy, 0},
        {MI.getOperand(2).getReg(), ITy, 1}};
                         Args, LocObserver, &MI);
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    if (!FromTy || !ToTy)
      return UnableToLegalize;
  case TargetOpcode::G_FCMP: {
    MI.eraseFromParent();
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
      return UnableToLegalize;
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
      return UnableToLegalize;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    MI.eraseFromParent();
  case TargetOpcode::G_GET_FPENV:
  case TargetOpcode::G_GET_FPMODE: {
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_SET_FPMODE: {
  case TargetOpcode::G_RESET_FPENV:
  case TargetOpcode::G_RESET_FPMODE: {
  }
  MI.eraseFromParent();
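// As a concrete example of the dispatch above (typical RTLIB defaults;
// individual targets may rename these): an s32 G_FREM becomes a call to
// fmodf and an s64 G_FREM a call to fmod, with the original instruction
// erased once createLibcall has emitted the call sequence.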
  switch (MI.getOpcode()) {
  case TargetOpcode::G_IMPLICIT_DEF: {
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      MI.eraseFromParent();
    }

    int NumParts = SizeOp0 / NarrowSize;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
    MI.eraseFromParent();
  }
  case TargetOpcode::G_CONSTANT: {
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    int NumParts = TotalSize / NarrowSize;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
    }

    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy, Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(), Ty, NarrowTy, PartRegs, LeftoverTy,
                LeftoverRegs);

    MI.eraseFromParent();
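    // Worked example (illustrative): narrowing G_CONSTANT i64
    // 0x1122334455667788 to s32 parts takes Offset = 0 and 32, producing the
    // part constants 0x55667788 and 0x11223344 via Val.lshr(Offset).trunc(32);
    // insertParts then stitches the s32 registers back into the original s64
    // definition.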
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC: {
    LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
    MI.eraseFromParent();
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
  case TargetOpcode::G_FREEZE: {
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
    MI.eraseFromParent();
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_EXTRACT:
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
      LoadMI.eraseFromParent();
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    auto &MMO = LoadMI.getMMO();
    if (MemSize == NarrowSize) {
    } else if (MemSize < NarrowSize) {
    } else if (MemSize > NarrowSize) {

    if (isa<GZExtLoad>(LoadMI))
    LoadMI.eraseFromParent();
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);
    Register SrcReg = StoreMI.getValueReg();

    int NumParts = SizeOp0 / NarrowSize;
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
      StoreMI.eraseFromParent();
  case TargetOpcode::G_SELECT:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    switch (MI.getOpcode()) {
    case TargetOpcode::G_CTLZ:
    case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    case TargetOpcode::G_CTTZ:
    case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    case TargetOpcode::G_CTPOP:
    }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_PHI: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
    for (unsigned i = 0; i < NumParts; ++i) {
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    MI.eraseFromParent();
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
  case TargetOpcode::G_ICMP: {
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
      return UnableToLegalize;

    for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
      auto LHS = std::get<0>(LHSAndRHS);
      auto RHS = std::get<1>(LHSAndRHS);
    }
    for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
      auto LHS = std::get<0>(LHSAndRHS);
      auto RHS = std::get<1>(LHSAndRHS);
      LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
      buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                          TargetOpcode::G_ZEXT);
    }

    assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
    for (unsigned I = 2, E = Xors.size(); I < E; ++I)

    assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
    assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
    MI.eraseFromParent();
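    // For equality predicates the parts are compared without a carry chain:
    // each LHS/RHS part pair is XORed, the XOR results are ORed together, and
    // a single narrow compare against zero decides the result. Non-equality
    // predicates are only handled here for the exact two-part case asserted
    // above, combining a high-part compare with a low-part unsigned compare.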
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_SEXT_INREG: {
    int64_t SizeInBits = MI.getOperand(2).getImm();
    MO1.setReg(TruncMIB.getReg(0));

    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i) {
    for (int i = 0; i < NumParts; ++i) {
      PartialExtensionReg = DstRegs.back();
      assert(PartialExtensionReg &&
             "Expected to visit partial extension before full");
      if (FullExtensionReg) {
      FullExtensionReg = DstRegs.back();
          TargetOpcode::G_SEXT_INREG, {NarrowTy},
      PartialExtensionReg = DstRegs.back();
    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
                 MIRBuilder, MRI);

    for (unsigned i = 0; i < NumParts; ++i) {
                                        {SrcRegs[NumParts - 1 - i]});
    MI.eraseFromParent();
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP:
  case TargetOpcode::G_VSCALE: {
    MI.eraseFromParent();
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  if (DstTy.isVector())
    return UnableToLegalize;

  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register NextResult = I + 1 == NumOps && WideTy == DstTy
                                ? DstReg
                                : MRI.createGenericVirtualRegister(WideTy);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
  const int GCD = std::gcd(SrcSize, WideSize);

  if (GCD == SrcSize) {
    for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
  }

  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
  }

  const int PartsPerGCD = WideSize / GCD;

  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
  }

  MI.eraseFromParent();
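// When WideTy covers the whole destination, the merge above is emulated by
// shifting each source into its Offset position and ORing it into an
// accumulator; e.g. (illustrative) widening a G_MERGE_VALUES of two s16
// halves into an s32 result computes zext(lo) | (zext(hi) << 16). Otherwise
// the GCD path re-slices the sources into WideTy-sized groups.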
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();

  Register Dst0Reg = MI.getOperand(0).getReg();

    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");

    for (int I = 1; I != NumDst; ++I) {
    }

    MI.eraseFromParent();

    LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");

  const int NumUnmerge = Unmerge->getNumOperands() - 1;

  if (PartsPerRemerge == 1) {
    for (int I = 0; I != NumUnmerge; ++I) {
      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        MIB.addDef(MI.getOperand(Idx).getReg());
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
      }

      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  if (SrcTy.isVector() || DstTy.isVector())
    return UnableToLegalize;

    if (DstTy.isPointer())
    MI.eraseFromParent();

  LLT ShiftTy = SrcTy;
  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  unsigned Opcode;
  unsigned ExtOpcode;
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
                          {LHSExt, RHSExt, *CarryIn})
  MI.eraseFromParent();
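// The overflow bit is recovered at the wide width: both inputs are extended
// with ExtOpcode (sext for signed, zext for unsigned variants), the
// arithmetic runs on WideTy, and overflow is detected by comparing the wide
// result with its value truncated to the original width and re-extended.
// Carry-consuming variants (G_*ADDE/G_*SUBE) map to unsigned G_UADDE/G_USUBE
// and pass CarryIn through unchanged.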
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
                                {ShiftL, ShiftR}, MI.getFlags());
  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;

  LLT OverflowTy = MRI.getType(OriginalOverflow);

  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

  unsigned MulOpc =
      WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;

  if (WideMulCanOverflow)
    Mul = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
                                {LeftOperand, RightOperand});

  if (WideMulCanOverflow) {
  MI.eraseFromParent();
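// Sketch of the idea: an s8 G_SMULO widened to s16 sign-extends both inputs,
// multiplies once at 16 bits, and flags overflow when the product differs
// from the sign-extension of its low 8 bits. If even WideTy cannot hold the
// full product (WideMulCanOverflow), the widened multiply keeps its own
// overflow output and the two conditions are ORed together.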
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
                              Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;
    unsigned NewOpc = Opcode;
    if (NewOpc == TargetOpcode::G_CTTZ) {
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
    }

    if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
    }

    if (Opcode == TargetOpcode::G_CTLZ) {
    }

    MI.eraseFromParent();
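    // The widened count needs fixing up (illustrative example): an s8 G_CTLZ
    // computed as an s32 G_CTLZ of the zero-extended input must subtract the
    // 24 extra leading zeros from the result. G_CTTZ instead any-extends and
    // ORs in a bit at the original width so the count saturates correctly,
    // which is what allows switching to G_CTTZ_ZERO_UNDEF above.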
  case TargetOpcode::G_BSWAP: {
    MI.getOperand(0).setReg(DstExt);
  case TargetOpcode::G_BITREVERSE: {
    MI.getOperand(0).setReg(DstExt);
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
  case TargetOpcode::G_ABS:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SHUFFLE_VECTOR:
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ROTR:
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_SDIVREM:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
                                                    : TargetOpcode::G_ZEXT;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_UDIVREM:
  case TargetOpcode::G_SELECT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_IS_FPCLASS:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_STORE: {
        TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
  case TargetOpcode::G_CONSTANT: {
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
  case TargetOpcode::G_FCONSTANT: {
    MI.eraseFromParent();
  case TargetOpcode::G_IMPLICIT_DEF: {
  case TargetOpcode::G_BRCOND:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP:
        MI.getOperand(1).getPredicate()))
            ? TargetOpcode::G_SEXT
            : TargetOpcode::G_ZEXT;
  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
    }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
                   TargetOpcode::G_ANYEXT);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)

  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
  case TargetOpcode::G_FFREXP: {
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_BUILD_VECTOR: {
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_PTRMASK: {
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
  case TargetOpcode::G_VSCALE: {
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
  case TargetOpcode::G_SPLAT_VECTOR: {
    auto Unmerge = B.buildUnmerge(Ty, Src);
    for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)

  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  LLT DstLLT = MRI.getType(DstReg);
  MI.eraseFromParent();
  MI.eraseFromParent();

  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  if (DstTy.isVector()) {
    int NumDstElt = DstTy.getNumElements();

    LLT DstCastTy = DstEltTy;
    LLT SrcPartTy = SrcEltTy;

    if (NumSrcElt < NumDstElt) {
      SrcPartTy = SrcEltTy;
    } else if (NumSrcElt > NumDstElt) {
      DstCastTy = DstEltTy;
    }

    MI.eraseFromParent();
  }

  if (DstTy.isVector()) {
    MI.eraseFromParent();
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}
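// Worked example (illustrative): viewing <8 x s16> as <4 x s32> gives
// Log2EltRatio = 1. For element index 5, OffsetIdx = 5 & 1 = 1, so the
// returned bit offset is 1 << Log2_32(16) = 16: element 5 occupies the high
// half of wide element 2.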
  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  unsigned OldNumElts = SrcVecTy.getNumElements();

  if (NewNumElts > OldNumElts) {
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      NewOps[I] = Elt.getReg(0);
    }

    MI.eraseFromParent();
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);

    MI.eraseFromParent();
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg,
                                    Register InsertReg, Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert.
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position.
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element.
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
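// The sequence above implements the classic read-modify-write bitfield
// update, i.e. Result = (Target & ~(Mask << Offset)) | (ZExt(Insert) <<
// Offset), where Mask covers InsertTy's width. Only the targeted bits change;
// everything else in TargetReg is preserved by the inverted shifted mask.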
  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
                       CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);

    MI.eraseFromParent();

  auto ConcatMI = dyn_cast<GConcatVectors>(&MI);

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;
  }

  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
    MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
  }

  MI.eraseFromParent();
  if (MemSizeInBits != MemStoreSizeInBits) {
    if (isa<GSExtLoad>(LoadMI)) {
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
    }

    if (DstTy != LoadTy)
  }

  uint64_t LargeSplitSize, SmallSplitSize;
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
                                  LargeSplitSize / 8);
                                 SmallPtr, *SmallMMO);

  if (AnyExtTy == DstTy)

  if (StoreWidth != StoreSizeInBits) {

  uint64_t LargeSplitSize, SmallSplitSize;
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
  case TargetOpcode::G_STORE: {
  case TargetOpcode::G_SELECT: {
    LLVM_DEBUG(dbgs() << "bitcast action not implemented for vector select\n");
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_CONCAT_VECTORS:

void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
                      {MI.getOperand(1), MI.getOperand(2)});
    MI.eraseFromParent();
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);

    if (Opcode == TargetOpcode::G_SMULH) {
  case TargetOpcode::G_FNEG: {
    auto [Res, SubByReg] = MI.getFirst2Regs();
    MI.eraseFromParent();
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    auto [Res, LHS, RHS] = MI.getFirst3Regs();

    if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)

    MI.eraseFromParent();
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    MI.eraseFromParent();
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_FRINT: {
    changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    MI.eraseFromParent();
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
                                **MI.memoperands_begin());
    MI.eraseFromParent();
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
  case TargetOpcode::G_UADDO: {
    auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
    MI.eraseFromParent();
  case TargetOpcode::G_UADDE: {
    auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
    MI.eraseFromParent();
  case TargetOpcode::G_USUBO: {
    auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
    MI.eraseFromParent();
  case TargetOpcode::G_USUBE: {
    auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
    MI.eraseFromParent();
  case G_MERGE_VALUES:
  case G_UNMERGE_VALUES:
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    auto [DstReg, SrcReg] = MI.getFirst2Regs();
    MI.eraseFromParent();
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
  case G_SHUFFLE_VECTOR:
  case G_VECTOR_COMPRESS:
  case G_DYN_STACKALLOC:
  case G_STACKRESTORE:
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);

  unsigned AddrSpace = DL.getAllocaAddrSpace();

  LLT IdxTy = B.getMRI()->getType(IdxReg);
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);

         "Converting bits to bytes lost precision");

  unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
static bool hasSameNumEltsOnAllVectorOperands(
    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)
    return false;

  LLT VecTy = MRI.getType(MI.getReg(0));

  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
  }

  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(RegTy, NarrowTy, LeftoverTy);

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {
  }
  assert(NumLeftover == 1 && "expected exactly one leftover");

  for (unsigned i = 0; i < N; ++i) {
    else if (Op.isImm())
    else if (Op.isPredicate())
  }

    std::initializer_list<unsigned> NonVecOpIndices) {
         "Non-compatible opcode or not specified non-vector operands");

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  for (unsigned i = 0; i < NumDefs; ++i) {
  }

  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
                    MI.getOperand(UseIdx));
    for (auto Reg : SplitPieces)
  }

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));
  }

  for (unsigned i = 0; i < NumDefs; ++i)
    mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
  for (unsigned i = 0; i < NumDefs; ++i)

  MI.eraseFromParent();
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
  }

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  mergeMixedSubvectors(MI.getReg(0), OutputRegs);

  MI.eraseFromParent();
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();

  if (TypeIdx != 1 || NarrowTy == DstTy)
    return UnableToLegalize;

  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");

  if (NarrowTy == SrcTy)

  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||

  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
    for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
  }

  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
       ++i, Offset += NumNarrowTyElts) {
  }

  MI.eraseFromParent();

  assert(TypeIdx == 0 && "Bad type index");

  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  for (unsigned i = 0; i < NumParts; ++i) {
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
  }

  MI.eraseFromParent();
  auto [DstReg, SrcVec] = MI.getFirst2Regs();

  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  IdxVal = MaybeCst->Value.getSExtValue();
  MI.eraseFromParent();

  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                  TargetOpcode::G_ANYEXT);

  int64_t PartIdx = IdxVal / NewNumElts;
                                 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
    VecParts[PartIdx] = InsertPart.getReg(0);

  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);

  MI.eraseFromParent();
  bool IsLoad = isa<GLoad>(LdStMI);

  int NumLeftover = -1;
  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                   NarrowLeftoverRegs, MIRBuilder, MRI)) {
    NumParts = NarrowRegs.size();
    NumLeftover = NarrowLeftoverRegs.size();
  }

  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;

      if (IsLoad)
        ValRegs.push_back(Dst);
    }
  };

  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  if (IsLoad)
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, LeftoverTy,
                NarrowLeftoverRegs);
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
  case G_FCANONICALIZE:
  case G_INTRINSIC_LRINT:
  case G_INTRINSIC_LLRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ_ZERO_UNDEF:
  case G_ADDRSPACE_CAST:
  case G_STRICT_FLDEXP:
  case G_UNMERGE_VALUES:
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
  case G_CONCAT_VECTORS:
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
  case G_SHUFFLE_VECTOR:
  case G_INTRINSIC_FPTRUNC_ROUND:

  assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
         "Not a bitcast operation");
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
    return UnableToLegalize;

  for (unsigned i = 0; i < SrcVRegs.size(); i++)

  MI.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

  auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
      MI.getFirst3RegLLTs();

  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

    unsigned InputUsed[2] = {-1U, -1U};
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      if (Input >= std::size(Inputs)) {
      }

      Idx -= Input * NewElts;

      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
        } else if (InputUsed[OpNo] == -1U) {
          InputUsed[OpNo] = Input;
        }
      }

      if (OpNo >= std::size(InputUsed)) {
        UseBuildVector = true;
      }
    }

    if (UseBuildVector) {
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        if (Input >= std::size(Inputs)) {
        }

        Idx -= Input * NewElts;
                     .buildExtractVectorElement(
                         EltTy, Inputs[Input],
      }
    } else if (InputUsed[0] == -1U) {
    } else {
      Register Op0 = Inputs[InputUsed[0]];
                         : Inputs[InputUsed[1]];
    }

  MI.eraseFromParent();
  auto &RdxMI = cast<GVecReduce>(MI);

  auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();

  unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();

  const unsigned NumParts =
      SrcTy.getNumElements() / NarrowTy.getNumElements();

  if (DstTy != NarrowTy)
    return UnableToLegalize;

    unsigned NumPartsLeft = NumParts;
    while (NumPartsLeft > 1) {
      for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
            MIRBuilder
                .buildInstr(ScalarOpc, {NarrowTy},
                            {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
      }
      SplitSrcs = PartialResults;
      PartialResults.clear();
      NumPartsLeft = SplitSrcs.size();
    }

    MI.eraseFromParent();

    for (unsigned Idx = 1; Idx < NumParts; ++Idx)

    MI.eraseFromParent();

  for (unsigned Part = 0; Part < NumParts; ++Part) {
  }

    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
                         {Acc, PartialReductions[Part]});
    } else {
          MIRBuilder
              .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
    }
  }

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
                                                  unsigned int TypeIdx,
                                                  LLT NarrowTy) {
  auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
      MI.getFirst3RegLLTs();
  if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
      DstTy != NarrowTy)
    return UnableToLegalize;

  assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
          MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
         "Unexpected vecreduce opcode");
  unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
                           ? TargetOpcode::G_FADD
                           : TargetOpcode::G_FMUL;

  for (unsigned i = 0; i < NumParts; i++)

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  while (SplitSrcs.size() > 1) {
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
    }
    SplitSrcs = std::move(PartialRdxs);
  }

  MI.getOperand(1).setReg(SplitSrcs[0]);
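// This builds a balanced binary tree of partial reductions: each pass halves
// the number of live sources by combining them pairwise, e.g. (illustrative)
// [a, b, c, d] -> [a+b, c+d] -> [(a+b)+(c+d)], so a power-of-two split
// finishes in log2(NumParts) rounds before the final narrow reduction runs.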
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI,
                                             const APInt &Amt,
                                             const LLT HalfTy,
                                             const LLT AmtTy) {
    MI.eraseFromParent();

  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {
    }
  } else {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {
    }
  }

  MI.eraseFromParent();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  const unsigned NewBitSize = DstEltSize / 2;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    }
    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
  }
  }

  MI.eraseFromParent();
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
  }

static Register getNeutralElementForVecReduce(unsigned Opcode,
                                              MachineIRBuilder &MIRBuilder,
                                              LLT Ty) {
  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");

  switch (Opcode) {
  default:
    llvm_unreachable(
        "getNeutralElementForVecReduce called with invalid opcode!");
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_UMAX:
    return MIRBuilder.buildConstant(Ty, 0).getReg(0);
  case TargetOpcode::G_VECREDUCE_MUL:
    return MIRBuilder.buildConstant(Ty, 1).getReg(0);
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_UMIN:
    return MIRBuilder.buildConstant(Ty, -1).getReg(0);
  case TargetOpcode::G_VECREDUCE_SMAX:
    return MIRBuilder
        .buildConstant(Ty, APInt::getSignedMinValue(Ty.getSizeInBits()))
        .getReg(0);
  case TargetOpcode::G_VECREDUCE_SMIN:
    return MIRBuilder
        .buildConstant(Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()))
        .getReg(0);
  case TargetOpcode::G_VECREDUCE_FADD:
    return MIRBuilder.buildFConstant(Ty, -0.0).getReg(0);
  case TargetOpcode::G_VECREDUCE_FMUL:
    return MIRBuilder.buildFConstant(Ty, 1.0).getReg(0);
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    assert(false && "getNeutralElementForVecReduce unimplemented for "
                    "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
  }
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_STRICT_FSUB:
  case TargetOpcode::G_STRICT_FMUL:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_EXTRACT:
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_ABS:
  case TargetOpcode::G_SELECT: {
    auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();

    if (!CondTy.isScalar() ||
    MI.getOperand(1).setReg(ShufSplat.getReg(0));

    if (CondTy.isVector())
  case TargetOpcode::G_UNMERGE_VALUES:
  case TargetOpcode::G_PHI:
  case TargetOpcode::G_SHUFFLE_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR: {
    for (auto Op : MI.uses()) {
    }
    MI.eraseFromParent();
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP: {
  case TargetOpcode::G_BITCAST: {
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN: {
    auto NeutralElement = getNeutralElementForVecReduce(
        MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
                                            NeutralElement, Idx);
    MO.setReg(NewVec.getReg(0));
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  unsigned MaskNumElts = Mask.size();

  if (MaskNumElts == SrcNumElts)
    return Legalized;

  if (MaskNumElts < SrcNumElts) {
    for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)

    MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                  MI.getOperand(1).getReg(),
                                  MI.getOperand(2).getReg(), NewMask);
    MI.eraseFromParent();
    return Legalized;
  }

  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;

  MOps1[0] = MI.getOperand(1).getReg();
  MOps2[0] = MI.getOperand(2).getReg();

  for (unsigned I = 0; I != MaskNumElts; ++I) {
    if (Idx >= static_cast<int>(SrcNumElts))
      Idx += PaddedMaskNumElts - SrcNumElts;
  }

  if (MaskNumElts != PaddedMaskNumElts) {
    for (unsigned I = 0; I < MaskNumElts; ++I) {
    }
  }

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
                                           unsigned int TypeIdx, LLT MoreTy) {
  auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();

  unsigned NumElts = DstTy.getNumElements();

  if (DstTy.isVector() && Src1Ty.isVector() &&
      DstTy.getNumElements() != Src1Ty.getNumElements()) {
  }

  if (DstTy != Src1Ty || DstTy != Src2Ty)
    return UnableToLegalize;

  for (unsigned I = 0; I != NumElts; ++I) {
    if (Idx < static_cast<int>(NumElts))
  }

  for (unsigned I = NumElts; I != WidenNumElts; ++I)

  MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                MI.getOperand(1).getReg(),
                                MI.getOperand(2).getReg(), NewMask);
  MI.eraseFromParent();
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        Register Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since the value for the next index is not calculated, neither is
      // CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
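// This is schoolbook long multiplication on NarrowTy digits: column DstIdx
// sums lo(Src1[DstIdx-i] * Src2[i]) plus the hi halves carried over from the
// previous column. Illustrative 64x64 multiply split into 32-bit parts:
// Dst0 = lo(a0*b0), Dst1 = hi(a0*b0) + lo(a0*b1) + lo(a1*b0), with
// G_UADDO/zext accumulating the carries that feed the next column.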
  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();
  LLT LeftoverTy, DummyTy;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
  int NarrowParts = Src1Regs.size();
  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;
                             {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
                             {Src1Regs[i], Src2Regs[i], CarryIn});
                             {Src1Regs[i], Src2Regs[i], CarryIn});
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              ArrayRef(DstRegs).drop_front(NarrowParts));
  MI.eraseFromParent();
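// Illustrative sketch (not part of the original file): the G_UADDO / G_UADDE
// chain built above is ordinary multi-limb addition, where the carry out of
// each limb feeds the next one. A C++ equivalent (names are hypothetical):
#include <cstdint>
uint32_t addLimbs(const uint32_t *A, const uint32_t *B, uint32_t *Out, int N) {
  uint32_t Carry = 0; // the first limb is a UADDO, i.e. carry-in of zero
  for (int I = 0; I < N; ++I) {
    uint64_t Cur = (uint64_t)A[I] + B[I] + Carry; // UADDE: add with carry-in
    Out[I] = (uint32_t)Cur;
    Carry = (uint32_t)(Cur >> 32); // carry-out feeds the next limb
  }
  return Carry; // final carry is the overflow result, when one is requested
}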
  auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
  if (Size % NarrowSize != 0)
  unsigned NumParts = Size / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
  MI.eraseFromParent();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
                 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  if (SizeOp1 % NarrowSize != 0)
  int NumParts = SizeOp1 / NarrowSize;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
  uint64_t OpStart = MI.getOperand(2).getImm();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;
    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
    int64_t ExtractOffset;
    if (OpStart < SrcStart) {
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
  else if (DstRegs.size() > 1)
  MI.eraseFromParent();
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
  uint64_t OpStart = MI.getOperand(3).getImm();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;
    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
    int64_t ExtractOffset, InsertOffset;
    if (OpStart < DstStart) {
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      InsertOffset = OpStart - DstStart;
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    if (ExtractOffset != 0 || SegSize != OpSize) {
  MI.eraseFromParent();
  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
                               {Src0Regs[I], Src1Regs[I]});
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);
  MI.eraseFromParent();
  auto [DstReg, SrcReg] = MI.getFirst2Regs();
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
  MI.eraseFromParent();
  Register CondReg = MI.getOperand(1).getReg();
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
        CondReg, Src1Regs[I], Src2Regs[I]);
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);
  MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    auto C_0 = B.buildConstant(NarrowTy, 0);
                              UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ?
        B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
        B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
    MI.eraseFromParent();
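// Illustrative sketch (not part of the original file): counting leading zeros
// of a double-width value from its two halves, as the select above does.
#include <cstdint>
static unsigned Ctlz32(uint32_t X) { // portable helper; returns 32 for X == 0
  unsigned N = 0;
  for (uint32_t Bit = 1u << 31; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}
unsigned Ctlz64(uint32_t Hi, uint32_t Lo) {
  // If the high half is zero, the count continues into the low half.
  return Hi == 0 ? 32 + Ctlz32(Lo) : Ctlz32(Hi);
}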
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    auto C_0 = B.buildConstant(NarrowTy, 0);
                              UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ?
        B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
        B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
    MI.eraseFromParent();
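// Illustrative sketch (not part of the original file): the mirrored case for
// trailing zeros; when the low half is zero, the count continues into the
// high half.
#include <cstdint>
static unsigned Cttz32(uint32_t X) { // portable helper; returns 32 for X == 0
  unsigned N = 0;
  for (uint32_t Bit = 1; Bit && !(X & Bit); Bit <<= 1)
    ++N;
  return N;
}
unsigned Cttz64(uint32_t Hi, uint32_t Lo) {
  return Lo == 0 ? 32 + Cttz32(Hi) : Cttz32(Lo);
}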
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
  auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
  auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
  auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
  auto Trunc = B.buildTrunc(NarrowTy, Clamp);
  MI.getOperand(2).setReg(Trunc.getReg(0));
  unsigned Opc = MI.getOpcode();
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
  case TargetOpcode::G_CTLZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      MI.eraseFromParent();
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      Op = MIBOp.getReg(0);
    MI.eraseFromParent();
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
  case TargetOpcode::G_CTTZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      MI.eraseFromParent();
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      MI.eraseFromParent();
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
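// Illustrative sketch (not part of the original file): retargeting CTTZ to
// CTPOP as above rests on a standard identity: ~X & (X - 1) has ones exactly
// in X's trailing-zero positions (all ones when X == 0).
#include <cstdint>
unsigned CttzViaPopcount(uint32_t X) {
  uint32_t LowOnes = ~X & (X - 1);
  unsigned N = 0;
  for (; LowOnes; LowOnes &= LowOnes - 1) // clear one set bit per iteration
    ++N;
  return N; // yields 32 for X == 0
}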
  case TargetOpcode::G_CTPOP: {
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    auto IsMulSupported = [this](const LLT Ty) {
      auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
    if (IsMulSupported(Ty)) {
      auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
      auto ResTmp = B8Count;
      for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
        auto ShiftC = B.buildConstant(Ty, Shift);
        auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
        ResTmp = B.buildAdd(Ty, ResTmp, Shl);
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    MI.eraseFromParent();
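// Illustrative sketch (not part of the original file): the staged B2/B4/B8
// counts above are the classic parallel bit count; for a 32-bit scalar the
// same sequence, with the final multiply-and-shift summing the byte counts, is:
#include <cstdint>
unsigned Popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit field sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit field sums
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // 8-bit field sums
  return (V * 0x01010101u) >> 24; // add all bytes; the top byte holds the total
}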
  const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
  MI.eraseFromParent();
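// Illustrative sketch (not part of the original file): for a power-of-two
// width, a funnel shift can be expanded with plain shifts; the extra ">> 1"
// keeps every shift amount strictly below the bit width even when Z % 32 == 0:
#include <cstdint>
uint32_t Fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  // fshl(X, Y, Z) = high 32 bits of concat(X, Y) shifted left by Z mod 32.
  return (X << (Z & 31)) | ((Y >> 1) >> (~Z & 31));
}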
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  MI.eraseFromParent();
  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
    return lowerFunnelShiftAsShifts(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
  auto [Dst, Src] = MI.getFirst2Regs();
  if (SrcTyScalarSize * 2 < DstTyScalarSize) {
                              {UnmergeSrc.getReg(0)});
                              {UnmergeSrc.getReg(1)});
  MI.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
    return lowerRotateWithReverseRotate(MI);
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    MI.eraseFromParent();
    return buildFunnelShift(FShOpc, Dst, Src, Amt);
    return buildFunnelShift(RevFsh, Dst, Src, Amt);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
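// Illustrative sketch (not part of the original file): the power-of-two
// rotate path above in plain C++. Masking both amounts by W-1 keeps the
// shifts defined, and the two halves are OR'd together:
#include <cstdint>
uint32_t Rotl32(uint32_t X, uint32_t Amt) {
  uint32_t Sh = Amt & 31;         // shift amount modulo the width
  uint32_t Rev = (0u - Amt) & 31; // (W - Amt) mod W without a branch
  return (X << Sh) | (X >> Rev);
}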
  auto [Dst, Src] = MI.getFirst2Regs();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy != S64 && SrcTy != S32)
  if (DstTy != S32 && DstTy != S64)
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  MI.eraseFromParent();
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  MI.eraseFromParent();
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  MI.eraseFromParent();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();
  if (Src0Ty == Src1Ty) {
  } else if (Src0Size > Src1Size) {
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
      TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  MI.eraseFromParent();
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  auto [DstReg, X] = MI.getFirst2Regs();
  const unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  auto [DstReg, SrcReg] = MI.getFirst2Regs();
  unsigned Flags = MI.getFlags();
                             SrcReg, Zero, Flags);
                             SrcReg, Trunc, Flags);
  MI.eraseFromParent();
  const unsigned NumOps = MI.getNumOperands();
  auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
  unsigned PartSize = Src0Ty.getSizeInBits();
  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
    ResultReg = NextResult;
  if (DstTy.isPointer()) {
          DstTy.getAddressSpace())) {
  MI.eraseFromParent();
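// Illustrative sketch (not part of the original file): merging narrow scalars
// into one wide scalar is zero-extend, shift into place, and OR, with each
// part landing at its bit offset as in the Offset computation above:
#include <cstdint>
uint64_t MergeParts(const uint16_t Parts[4]) { // part 0 is least significant
  uint64_t Result = 0;
  for (unsigned I = 0; I < 4; ++I)
    Result |= (uint64_t)Parts[I] << (I * 16);
  return Result;
}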
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
  MI.eraseFromParent();
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();
    SrcRegs[IdxVal] = MI.getOperand(2).getReg();
    MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
  int64_t Offset = IdxVal * EltBytes;
  MI.eraseFromParent();
  auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
      MI.getFirst3RegLLTs();
  for (int Idx : Mask) {
    if (!Undef.isValid())
    if (Src0Ty.isScalar()) {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
  if (DstTy.isScalar())
  MI.eraseFromParent();
  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
      MI.getFirst4RegLLTs();
  if (VecTy.isScalableVector())
  std::optional<APInt> PassthruSplatVal =
  if (PassthruSplatVal.has_value()) {
  } else if (HasPassthru) {
  unsigned NumElmts = VecTy.getNumElements();
  for (unsigned I = 0; I < NumElmts; ++I) {
    if (HasPassthru && I == NumElmts - 1) {
                              {OutPos, EndOfVector});
  MI.eraseFromParent();
  if (Alignment > Align(1)) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  Register AllocSize = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  MI.eraseFromParent();
  MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();
  unsigned DstSize = DstTy.getSizeInBits();
  if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
    for (unsigned Idx = Offset / SrcEltSize;
    if (SubVectorElts.size() == 1)
    MI.eraseFromParent();
  if (DstTy.isScalar() &&
    LLT SrcIntTy = SrcTy;
    MI.eraseFromParent();
  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
  if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
    for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
      DstElts.push_back(UnmergeInsertSrc.getReg(i));
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
  LLT IntDstTy = DstTy;
  MI.eraseFromParent();
  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
      MI.getFirst4RegLLTs();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
  LLT BoolTy = Dst1Ty;
  auto ResultLowerThanLHS =
  MI.eraseFromParent();
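// Illustrative sketch (not part of the original file): the "result lower than
// LHS" compare above detects signed add overflow. Adding a non-negative RHS
// must not decrease LHS; adding a negative RHS must. Any disagreement between
// the two compares means the result wrapped:
#include <cstdint>
bool SAddOverflow(int32_t L, int32_t R, int32_t &Res) {
  Res = (int32_t)((uint32_t)L + (uint32_t)R); // wrapping add
  return (Res < L) != (R < 0);
}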
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  switch (MI.getOpcode()) {
  case TargetOpcode::G_UADDSAT:
    BaseOp = TargetOpcode::G_ADD;
  case TargetOpcode::G_SADDSAT:
    BaseOp = TargetOpcode::G_ADD;
  case TargetOpcode::G_USUBSAT:
    BaseOp = TargetOpcode::G_SUB;
  case TargetOpcode::G_SSUBSAT:
    BaseOp = TargetOpcode::G_SUB;
  MI.eraseFromParent();
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_UADDSAT:
    OverflowOp = TargetOpcode::G_UADDO;
  case TargetOpcode::G_SADDSAT:
    OverflowOp = TargetOpcode::G_SADDO;
  case TargetOpcode::G_USUBSAT:
    OverflowOp = TargetOpcode::G_USUBO;
  case TargetOpcode::G_SSUBSAT:
    OverflowOp = TargetOpcode::G_SSUBO;
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MI.eraseFromParent();
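// Illustrative sketch (not part of the original file): the unsigned
// saturating-add case of the Tmp/Ov pattern above, i.e. an overflow-reporting
// add followed by a select that clamps to the type maximum on overflow:
#include <cstdint>
uint32_t UAddSat32(uint32_t L, uint32_t R) {
  uint32_t Tmp = L + R; // wrapping add (the UADDO value result)
  bool Ov = Tmp < L;    // the UADDO carry result
  return Ov ? 0xFFFFFFFFu : Tmp;
}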
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
  Res.getInstr()->getOperand(0).setReg(Dst);
  MI.eraseFromParent();
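// Illustrative sketch (not part of the original file): the shift-and-mask
// byte swap that the BaseShiftAmt/APMask loop above generalizes, written out
// for 32 bits:
#include <cstdint>
uint32_t Bswap32(uint32_t X) {
  return (X << 24) | ((X & 0x0000FF00u) << 8) |
         ((X >> 8) & 0x0000FF00u) | (X >> 24);
}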
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
  auto [Dst, Src] = MI.getFirst2Regs();
  for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
  MI.eraseFromParent();
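// Illustrative sketch (not part of the original file): the SwapN helper above
// swaps the low and high halves of every 2N-bit group; composing the stages
// reverses all bits, e.g. for 32 bits:
#include <cstdint>
uint32_t Bitreverse32(uint32_t X) {
  X = ((X & 0x55555555u) << 1) | ((X >> 1) & 0x55555555u); // swap adjacent bits
  X = ((X & 0x33333333u) << 2) | ((X >> 2) & 0x33333333u); // swap bit pairs
  X = ((X & 0x0F0F0F0Fu) << 4) | ((X >> 4) & 0x0F0F0F0Fu); // swap nibbles
  return (X << 24) | ((X & 0x0000FF00u) << 8) |            // swap bytes
         ((X >> 8) & 0x0000FF00u) | (X >> 24);
}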
  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;
  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
  MI.eraseFromParent();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
  MI.eraseFromParent();
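// Illustrative sketch (not part of the original file): the high-half multiply
// lowers to extend, multiply at double width, then shift the high half down:
#include <cstdint>
uint32_t UMulH32(uint32_t L, uint32_t R) {
  return (uint32_t)(((uint64_t)L * R) >> 32); // zext, mul, logical shift right
}
int32_t SMulH32(int32_t L, int32_t R) {
  return (int32_t)(((int64_t)L * R) >> 32);   // sext, mul, arithmetic shift right
}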
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  MI.eraseFromParent();
  APInt ExpMask = Inf;
  LLT DstTyCopy = DstTy;
    Mask &= ~fcPosFinite;
    Mask &= ~fcNegFinite;
    Mask &= ~PartialCheck;
  else if (PartialCheck == fcZero)
    appendToRes(SubnormalRes);
  else if (PartialCheck == fcInf)
  if (PartialCheck == fcNan) {
  } else if (PartialCheck == fcQNan) {
                                 Abs, InfWithQnanBitC);
  APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
  APInt MaxExpMinusOne = ExpMask - ExpLSB;
  appendToRes(NormalRes);
  MI.eraseFromParent();
  auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
      MI.getFirst4RegLLTs();
  bool IsEltPtr = DstTy.isPointerOrPointerVector();
  if (MaskTy.isScalar()) {
    if (DstTy.isVector()) {
      MaskReg = ShufSplat.getReg(0);
    } else if (!DstTy.isVector()) {
  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
  MI.eraseFromParent();
  unsigned Opcode = MI.getOpcode();
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  MI.eraseFromParent();
  MI.eraseFromParent();
  Register DestReg = MI.getOperand(0).getReg();
  MI.eraseFromParent();
  Register ListPtr = MI.getOperand(1).getReg();
  const Align A(MI.getOperand(2).getImm());
    VAList = AndDst.getReg(0);
  Align EltAlignment = DL.getABITypeAlign(Ty);
  MI.eraseFromParent();
                                      unsigned Limit, const MemOp &Op,
                                      unsigned DstAS, unsigned SrcAS,
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
  if (Op.isFixedDstAlign())
  unsigned NumMemOps = 0;
    while (TySize > Size) {
      assert(NewTySize > 0 && "Could not find appropriate type");
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
      if (++NumMemOps > Limit)
      MemOps.push_back(Ty);
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.trunc(8);
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  assert(KnownLen != 0 && "Have a zero length memset length!");
  bool DstAlignCanChange = false;
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
          MF.getFunction().getAttributes(), TLI))
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
      LargestTy = MemOps[i];
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    if (TySize > Size) {
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
      Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    MIB.buildStore(Value, Ptr, *StoreMMO);
  MI.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  auto [Dst, Src, Len] = MI.getFirst3Regs();
  const auto *MMOIt = MI.memoperands_begin();
  bool IsVolatile = MemOp->isVolatile();
         "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();
  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                                      Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                                      Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  assert(KnownLen != 0 && "Have a zero length memcpy length!");
  bool DstAlignCanChange = false;
  Align Alignment = std::min(DstAlign, SrcAlign);
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
          MF.getFunction().getAttributes(), TLI))
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
    if (CurrOffset != 0) {
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
    if (CurrOffset != 0) {
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  MI.eraseFromParent();
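// Illustrative sketch (not part of the original file): the load/store
// expansion driven by MemOps, in plain C++. The real lowering uses
// target-chosen LLTs and may re-copy an overlapping tail rather than
// shrinking the last chunk:
#include <cstddef>
#include <cstdint>
#include <cstring>
void InlineMemcpy(void *Dst, const void *Src, size_t KnownLen) {
  auto *D = static_cast<uint8_t *>(Dst);
  auto *S = static_cast<const uint8_t *>(Src);
  size_t Off = 0;
  for (size_t Chunk : {sizeof(uint64_t), sizeof(uint32_t),
                       sizeof(uint16_t), sizeof(uint8_t)})
    while (KnownLen - Off >= Chunk) {
      std::memcpy(D + Off, S + Off, Chunk); // one load + one store per chunk
      Off += Chunk;
    }
}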
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  assert(KnownLen != 0 && "Have a zero length memmove length!");
  bool DstAlignCanChange = false;
  Align Alignment = std::min(DstAlign, SrcAlign);
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
          MF.getFunction().getAttributes(), TLI))
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
  unsigned CurrOffset = 0;
  for (auto CopyTy : MemOps) {
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    if (CurrOffset != 0) {
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
    if (CurrOffset != 0) {
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
  MI.eraseFromParent();
  const unsigned Opc = MI.getOpcode();
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");
  auto MMOIt = MI.memoperands_begin();
  auto [Dst, Src, Len] = MI.getFirst3Regs();
  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    SrcAlign = MemOp->getBaseAlign();
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
  if (MaxLen && KnownLen > MaxLen)
  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ASHR Op0, Op1.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG, in-register zero-extension is expressed as an AND with a mask of the ImmOp low bits set.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res = G_EXTRACT Src, Index.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
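As a worked example of the builder interface, a compare-and-select idiom for a signed maximum (a sketch with hypothetical names; in practice buildSMax, listed further down, emits this directly):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

// max(A, C) via an explicit G_ICMP + G_SELECT sequence on s32 values.
llvm::Register emitSMax(llvm::MachineIRBuilder &B, llvm::Register A,
                        llvm::Register C) {
  llvm::LLT S32 = llvm::LLT::scalar(32);
  auto Cmp = B.buildICmp(llvm::CmpInst::ICMP_SGT, llvm::LLT::scalar(1), A, C);
  return B.buildSelect(S32, Cmp, A, C).getReg(0);
}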
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_LSHR Op0, Op1.
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ..., Res = G_BUILD_VECTOR Op0, ..., or Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
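A sketch tying the vector-element builders together (illustrative only; the lane index 2 and the helper name are assumptions):

// Splat a scalar into <4 x s32>, then read one lane back.
llvm::Register splatAndExtract(llvm::MachineIRBuilder &B, llvm::Register S) {
  llvm::LLT V4S32 = llvm::LLT::fixed_vector(4, 32);
  auto Splat = B.buildSplatBuildVector(V4S32, S);
  return B.buildExtractVectorElementConstant(llvm::LLT::scalar(32), Splat, 2)
      .getReg(0);
}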
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SHL Op0, Op1.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
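A sketch combining buildConstant, buildPtrAdd, buildLoad, and buildStore to move one s32 element between two buffers (assumes 64-bit pointers in address space 0, pre-built MMOs, and a positioned builder; all names are hypothetical):

void copyElt(llvm::MachineIRBuilder &B, llvm::Register SrcPtr,
             llvm::Register DstPtr, uint64_t ByteOff,
             llvm::MachineMemOperand &LoadMMO,
             llvm::MachineMemOperand &StoreMMO) {
  llvm::LLT P0 = llvm::LLT::pointer(0, 64);
  llvm::LLT S64 = llvm::LLT::scalar(64);
  auto Off = B.buildConstant(S64, ByteOff);          // byte offset as G_CONSTANT
  auto Ld = B.buildLoad(llvm::LLT::scalar(32),
                        B.buildPtrAdd(P0, SrcPtr, Off), LoadMMO);
  B.buildStore(Ld, B.buildPtrAdd(P0, DstPtr, Off), StoreMMO);
}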
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert "<empty> = Opcode <empty>", i.e. an instruction with the given opcode and no operands yet; operands can be appended with addDef/addUse.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ..., x, undef, undef, ... (padding Op0 with undef elements up to the width of Res).
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
Build and insert Res = G_INSERT Src, Op, Index.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ..., x (dropping the trailing elements of Op0 down to the width of Res).
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT ~((1 << NumBits) - 1), clearing the NumBits low bits of the pointer.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
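To close out the builder listing, a sketch of buildSExtInReg with the classic shift expansion it replaces shown in comments (names are hypothetical):

// Sign-extend the low 8 bits of a 32-bit value in place.
llvm::Register signExtendByte(llvm::MachineIRBuilder &B, llvm::Register X) {
  llvm::LLT S32 = llvm::LLT::scalar(32);
  return B.buildSExtInReg(S32, X, /*ImmOp=*/8).getReg(0);
  // Equivalent expansion:
  //   auto Amt = B.buildConstant(S32, 24);
  //   return B.buildAShr(S32, B.buildShl(S32, X, Amt), Amt).getReg(0);
}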
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
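addDef/addUse are the low-level spelling behind the typed build* helpers; a sketch of the raw form, equivalent to buildAnd above (the function name is an assumption):

#include "llvm/CodeGen/TargetOpcodes.h"

llvm::MachineInstrBuilder rawAnd(llvm::MachineIRBuilder &B, llvm::Register Res,
                                 llvm::Register Op0, llvm::Register Op1) {
  // Res = G_AND Op0, Op1, built operand by operand.
  return B.buildInstr(llvm::TargetOpcode::G_AND)
      .addDef(Res)
      .addUse(Op0)
      .addUse(Op1);
}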
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
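A sketch of the usual operand walk over a MachineInstr using these accessors (illustrative only):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"

// Collect every virtual register appearing as an operand of MI.
void collectVRegs(const llvm::MachineInstr &MI,
                  llvm::SmallVectorImpl<llvm::Register> &Out) {
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const llvm::MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg() && MO.getReg().isVirtual())
      Out.push_back(MO.getReg());
  }
}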
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given register.
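These MachineRegisterInfo calls compose as in the following sketch (assumes a positioned builder `B`; the helper name is hypothetical):

#include <cassert>

// Create a fresh s64 vreg, define it, and look the def back up.
llvm::MachineInstr *defOfFreshReg(llvm::MachineRegisterInfo &MRI,
                                  llvm::MachineIRBuilder &B) {
  llvm::Register R = MRI.createGenericVirtualRegister(llvm::LLT::scalar(64));
  B.buildUndef(R); // R = IMPLICIT_DEF
  assert(MRI.getType(R) == llvm::LLT::scalar(64));
  return MRI.getVRegDef(R); // the IMPLICIT_DEF just built
}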
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' size template parameter.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR.
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and alignment.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
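A sketch of the usual legality probe built from these hooks (assumes `TLI`, `Ctx`, and `DL` are in scope; the i32/align-1 query and the function name are illustrative assumptions):

#include "llvm/CodeGen/TargetLowering.h"

// True if an unaligned 32-bit access is both legal and considered fast.
bool unalignedS32IsFast(const llvm::TargetLowering &TLI,
                        llvm::LLVMContext &Ctx, const llvm::DataLayout &DL) {
  unsigned Fast = 0;
  return TLI.allowsMemoryAccess(Ctx, DL, llvm::MVT::i32, /*AddrSpace=*/0,
                                llvm::Align(1),
                                llvm::MachineMemOperand::MONone, &Fast) &&
         Fast != 0;
}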
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
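A sketch of how these getters are consulted before lowering to a libcall, with UNKNOWN_LIBCALL as the "no such routine" sentinel (the helper name is an assumption):

#include <cassert>
#include "llvm/CodeGen/RuntimeLibcalls.h"

// Pick the runtime routine for an f32 -> f64 extension.
llvm::RTLIB::Libcall fpextF32ToF64() {
  llvm::RTLIB::Libcall LC =
      llvm::RTLIB::getFPEXT(llvm::MVT::f32, llvm::MVT::f64);
  assert(LC != llvm::RTLIB::UNKNOWN_LIBCALL && "no libcall available");
  return LC; // RTLIB::FPEXT_F32_F64
}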
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
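A sketch connecting LLT back to IR types via this helper (the header that declares getTypeForLLT has moved between LLVM versions, so the include below is an assumption):

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/IR/Type.h"

// s64 maps to i64; <4 x s32> would map to <4 x i32>.
llvm::Type *irTypeForS64(llvm::LLVMContext &Ctx) {
  return llvm::getTypeForLLT(llvm::LLT::scalar(64), Ctx);
}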
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction whose opcode is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements or the scalar bitwidth.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
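Worked values for these integer-math helpers, as a small self-checking sketch (the function name is illustrative):

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isPowerOf2_32(64), "64 is a power of two");

// PowerOf2Ceil(40) == 64 and Log2_32(64) == 6, so this returns 6 for 40.
unsigned ceilLog2(uint32_t N) {
  return llvm::Log2_32(uint32_t(llvm::PowerOf2Ceil(N)));
}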
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
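Concrete values for the alignment helpers above, in a sketch (the numbers are illustrative):

#include "llvm/Support/Alignment.h"

void alignmentExamples() {
  llvm::Align A(8);
  // An access at offset 4 from an 8-byte-aligned base is 4-byte aligned.
  llvm::Align AtOff4 = llvm::commonAlignment(A, 4); // Align(4)
  uint64_t Rounded = llvm::alignTo(13, A);          // 16
  unsigned Shift = llvm::Log2(A);                   // 3
  (void)AtOff4; (void)Rounded; (void)Shift;
}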
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
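A sketch of how the legalizer relates types through getGCDType/getLCMType: for <3 x s32> against s64, the sizes 96 and 64 have GCD 32 and LCM 192 bits, giving s32 and <6 x s32> respectively (expected results noted in comments):

#include "llvm/CodeGen/GlobalISel/Utils.h"

void gcdLcmExample() {
  llvm::LLT V3S32 = llvm::LLT::fixed_vector(3, 32);
  llvm::LLT S64 = llvm::LLT::scalar(64);
  llvm::LLT GCD = llvm::getGCDType(V3S32, S64); // s32
  llvm::LLT LCM = llvm::getLCMType(V3S32, S64); // <6 x s32>
  (void)GCD; (void)LCM;
}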
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
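A one-liner sketch combining the MachinePointerInfo factories with getWithOffset (the 8-byte offset and helper name are arbitrary):

// Pointer info for "8 bytes past the start of a constant-pool entry".
llvm::MachinePointerInfo cpSlotPlus8(llvm::MachineFunction &MF) {
  return llvm::MachinePointerInfo::getConstantPool(MF).getWithOffset(8);
}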
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
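Finally, a sketch of describing a memcpy to the target so getOptimalMemOpLLT above can choose a chunk type (the 32-byte size is arbitrary):

#include "llvm/CodeGen/TargetLowering.h"

llvm::MemOp describe32ByteCopy(llvm::Align DstAlign, llvm::Align SrcAlign) {
  return llvm::MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/false, DstAlign,
                           SrcAlign, /*IsVolatile=*/false);
}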