42#define DEBUG_TYPE "legalizer"
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
55static std::pair<int, int>
61 unsigned NumParts =
Size / NarrowSize;
62 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
65 if (LeftoverSize == 0)
70 if (LeftoverSize % EltSize != 0)
80 return std::make_pair(NumParts, NumLeftover);
107 : MIRBuilder(Builder), Observer(Observer),
MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
114 : MIRBuilder(
B), Observer(Observer),
MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
124 if (isa<GIntrinsic>(
MI))
127 switch (Step.Action) {
142 return bitcast(
MI, Step.TypeIdx, Step.NewType);
145 return lower(
MI, Step.TypeIdx, Step.NewType);
162void LegalizerHelper::insertParts(
Register DstReg,
184 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
186 for (
auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
188 return mergeMixedSubvectors(DstReg, AllRegs);
193 for (
auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
209void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
212 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
219 appendVectorElts(AllElts, Leftover);
227 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
229 const int StartIdx = Regs.
size();
230 const int NumResults =
MI.getNumOperands() - 1;
232 for (
int I = 0;
I != NumResults; ++
I)
233 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
239 if (SrcTy == GCDTy) {
254 extractGCDType(Parts, GCDTy, SrcReg);
258LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
260 unsigned PadStrategy) {
265 int NumOrigSrc = VRegs.
size();
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 assert(PadStrategy == TargetOpcode::G_SEXT);
298 for (
int I = 0;
I != NumParts; ++
I) {
299 bool AllMergePartsArePadding =
true;
302 for (
int J = 0; J != NumSubParts; ++J) {
303 int Idx =
I * NumSubParts + J;
304 if (
Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
309 SubMerge[J] = VRegs[
Idx];
312 AllMergePartsArePadding =
false;
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
331 Remerge[
I] = AllPadReg;
335 if (NumSubParts == 1)
336 Remerge[
I] = SubMerge[0];
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[
I];
345 VRegs = std::move(Remerge);
349void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
356 if (DstTy == LCMTy) {
370 UnmergeDefs[0] = DstReg;
371 for (
unsigned I = 1;
I != NumDefs; ++
I)
383#define RTLIBCASE_INT(LibcallPrefix) \
387 return RTLIB::LibcallPrefix##32; \
389 return RTLIB::LibcallPrefix##64; \
391 return RTLIB::LibcallPrefix##128; \
393 llvm_unreachable("unexpected size"); \
397#define RTLIBCASE(LibcallPrefix) \
401 return RTLIB::LibcallPrefix##32; \
403 return RTLIB::LibcallPrefix##64; \
405 return RTLIB::LibcallPrefix##80; \
407 return RTLIB::LibcallPrefix##128; \
409 llvm_unreachable("unexpected size"); \
414 case TargetOpcode::G_MUL:
416 case TargetOpcode::G_SDIV:
418 case TargetOpcode::G_UDIV:
420 case TargetOpcode::G_SREM:
422 case TargetOpcode::G_UREM:
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
426 case TargetOpcode::G_FADD:
428 case TargetOpcode::G_FSUB:
430 case TargetOpcode::G_FMUL:
432 case TargetOpcode::G_FDIV:
434 case TargetOpcode::G_FEXP:
436 case TargetOpcode::G_FEXP2:
438 case TargetOpcode::G_FEXP10:
440 case TargetOpcode::G_FREM:
442 case TargetOpcode::G_FPOW:
444 case TargetOpcode::G_FPOWI:
446 case TargetOpcode::G_FMA:
448 case TargetOpcode::G_FSIN:
450 case TargetOpcode::G_FCOS:
452 case TargetOpcode::G_FTAN:
454 case TargetOpcode::G_FASIN:
456 case TargetOpcode::G_FACOS:
458 case TargetOpcode::G_FATAN:
460 case TargetOpcode::G_FSINH:
462 case TargetOpcode::G_FCOSH:
464 case TargetOpcode::G_FTANH:
466 case TargetOpcode::G_FLOG10:
468 case TargetOpcode::G_FLOG:
470 case TargetOpcode::G_FLOG2:
472 case TargetOpcode::G_FLDEXP:
474 case TargetOpcode::G_FCEIL:
476 case TargetOpcode::G_FFLOOR:
478 case TargetOpcode::G_FMINNUM:
480 case TargetOpcode::G_FMAXNUM:
482 case TargetOpcode::G_FSQRT:
484 case TargetOpcode::G_FRINT:
486 case TargetOpcode::G_FNEARBYINT:
488 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
490 case TargetOpcode::G_INTRINSIC_LRINT:
492 case TargetOpcode::G_INTRINSIC_LLRINT:
518 if (CallerAttrs.
hasRetAttr(Attribute::ZExt) ||
530 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
537 if (!VReg.
isVirtual() || VReg != Next->getOperand(1).getReg())
540 Register PReg = Next->getOperand(0).getReg();
548 if (Ret->getNumImplicitOperands() != 1)
551 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
575 Info.OrigRet = Result;
578 (Result.Ty->isVoidTy() ||
583 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
584 if (!CLI.lowerCall(MIRBuilder,
Info))
587 if (
MI &&
Info.LoweredTailCall) {
588 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
599 "Expected instr following MI to be return or debug inst?");
603 }
while (
MI->getNextNode());
633 Args.push_back({MO.getReg(), OpType, 0});
635 {
MI.getOperand(0).
getReg(), OpType, 0}, Args,
646 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
650 LLT OpLLT =
MRI.getType(Reg);
651 Type *OpTy =
nullptr;
656 Args.push_back({Reg, OpTy, 0});
662 unsigned Opc =
MI.getOpcode();
664 case TargetOpcode::G_BZERO:
665 RTLibcall = RTLIB::BZERO;
667 case TargetOpcode::G_MEMCPY:
668 RTLibcall = RTLIB::MEMCPY;
669 Args[0].Flags[0].setReturned();
671 case TargetOpcode::G_MEMMOVE:
672 RTLibcall = RTLIB::MEMMOVE;
673 Args[0].Flags[0].setReturned();
675 case TargetOpcode::G_MEMSET:
676 RTLibcall = RTLIB::MEMSET;
677 Args[0].Flags[0].setReturned();
682 const char *
Name = TLI.getLibcallName(RTLibcall);
692 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
696 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
699 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
700 if (!CLI.lowerCall(MIRBuilder,
Info))
703 if (
Info.LoweredTailCall) {
704 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
715 "Expected instr following MI to be return or debug inst?");
719 }
while (
MI.getNextNode());
729 unsigned Opc =
MI.getOpcode();
730 auto &AtomicMI = cast<GMemOperation>(
MI);
731 auto &MMO = AtomicMI.getMMO();
732 auto Ordering = MMO.getMergedOrdering();
733 LLT MemType = MMO.getMemoryType();
736 return RTLIB::UNKNOWN_LIBCALL;
738#define LCALLS(A, B) \
739 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
741 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
743 case TargetOpcode::G_ATOMIC_CMPXCHG:
744 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
746 return getOutlineAtomicHelper(LC, Ordering, MemSize);
748 case TargetOpcode::G_ATOMICRMW_XCHG: {
750 return getOutlineAtomicHelper(LC, Ordering, MemSize);
752 case TargetOpcode::G_ATOMICRMW_ADD:
753 case TargetOpcode::G_ATOMICRMW_SUB: {
755 return getOutlineAtomicHelper(LC, Ordering, MemSize);
757 case TargetOpcode::G_ATOMICRMW_AND: {
759 return getOutlineAtomicHelper(LC, Ordering, MemSize);
761 case TargetOpcode::G_ATOMICRMW_OR: {
763 return getOutlineAtomicHelper(LC, Ordering, MemSize);
765 case TargetOpcode::G_ATOMICRMW_XOR: {
767 return getOutlineAtomicHelper(LC, Ordering, MemSize);
770 return RTLIB::UNKNOWN_LIBCALL;
783 unsigned Opc =
MI.getOpcode();
785 case TargetOpcode::G_ATOMIC_CMPXCHG:
786 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
789 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
790 MI.getFirst4RegLLTs();
793 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
794 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
795 NewLLT) =
MI.getFirst5RegLLTs();
805 case TargetOpcode::G_ATOMICRMW_XCHG:
806 case TargetOpcode::G_ATOMICRMW_ADD:
807 case TargetOpcode::G_ATOMICRMW_SUB:
808 case TargetOpcode::G_ATOMICRMW_AND:
809 case TargetOpcode::G_ATOMICRMW_OR:
810 case TargetOpcode::G_ATOMICRMW_XOR: {
811 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
814 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
818 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
833 const char *
Name = TLI.getLibcallName(RTLibcall);
843 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
847 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
848 if (!CLI.lowerCall(MIRBuilder,
Info))
860 case TargetOpcode::G_FPEXT:
862 case TargetOpcode::G_FPTRUNC:
864 case TargetOpcode::G_FPTOSI:
866 case TargetOpcode::G_FPTOUI:
868 case TargetOpcode::G_SITOFP:
870 case TargetOpcode::G_UITOFP:
882 {{
MI.getOperand(1).
getReg(), FromType, 0}}, LocObserver, &
MI);
888 switch (
MI.getOpcode()) {
889 case TargetOpcode::G_GET_FPENV:
890 RTLibcall = RTLIB::FEGETENV;
892 case TargetOpcode::G_SET_FPENV:
893 case TargetOpcode::G_RESET_FPENV:
894 RTLibcall = RTLIB::FESETENV;
896 case TargetOpcode::G_GET_FPMODE:
897 RTLibcall = RTLIB::FEGETMODE;
899 case TargetOpcode::G_SET_FPMODE:
900 case TargetOpcode::G_RESET_FPMODE:
901 RTLibcall = RTLIB::FESETMODE;
930 auto &Ctx = MF.getFunction().getContext();
941 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
948 LocObserver,
nullptr);
970 auto &Ctx = MF.getFunction().getContext();
986 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
992 LocObserver,
nullptr);
1009 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1011 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1028 switch (
MI.getOpcode()) {
1031 case TargetOpcode::G_MUL:
1032 case TargetOpcode::G_SDIV:
1033 case TargetOpcode::G_UDIV:
1034 case TargetOpcode::G_SREM:
1035 case TargetOpcode::G_UREM:
1036 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1045 case TargetOpcode::G_FADD:
1046 case TargetOpcode::G_FSUB:
1047 case TargetOpcode::G_FMUL:
1048 case TargetOpcode::G_FDIV:
1049 case TargetOpcode::G_FMA:
1050 case TargetOpcode::G_FPOW:
1051 case TargetOpcode::G_FREM:
1052 case TargetOpcode::G_FCOS:
1053 case TargetOpcode::G_FSIN:
1054 case TargetOpcode::G_FTAN:
1055 case TargetOpcode::G_FACOS:
1056 case TargetOpcode::G_FASIN:
1057 case TargetOpcode::G_FATAN:
1058 case TargetOpcode::G_FCOSH:
1059 case TargetOpcode::G_FSINH:
1060 case TargetOpcode::G_FTANH:
1061 case TargetOpcode::G_FLOG10:
1062 case TargetOpcode::G_FLOG:
1063 case TargetOpcode::G_FLOG2:
1064 case TargetOpcode::G_FLDEXP:
1065 case TargetOpcode::G_FEXP:
1066 case TargetOpcode::G_FEXP2:
1067 case TargetOpcode::G_FEXP10:
1068 case TargetOpcode::G_FCEIL:
1069 case TargetOpcode::G_FFLOOR:
1070 case TargetOpcode::G_FMINNUM:
1071 case TargetOpcode::G_FMAXNUM:
1072 case TargetOpcode::G_FSQRT:
1073 case TargetOpcode::G_FRINT:
1074 case TargetOpcode::G_FNEARBYINT:
1075 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1080 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1088 case TargetOpcode::G_INTRINSIC_LRINT:
1089 case TargetOpcode::G_INTRINSIC_LLRINT: {
1096 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1102 {{
MI.getOperand(1).
getReg(), HLTy, 0}}, LocObserver, &
MI);
1105 MI.eraseFromParent();
1108 case TargetOpcode::G_FPOWI: {
1115 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1119 std::initializer_list<CallLowering::ArgInfo> Args = {
1120 {
MI.getOperand(1).getReg(), HLTy, 0},
1121 {
MI.getOperand(2).getReg(), ITy, 1}};
1124 Args, LocObserver, &
MI);
1129 case TargetOpcode::G_FPEXT:
1130 case TargetOpcode::G_FPTRUNC: {
1133 if (!FromTy || !ToTy)
1141 case TargetOpcode::G_FPTOSI:
1142 case TargetOpcode::G_FPTOUI: {
1147 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1155 case TargetOpcode::G_SITOFP:
1156 case TargetOpcode::G_UITOFP: {
1160 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1168 case TargetOpcode::G_ATOMICRMW_XCHG:
1169 case TargetOpcode::G_ATOMICRMW_ADD:
1170 case TargetOpcode::G_ATOMICRMW_SUB:
1171 case TargetOpcode::G_ATOMICRMW_AND:
1172 case TargetOpcode::G_ATOMICRMW_OR:
1173 case TargetOpcode::G_ATOMICRMW_XOR:
1174 case TargetOpcode::G_ATOMIC_CMPXCHG:
1175 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1181 case TargetOpcode::G_BZERO:
1182 case TargetOpcode::G_MEMCPY:
1183 case TargetOpcode::G_MEMMOVE:
1184 case TargetOpcode::G_MEMSET: {
1189 MI.eraseFromParent();
1192 case TargetOpcode::G_GET_FPENV:
1193 case TargetOpcode::G_GET_FPMODE: {
1199 case TargetOpcode::G_SET_FPENV:
1200 case TargetOpcode::G_SET_FPMODE: {
1206 case TargetOpcode::G_RESET_FPENV:
1207 case TargetOpcode::G_RESET_FPMODE: {
1216 MI.eraseFromParent();
1226 switch (
MI.getOpcode()) {
1229 case TargetOpcode::G_IMPLICIT_DEF: {
1239 if (SizeOp0 % NarrowSize != 0) {
1240 LLT ImplicitTy = NarrowTy;
1247 MI.eraseFromParent();
1251 int NumParts = SizeOp0 / NarrowSize;
1254 for (
int i = 0; i < NumParts; ++i)
1261 MI.eraseFromParent();
1264 case TargetOpcode::G_CONSTANT: {
1266 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1269 int NumParts = TotalSize / NarrowSize;
1272 for (
int I = 0;
I != NumParts; ++
I) {
1273 unsigned Offset =
I * NarrowSize;
1280 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1282 if (LeftoverBits != 0) {
1286 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1290 insertParts(
MI.getOperand(0).getReg(),
1291 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1293 MI.eraseFromParent();
1296 case TargetOpcode::G_SEXT:
1297 case TargetOpcode::G_ZEXT:
1298 case TargetOpcode::G_ANYEXT:
1300 case TargetOpcode::G_TRUNC: {
1306 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1312 MI.eraseFromParent();
1315 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1316 case TargetOpcode::G_FREEZE: {
1327 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1334 MI.eraseFromParent();
1337 case TargetOpcode::G_ADD:
1338 case TargetOpcode::G_SUB:
1339 case TargetOpcode::G_SADDO:
1340 case TargetOpcode::G_SSUBO:
1341 case TargetOpcode::G_SADDE:
1342 case TargetOpcode::G_SSUBE:
1343 case TargetOpcode::G_UADDO:
1344 case TargetOpcode::G_USUBO:
1345 case TargetOpcode::G_UADDE:
1346 case TargetOpcode::G_USUBE:
1348 case TargetOpcode::G_MUL:
1349 case TargetOpcode::G_UMULH:
1351 case TargetOpcode::G_EXTRACT:
1353 case TargetOpcode::G_INSERT:
1355 case TargetOpcode::G_LOAD: {
1356 auto &LoadMI = cast<GLoad>(
MI);
1357 Register DstReg = LoadMI.getDstReg();
1362 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1366 LoadMI.eraseFromParent();
1372 case TargetOpcode::G_ZEXTLOAD:
1373 case TargetOpcode::G_SEXTLOAD: {
1374 auto &LoadMI = cast<GExtLoad>(
MI);
1375 Register DstReg = LoadMI.getDstReg();
1376 Register PtrReg = LoadMI.getPointerReg();
1379 auto &MMO = LoadMI.getMMO();
1382 if (MemSize == NarrowSize) {
1384 }
else if (MemSize < NarrowSize) {
1386 }
else if (MemSize > NarrowSize) {
1391 if (isa<GZExtLoad>(LoadMI))
1396 LoadMI.eraseFromParent();
1399 case TargetOpcode::G_STORE: {
1400 auto &StoreMI = cast<GStore>(
MI);
1402 Register SrcReg = StoreMI.getValueReg();
1407 int NumParts = SizeOp0 / NarrowSize;
1409 unsigned LeftoverBits = SrcTy.
getSizeInBits() - HandledSize;
1410 if (SrcTy.
isVector() && LeftoverBits != 0)
1413 if (8 * StoreMI.getMemSize().getValue() != SrcTy.
getSizeInBits()) {
1417 StoreMI.eraseFromParent();
1423 case TargetOpcode::G_SELECT:
1425 case TargetOpcode::G_AND:
1426 case TargetOpcode::G_OR:
1427 case TargetOpcode::G_XOR: {
1439 case TargetOpcode::G_SHL:
1440 case TargetOpcode::G_LSHR:
1441 case TargetOpcode::G_ASHR:
1443 case TargetOpcode::G_CTLZ:
1444 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1445 case TargetOpcode::G_CTTZ:
1446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1447 case TargetOpcode::G_CTPOP:
1449 switch (
MI.getOpcode()) {
1450 case TargetOpcode::G_CTLZ:
1451 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1453 case TargetOpcode::G_CTTZ:
1454 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1456 case TargetOpcode::G_CTPOP:
1466 case TargetOpcode::G_INTTOPTR:
1474 case TargetOpcode::G_PTRTOINT:
1482 case TargetOpcode::G_PHI: {
1485 if (SizeOp0 % NarrowSize != 0)
1488 unsigned NumParts = SizeOp0 / NarrowSize;
1492 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1500 for (
unsigned i = 0; i < NumParts; ++i) {
1504 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1505 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1510 MI.eraseFromParent();
1513 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1514 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1518 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1524 case TargetOpcode::G_ICMP: {
1543 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1544 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1557 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1558 auto LHS = std::get<0>(LHSAndRHS);
1559 auto RHS = std::get<1>(LHSAndRHS);
1567 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1568 auto LHS = std::get<0>(LHSAndRHS);
1569 auto RHS = std::get<1>(LHSAndRHS);
1571 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1572 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1573 TargetOpcode::G_ZEXT);
1580 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1582 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1587 assert(LHSPartRegs.
size() == 2 &&
"Expected exactly 2 LHS part regs?");
1588 assert(RHSPartRegs.
size() == 2 &&
"Expected exactly 2 RHS part regs?");
1600 MI.eraseFromParent();
1603 case TargetOpcode::G_FCMP:
1612 case TargetOpcode::G_SEXT_INREG: {
1616 int64_t SizeInBits =
MI.getOperand(2).getImm();
1626 MO1.
setReg(TruncMIB.getReg(0));
1641 if (SizeOp0 % NarrowSize != 0)
1643 int NumParts = SizeOp0 / NarrowSize;
1651 for (
int i = 0; i < NumParts; ++i) {
1667 for (
int i = 0; i < NumParts; ++i) {
1670 PartialExtensionReg = DstRegs.
back();
1672 assert(PartialExtensionReg &&
1673 "Expected to visit partial extension before full");
1674 if (FullExtensionReg) {
1681 FullExtensionReg = DstRegs.
back();
1686 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1689 PartialExtensionReg = DstRegs.
back();
1696 MI.eraseFromParent();
1699 case TargetOpcode::G_BSWAP:
1700 case TargetOpcode::G_BITREVERSE: {
1701 if (SizeOp0 % NarrowSize != 0)
1706 unsigned NumParts = SizeOp0 / NarrowSize;
1707 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1710 for (
unsigned i = 0; i < NumParts; ++i) {
1712 {SrcRegs[NumParts - 1 - i]});
1719 MI.eraseFromParent();
1722 case TargetOpcode::G_PTR_ADD:
1723 case TargetOpcode::G_PTRMASK: {
1731 case TargetOpcode::G_FPTOUI:
1732 case TargetOpcode::G_FPTOSI:
1734 case TargetOpcode::G_FPEXT:
1741 case TargetOpcode::G_FLDEXP:
1742 case TargetOpcode::G_STRICT_FLDEXP:
1744 case TargetOpcode::G_VSCALE: {
1755 MI.eraseFromParent();
1783 unsigned OpIdx,
unsigned ExtOpcode) {
1786 MO.
setReg(ExtB.getReg(0));
1793 MO.
setReg(ExtB.getReg(0));
1797 unsigned OpIdx,
unsigned TruncOpcode) {
1806 unsigned OpIdx,
unsigned ExtOpcode) {
1845LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1850 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
1851 if (DstTy.isVector())
1858 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1860 unsigned NumOps =
MI.getNumOperands();
1861 unsigned NumSrc =
MI.getNumOperands() - 1;
1862 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1864 if (WideSize >= DstSize) {
1868 for (
unsigned I = 2;
I != NumOps; ++
I) {
1869 const unsigned Offset = (
I - 1) * PartSize;
1876 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
1882 ResultReg = NextResult;
1885 if (WideSize > DstSize)
1887 else if (DstTy.isPointer())
1890 MI.eraseFromParent();
1915 const int GCD = std::gcd(SrcSize, WideSize);
1926 if (GCD == SrcSize) {
1930 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1936 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
1938 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
1942 const int PartsPerGCD = WideSize / GCD;
1946 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1961 MI.eraseFromParent();
1966LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1971 int NumDst =
MI.getNumOperands() - 1;
1972 Register SrcReg =
MI.getOperand(NumDst).getReg();
1977 Register Dst0Reg =
MI.getOperand(0).getReg();
1987 dbgs() <<
"Not casting non-integral address space integer\n");
2008 for (
int I = 1;
I != NumDst; ++
I) {
2014 MI.eraseFromParent();
2025 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2050 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2055 if (PartsPerRemerge == 1) {
2058 for (
int I = 0;
I != NumUnmerge; ++
I) {
2061 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2062 int Idx =
I * PartsPerUnmerge + J;
2064 MIB.addDef(
MI.getOperand(
Idx).getReg());
2071 MIB.addUse(Unmerge.getReg(
I));
2075 for (
int J = 0; J != NumUnmerge; ++J)
2076 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2079 for (
int I = 0;
I != NumDst; ++
I) {
2080 for (
int J = 0; J < PartsPerRemerge; ++J) {
2081 const int Idx =
I * PartsPerRemerge + J;
2086 RemergeParts.
clear();
2090 MI.eraseFromParent();
2095LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2097 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2098 unsigned Offset =
MI.getOperand(2).getImm();
2101 if (SrcTy.
isVector() || DstTy.isVector())
2117 if (DstTy.isPointer())
2124 MI.eraseFromParent();
2129 LLT ShiftTy = SrcTy;
2138 MI.eraseFromParent();
2169LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2171 if (TypeIdx != 0 || WideTy.
isVector())
2181LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2185 std::optional<Register> CarryIn;
2186 switch (
MI.getOpcode()) {
2189 case TargetOpcode::G_SADDO:
2190 Opcode = TargetOpcode::G_ADD;
2191 ExtOpcode = TargetOpcode::G_SEXT;
2193 case TargetOpcode::G_SSUBO:
2194 Opcode = TargetOpcode::G_SUB;
2195 ExtOpcode = TargetOpcode::G_SEXT;
2197 case TargetOpcode::G_UADDO:
2198 Opcode = TargetOpcode::G_ADD;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2201 case TargetOpcode::G_USUBO:
2202 Opcode = TargetOpcode::G_SUB;
2203 ExtOpcode = TargetOpcode::G_ZEXT;
2205 case TargetOpcode::G_SADDE:
2206 Opcode = TargetOpcode::G_UADDE;
2207 ExtOpcode = TargetOpcode::G_SEXT;
2208 CarryIn =
MI.getOperand(4).getReg();
2210 case TargetOpcode::G_SSUBE:
2211 Opcode = TargetOpcode::G_USUBE;
2212 ExtOpcode = TargetOpcode::G_SEXT;
2213 CarryIn =
MI.getOperand(4).getReg();
2215 case TargetOpcode::G_UADDE:
2216 Opcode = TargetOpcode::G_UADDE;
2217 ExtOpcode = TargetOpcode::G_ZEXT;
2218 CarryIn =
MI.getOperand(4).getReg();
2220 case TargetOpcode::G_USUBE:
2221 Opcode = TargetOpcode::G_USUBE;
2222 ExtOpcode = TargetOpcode::G_ZEXT;
2223 CarryIn =
MI.getOperand(4).getReg();
2244 LLT CarryOutTy = MRI.
getType(
MI.getOperand(1).getReg());
2247 {LHSExt, RHSExt, *CarryIn})
2259 MI.eraseFromParent();
2264LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2266 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2267 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2268 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2269 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2270 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2295 {ShiftL, ShiftR},
MI.getFlags());
2303 MI.eraseFromParent();
2308LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2317 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2320 LLT OverflowTy = MRI.
getType(OriginalOverflow);
2327 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2336 WideMulCanOverflow ?
MI.getOpcode() : (
unsigned)TargetOpcode::G_MUL;
2339 if (WideMulCanOverflow)
2341 {LeftOperand, RightOperand});
2362 if (WideMulCanOverflow) {
2370 MI.eraseFromParent();
2376 switch (
MI.getOpcode()) {
2379 case TargetOpcode::G_ATOMICRMW_XCHG:
2380 case TargetOpcode::G_ATOMICRMW_ADD:
2381 case TargetOpcode::G_ATOMICRMW_SUB:
2382 case TargetOpcode::G_ATOMICRMW_AND:
2383 case TargetOpcode::G_ATOMICRMW_OR:
2384 case TargetOpcode::G_ATOMICRMW_XOR:
2385 case TargetOpcode::G_ATOMICRMW_MIN:
2386 case TargetOpcode::G_ATOMICRMW_MAX:
2387 case TargetOpcode::G_ATOMICRMW_UMIN:
2388 case TargetOpcode::G_ATOMICRMW_UMAX:
2389 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2395 case TargetOpcode::G_ATOMIC_CMPXCHG:
2396 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2403 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2413 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2418 case TargetOpcode::G_EXTRACT:
2419 return widenScalarExtract(
MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_INSERT:
2421 return widenScalarInsert(
MI, TypeIdx, WideTy);
2422 case TargetOpcode::G_MERGE_VALUES:
2423 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2424 case TargetOpcode::G_UNMERGE_VALUES:
2425 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2426 case TargetOpcode::G_SADDO:
2427 case TargetOpcode::G_SSUBO:
2428 case TargetOpcode::G_UADDO:
2429 case TargetOpcode::G_USUBO:
2430 case TargetOpcode::G_SADDE:
2431 case TargetOpcode::G_SSUBE:
2432 case TargetOpcode::G_UADDE:
2433 case TargetOpcode::G_USUBE:
2434 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2435 case TargetOpcode::G_UMULO:
2436 case TargetOpcode::G_SMULO:
2437 return widenScalarMulo(
MI, TypeIdx, WideTy);
2438 case TargetOpcode::G_SADDSAT:
2439 case TargetOpcode::G_SSUBSAT:
2440 case TargetOpcode::G_SSHLSAT:
2441 case TargetOpcode::G_UADDSAT:
2442 case TargetOpcode::G_USUBSAT:
2443 case TargetOpcode::G_USHLSAT:
2444 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2445 case TargetOpcode::G_CTTZ:
2446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2447 case TargetOpcode::G_CTLZ:
2448 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2449 case TargetOpcode::G_CTPOP: {
2460 unsigned ExtOpc =
MI.getOpcode() == TargetOpcode::G_CTTZ ||
2461 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2462 ? TargetOpcode::G_ANYEXT
2463 : TargetOpcode::G_ZEXT;
2466 unsigned NewOpc =
MI.getOpcode();
2467 if (NewOpc == TargetOpcode::G_CTTZ) {
2476 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2481 if (
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2493 if (
MI.getOpcode() == TargetOpcode::G_CTLZ) {
2500 MI.eraseFromParent();
2503 case TargetOpcode::G_BSWAP: {
2512 MI.getOperand(0).setReg(DstExt);
2525 case TargetOpcode::G_BITREVERSE: {
2534 MI.getOperand(0).setReg(DstExt);
2543 case TargetOpcode::G_FREEZE:
2544 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2551 case TargetOpcode::G_ABS:
2558 case TargetOpcode::G_ADD:
2559 case TargetOpcode::G_AND:
2560 case TargetOpcode::G_MUL:
2561 case TargetOpcode::G_OR:
2562 case TargetOpcode::G_XOR:
2563 case TargetOpcode::G_SUB:
2564 case TargetOpcode::G_SHUFFLE_VECTOR:
2575 case TargetOpcode::G_SBFX:
2576 case TargetOpcode::G_UBFX:
2590 case TargetOpcode::G_SHL:
2606 case TargetOpcode::G_ROTR:
2607 case TargetOpcode::G_ROTL:
2616 case TargetOpcode::G_SDIV:
2617 case TargetOpcode::G_SREM:
2618 case TargetOpcode::G_SMIN:
2619 case TargetOpcode::G_SMAX:
2627 case TargetOpcode::G_SDIVREM:
2636 case TargetOpcode::G_ASHR:
2637 case TargetOpcode::G_LSHR:
2641 unsigned CvtOp =
MI.getOpcode() == TargetOpcode::G_ASHR ?
2642 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2655 case TargetOpcode::G_UDIV:
2656 case TargetOpcode::G_UREM:
2657 case TargetOpcode::G_UMIN:
2658 case TargetOpcode::G_UMAX:
2666 case TargetOpcode::G_UDIVREM:
2675 case TargetOpcode::G_SELECT:
2692 case TargetOpcode::G_FPTOSI:
2693 case TargetOpcode::G_FPTOUI:
2694 case TargetOpcode::G_INTRINSIC_LRINT:
2695 case TargetOpcode::G_INTRINSIC_LLRINT:
2696 case TargetOpcode::G_IS_FPCLASS:
2706 case TargetOpcode::G_SITOFP:
2716 case TargetOpcode::G_UITOFP:
2726 case TargetOpcode::G_LOAD:
2727 case TargetOpcode::G_SEXTLOAD:
2728 case TargetOpcode::G_ZEXTLOAD:
2734 case TargetOpcode::G_STORE: {
2745 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2751 case TargetOpcode::G_CONSTANT: {
2755 MRI.
getType(
MI.getOperand(0).getReg()));
2756 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2757 ExtOpc == TargetOpcode::G_ANYEXT) &&
2760 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2764 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
2770 case TargetOpcode::G_FCONSTANT: {
2778 MI.eraseFromParent();
2781 case TargetOpcode::G_IMPLICIT_DEF: {
2787 case TargetOpcode::G_BRCOND:
2793 case TargetOpcode::G_FCMP:
2804 case TargetOpcode::G_ICMP:
2810 MI.getOperand(1).getPredicate()))
2811 ? TargetOpcode::G_SEXT
2812 : TargetOpcode::G_ZEXT;
2819 case TargetOpcode::G_PTR_ADD:
2820 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
2826 case TargetOpcode::G_PHI: {
2827 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
2830 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
2842 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2850 TargetOpcode::G_ANYEXT);
2865 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2901 case TargetOpcode::G_FADD:
2902 case TargetOpcode::G_FMUL:
2903 case TargetOpcode::G_FSUB:
2904 case TargetOpcode::G_FMA:
2905 case TargetOpcode::G_FMAD:
2906 case TargetOpcode::G_FNEG:
2907 case TargetOpcode::G_FABS:
2908 case TargetOpcode::G_FCANONICALIZE:
2909 case TargetOpcode::G_FMINNUM:
2910 case TargetOpcode::G_FMAXNUM:
2911 case TargetOpcode::G_FMINNUM_IEEE:
2912 case TargetOpcode::G_FMAXNUM_IEEE:
2913 case TargetOpcode::G_FMINIMUM:
2914 case TargetOpcode::G_FMAXIMUM:
2915 case TargetOpcode::G_FDIV:
2916 case TargetOpcode::G_FREM:
2917 case TargetOpcode::G_FCEIL:
2918 case TargetOpcode::G_FFLOOR:
2919 case TargetOpcode::G_FCOS:
2920 case TargetOpcode::G_FSIN:
2921 case TargetOpcode::G_FTAN:
2922 case TargetOpcode::G_FACOS:
2923 case TargetOpcode::G_FASIN:
2924 case TargetOpcode::G_FATAN:
2925 case TargetOpcode::G_FCOSH:
2926 case TargetOpcode::G_FSINH:
2927 case TargetOpcode::G_FTANH:
2928 case TargetOpcode::G_FLOG10:
2929 case TargetOpcode::G_FLOG:
2930 case TargetOpcode::G_FLOG2:
2931 case TargetOpcode::G_FRINT:
2932 case TargetOpcode::G_FNEARBYINT:
2933 case TargetOpcode::G_FSQRT:
2934 case TargetOpcode::G_FEXP:
2935 case TargetOpcode::G_FEXP2:
2936 case TargetOpcode::G_FEXP10:
2937 case TargetOpcode::G_FPOW:
2938 case TargetOpcode::G_INTRINSIC_TRUNC:
2939 case TargetOpcode::G_INTRINSIC_ROUND:
2940 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2944 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
2950 case TargetOpcode::G_FPOWI:
2951 case TargetOpcode::G_FLDEXP:
2952 case TargetOpcode::G_STRICT_FLDEXP: {
2954 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2975 case TargetOpcode::G_FFREXP: {
2988 case TargetOpcode::G_INTTOPTR:
2996 case TargetOpcode::G_PTRTOINT:
3004 case TargetOpcode::G_BUILD_VECTOR: {
3008 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3022 case TargetOpcode::G_SEXT_INREG:
3031 case TargetOpcode::G_PTRMASK: {
3039 case TargetOpcode::G_VECREDUCE_FADD:
3040 case TargetOpcode::G_VECREDUCE_FMUL:
3041 case TargetOpcode::G_VECREDUCE_FMIN:
3042 case TargetOpcode::G_VECREDUCE_FMAX:
3043 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3044 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3058 case TargetOpcode::G_VSCALE: {
3065 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3070 case TargetOpcode::G_SPLAT_VECTOR: {
3084 auto Unmerge =
B.buildUnmerge(Ty, Src);
3085 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
3094 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3096 LLT DstLLT =
MRI.getType(DstReg);
3117 MI.eraseFromParent();
3128 MI.eraseFromParent();
3135 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3140 if (DstTy.isVector()) {
3141 int NumDstElt = DstTy.getNumElements();
3145 LLT DstCastTy = DstEltTy;
3146 LLT SrcPartTy = SrcEltTy;
3150 if (NumSrcElt < NumDstElt) {
3160 SrcPartTy = SrcEltTy;
3161 }
else if (NumSrcElt > NumDstElt) {
3172 DstCastTy = DstEltTy;
3182 MI.eraseFromParent();
3186 if (DstTy.isVector()) {
3190 MI.eraseFromParent();
3206 unsigned NewEltSize,
3207 unsigned OldEltSize) {
3208 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3209 LLT IdxTy =
B.getMRI()->getType(
Idx);
3212 auto OffsetMask =
B.buildConstant(
3214 auto OffsetIdx =
B.buildAnd(IdxTy,
Idx, OffsetMask);
3215 return B.buildShl(IdxTy, OffsetIdx,
3216 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3231 auto [Dst, DstTy, SrcVec, SrcVecTy,
Idx, IdxTy] =
MI.getFirst3RegLLTs();
3235 unsigned OldNumElts = SrcVecTy.getNumElements();
3242 if (NewNumElts > OldNumElts) {
3253 if (NewNumElts % OldNumElts != 0)
3257 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3266 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3270 NewOps[
I] = Elt.getReg(0);
3275 MI.eraseFromParent();
3279 if (NewNumElts < OldNumElts) {
3280 if (NewEltSize % OldEltSize != 0)
3302 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3321 MI.eraseFromParent();
3335 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3336 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3337 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3338 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3341 auto EltMask =
B.buildConstant(
3345 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3346 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3349 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3353 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3367 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy,
Idx, IdxTy] =
3368 MI.getFirst4RegLLTs();
3380 if (NewNumElts < OldNumElts) {
3381 if (NewEltSize % OldEltSize != 0)
3390 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3410 CastTy, CastVec, InsertedElt, ScaledIdx).
getReg(0);
3414 MI.eraseFromParent();
3438 auto ConcatMI = dyn_cast<GConcatVectors>(&
MI);
3444 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3448 if (!LI.
isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3449 return UnableToLegalize;
3454 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3456 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3465 MI.eraseFromParent();
3481 if (MemSizeInBits != MemStoreSizeInBits) {
3501 if (isa<GSExtLoad>(LoadMI)) {
3504 }
else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3513 if (DstTy != LoadTy)
3539 uint64_t LargeSplitSize, SmallSplitSize;
3544 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3554 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3579 LargeSplitSize / 8);
3583 SmallPtr, *SmallMMO);
3588 if (AnyExtTy == DstTy)
3623 if (StoreWidth != StoreSizeInBits) {
3658 uint64_t LargeSplitSize, SmallSplitSize;
3661 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.
getSizeInBits());
3668 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3707 switch (
MI.getOpcode()) {
3708 case TargetOpcode::G_LOAD: {
3726 case TargetOpcode::G_STORE: {
3742 case TargetOpcode::G_SELECT: {
3748 dbgs() <<
"bitcast action not implemented for vector select\n");
3759 case TargetOpcode::G_AND:
3760 case TargetOpcode::G_OR:
3761 case TargetOpcode::G_XOR: {
3769 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3771 case TargetOpcode::G_INSERT_VECTOR_ELT:
3773 case TargetOpcode::G_CONCAT_VECTORS:
3781void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
3789 using namespace TargetOpcode;
3791 switch(
MI.getOpcode()) {
3794 case TargetOpcode::G_FCONSTANT:
3796 case TargetOpcode::G_BITCAST:
3798 case TargetOpcode::G_SREM:
3799 case TargetOpcode::G_UREM: {
3803 {MI.getOperand(1), MI.getOperand(2)});
3807 MI.eraseFromParent();
3810 case TargetOpcode::G_SADDO:
3811 case TargetOpcode::G_SSUBO:
3813 case TargetOpcode::G_UMULH:
3814 case TargetOpcode::G_SMULH:
3816 case TargetOpcode::G_SMULO:
3817 case TargetOpcode::G_UMULO: {
3820 auto [Res, Overflow,
LHS,
RHS] =
MI.getFirst4Regs();
3823 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
3824 ? TargetOpcode::G_SMULH
3825 : TargetOpcode::G_UMULH;
3829 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
3830 MI.removeOperand(1);
3841 if (Opcode == TargetOpcode::G_SMULH) {
3850 case TargetOpcode::G_FNEG: {
3851 auto [Res, SubByReg] =
MI.getFirst2Regs();
3861 MI.eraseFromParent();
3864 case TargetOpcode::G_FSUB:
3865 case TargetOpcode::G_STRICT_FSUB: {
3866 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
3872 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3877 MI.eraseFromParent();
3880 case TargetOpcode::G_FMAD:
3882 case TargetOpcode::G_FFLOOR:
3884 case TargetOpcode::G_INTRINSIC_ROUND:
3886 case TargetOpcode::G_FRINT: {
3889 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3892 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3893 auto [OldValRes, SuccessRes,
Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
3896 **
MI.memoperands_begin());
3899 MI.eraseFromParent();
3902 case TargetOpcode::G_LOAD:
3903 case TargetOpcode::G_SEXTLOAD:
3904 case TargetOpcode::G_ZEXTLOAD:
3906 case TargetOpcode::G_STORE:
3908 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3909 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3910 case TargetOpcode::G_CTLZ:
3911 case TargetOpcode::G_CTTZ:
3912 case TargetOpcode::G_CTPOP:
3915 auto [Res, CarryOut,
LHS,
RHS] =
MI.getFirst4Regs();
3924 MI.eraseFromParent();
3928 auto [Res, CarryOut,
LHS,
RHS, CarryIn] =
MI.getFirst5Regs();
3954 MI.eraseFromParent();
3958 auto [Res, BorrowOut,
LHS,
RHS] =
MI.getFirst4Regs();
3963 MI.eraseFromParent();
3967 auto [Res, BorrowOut,
LHS,
RHS, BorrowIn] =
MI.getFirst5Regs();
3989 MI.eraseFromParent();
4014 case G_MERGE_VALUES:
4016 case G_UNMERGE_VALUES:
4018 case TargetOpcode::G_SEXT_INREG: {
4019 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4020 int64_t SizeInBits =
MI.getOperand(2).getImm();
4022 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4029 MI.eraseFromParent();
4032 case G_EXTRACT_VECTOR_ELT:
4033 case G_INSERT_VECTOR_ELT:
4035 case G_SHUFFLE_VECTOR:
4037 case G_VECTOR_COMPRESS:
4039 case G_DYN_STACKALLOC:
4043 case G_STACKRESTORE:
4053 case G_READ_REGISTER:
4054 case G_WRITE_REGISTER:
4099 case G_MEMCPY_INLINE:
4100 return lowerMemcpyInline(
MI);
4131 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4140 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4152 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4155 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4166 "Converting bits to bytes lost precision");
4173 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4190 std::initializer_list<unsigned> NonVecOpIndices) {
4191 if (
MI.getNumMemOperands() != 0)
4194 LLT VecTy =
MRI.getType(
MI.getReg(0));
4199 for (
unsigned OpIdx = 1; OpIdx <
MI.getNumOperands(); ++OpIdx) {
4232 int NumParts, NumLeftover;
4233 std::tie(NumParts, NumLeftover) =
4236 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
4237 for (
int i = 0; i < NumParts; ++i) {
4242 assert(NumLeftover == 1 &&
"expected exactly one leftover");
4251 for (
unsigned i = 0; i <
N; ++i) {
4254 else if (
Op.isImm())
4256 else if (
Op.isPredicate())
4278 std::initializer_list<unsigned> NonVecOpIndices) {
4280 "Non-compatible opcode or not specified non-vector operands");
4283 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4284 unsigned NumDefs =
MI.getNumDefs();
4292 for (
unsigned i = 0; i < NumDefs; ++i) {
4301 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4302 ++UseIdx, ++UseNo) {
4305 MI.getOperand(UseIdx));
4310 for (
auto Reg : SplitPieces)
4315 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4319 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4321 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4322 Defs.
push_back(OutputOpsPieces[DstNo][i]);
4325 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4326 Uses.push_back(InputOpsPieces[InputNo][i]);
4329 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4330 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
4335 for (
unsigned i = 0; i < NumDefs; ++i)
4336 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
4338 for (
unsigned i = 0; i < NumDefs; ++i)
4342 MI.eraseFromParent();
4351 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4352 unsigned NumDefs =
MI.getNumDefs();
4361 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4362 UseIdx += 2, ++UseNo) {
4370 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4372 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4378 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
4379 Phi.addUse(InputOpsPieces[j][i]);
4380 Phi.add(
MI.getOperand(1 + j * 2 + 1));
4390 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
4395 MI.eraseFromParent();
4403 const int NumDst =
MI.getNumOperands() - 1;
4404 const Register SrcReg =
MI.getOperand(NumDst).getReg();
4408 if (TypeIdx != 1 || NarrowTy == DstTy)
4434 const int PartsPerUnmerge = NumDst / NumUnmerge;
4436 for (
int I = 0;
I != NumUnmerge; ++
I) {
4439 for (
int J = 0; J != PartsPerUnmerge; ++J)
4440 MIB.
addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
4441 MIB.
addUse(Unmerge.getReg(
I));
4444 MI.eraseFromParent();
4451 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
4455 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
4457 if (NarrowTy == SrcTy)
4467 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
4481 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
4483 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4489 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4490 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
4491 ++i,
Offset += NumNarrowTyElts) {
4498 MI.eraseFromParent();
4502 assert(TypeIdx == 0 &&
"Bad type index");
4518 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
4521 for (
unsigned i = 0; i < NumParts; ++i) {
4523 for (
unsigned j = 0; j < NumElts; ++j)
4524 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
4530 MI.eraseFromParent();
4538 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
4540 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4542 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
4544 InsertVal =
MI.getOperand(2).getReg();
4559 IdxVal = MaybeCst->Value.getSExtValue();
4563 MI.eraseFromParent();
4568 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4571 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4572 TargetOpcode::G_ANYEXT);
4577 int64_t PartIdx = IdxVal / NewNumElts;
4586 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4587 VecParts[PartIdx] = InsertPart.getReg(0);
4591 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4596 MI.eraseFromParent();
4620 bool IsLoad = isa<GLoad>(LdStMI);
4632 int NumLeftover = -1;
4638 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4640 NumParts = NarrowRegs.
size();
4641 NumLeftover = NarrowLeftoverRegs.
size();
4658 auto MMO = LdStMI.
getMMO();
4660 unsigned NumParts,
unsigned Offset) ->
unsigned {
4663 for (
unsigned Idx = 0, E = NumParts;
Idx != E &&
Offset < TotalSize;
4665 unsigned ByteOffset =
Offset / 8;
4675 ValRegs.push_back(Dst);
4687 unsigned HandledOffset =
4688 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
4692 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4695 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4696 LeftoverTy, NarrowLeftoverRegs);
4706 using namespace TargetOpcode;
4710 switch (
MI.getOpcode()) {
4711 case G_IMPLICIT_DEF:
4727 case G_FCANONICALIZE:
4744 case G_INTRINSIC_ROUND:
4745 case G_INTRINSIC_ROUNDEVEN:
4746 case G_INTRINSIC_TRUNC:
4772 case G_FMINNUM_IEEE:
4773 case G_FMAXNUM_IEEE:
4793 case G_CTLZ_ZERO_UNDEF:
4795 case G_CTTZ_ZERO_UNDEF:
4809 case G_ADDRSPACE_CAST:
4822 case G_STRICT_FLDEXP:
4836 case G_UNMERGE_VALUES:
4838 case G_BUILD_VECTOR:
4839 assert(TypeIdx == 0 &&
"not a vector type index");
4841 case G_CONCAT_VECTORS:
4845 case G_EXTRACT_VECTOR_ELT:
4846 case G_INSERT_VECTOR_ELT:
4855 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4856 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4858 case G_SHUFFLE_VECTOR:
4864 case G_INTRINSIC_FPTRUNC_ROUND:
4874 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
4875 "Not a bitcast operation");
4880 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
4888 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4893 for (
unsigned i = 0; i < SrcVRegs.
size(); i++)
4898 MI.eraseFromParent();
4904 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4908 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4909 MI.getFirst3RegLLTs();
4912 if (DstTy != Src1Ty)
4914 if (DstTy != Src2Ty)
4929 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4945 unsigned InputUsed[2] = {-1U, -1U};
4946 unsigned FirstMaskIdx =
High * NewElts;
4947 bool UseBuildVector =
false;
4948 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4950 int Idx = Mask[FirstMaskIdx + MaskOffset];
4955 if (Input >= std::size(Inputs)) {
4962 Idx -= Input * NewElts;
4966 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4967 if (InputUsed[OpNo] == Input) {
4970 }
else if (InputUsed[OpNo] == -1U) {
4972 InputUsed[OpNo] = Input;
4977 if (OpNo >= std::size(InputUsed)) {
4980 UseBuildVector =
true;
4988 if (UseBuildVector) {
4993 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4995 int Idx = Mask[FirstMaskIdx + MaskOffset];
5000 if (Input >= std::size(Inputs)) {
5007 Idx -= Input * NewElts;
5011 .buildExtractVectorElement(
5012 EltTy, Inputs[Input],
5019 }
else if (InputUsed[0] == -1U) {
5023 Register Op0 = Inputs[InputUsed[0]];
5027 : Inputs[InputUsed[1]];
5036 MI.eraseFromParent();
5042 auto &RdxMI = cast<GVecReduce>(
MI);
5049 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5055 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5058 const unsigned NumParts =
5064 if (DstTy != NarrowTy)
5070 unsigned NumPartsLeft = NumParts;
5071 while (NumPartsLeft > 1) {
5072 for (
unsigned Idx = 0;
Idx < NumPartsLeft - 1;
Idx += 2) {
5075 .buildInstr(ScalarOpc, {NarrowTy},
5076 {SplitSrcs[
Idx], SplitSrcs[
Idx + 1]})
5079 SplitSrcs = PartialResults;
5080 PartialResults.
clear();
5081 NumPartsLeft = SplitSrcs.
size();
5085 MI.eraseFromParent();
5090 for (
unsigned Idx = 1;
Idx < NumParts; ++
Idx)
5094 MI.eraseFromParent();
5098 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5108 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5111 Register Acc = PartialReductions[0];
5112 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5113 if (Part == NumParts - 1) {
5115 {Acc, PartialReductions[Part]});
5118 .
buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5122 MI.eraseFromParent();
5128 unsigned int TypeIdx,
5130 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5131 MI.getFirst3RegLLTs();
5132 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5136 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5137 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5138 "Unexpected vecreduce opcode");
5139 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5140 ? TargetOpcode::G_FADD
5141 : TargetOpcode::G_FMUL;
5147 for (
unsigned i = 0; i < NumParts; i++)
5152 MI.eraseFromParent();
5159 unsigned ScalarOpc) {
5167 while (SplitSrcs.
size() > 1) {
5169 for (
unsigned Idx = 0;
Idx < SplitSrcs.
size()-1;
Idx += 2) {
5177 SplitSrcs = std::move(PartialRdxs);
5181 MI.getOperand(1).setReg(SplitSrcs[0]);
5188 const LLT HalfTy,
const LLT AmtTy) {
5196 MI.eraseFromParent();
5202 unsigned VTBits = 2 * NVTBits;
5205 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
5206 if (Amt.
ugt(VTBits)) {
5208 }
else if (Amt.
ugt(NVTBits)) {
5212 }
else if (Amt == NVTBits) {
5223 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5224 if (Amt.
ugt(VTBits)) {
5226 }
else if (Amt.
ugt(NVTBits)) {
5230 }
else if (Amt == NVTBits) {
5244 if (Amt.
ugt(VTBits)) {
5247 }
else if (Amt.
ugt(NVTBits)) {
5252 }
else if (Amt == NVTBits) {
5269 MI.eraseFromParent();
5293 if (DstEltSize % 2 != 0)
5299 const unsigned NewBitSize = DstEltSize / 2;
5325 switch (
MI.getOpcode()) {
5326 case TargetOpcode::G_SHL: {
5342 ResultRegs[0] =
Lo.getReg(0);
5343 ResultRegs[1] =
Hi.getReg(0);
5346 case TargetOpcode::G_LSHR:
5347 case TargetOpcode::G_ASHR: {
5357 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5371 ResultRegs[0] =
Lo.getReg(0);
5372 ResultRegs[1] =
Hi.getReg(0);
5380 MI.eraseFromParent();
5387 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
5390 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
5405 assert(Ty.
isScalar() &&
"Expected scalar type to make neutral element for");
5410 "getNeutralElementForVecReduce called with invalid opcode!");
5411 case TargetOpcode::G_VECREDUCE_ADD:
5412 case TargetOpcode::G_VECREDUCE_OR:
5413 case TargetOpcode::G_VECREDUCE_XOR:
5414 case TargetOpcode::G_VECREDUCE_UMAX:
5416 case TargetOpcode::G_VECREDUCE_MUL:
5418 case TargetOpcode::G_VECREDUCE_AND:
5419 case TargetOpcode::G_VECREDUCE_UMIN:
5422 case TargetOpcode::G_VECREDUCE_SMAX:
5425 case TargetOpcode::G_VECREDUCE_SMIN:
5428 case TargetOpcode::G_VECREDUCE_FADD:
5430 case TargetOpcode::G_VECREDUCE_FMUL:
5432 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5433 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5434 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
5435 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5443 unsigned Opc =
MI.getOpcode();
5445 case TargetOpcode::G_IMPLICIT_DEF:
5446 case TargetOpcode::G_LOAD: {
5454 case TargetOpcode::G_STORE:
5461 case TargetOpcode::G_AND:
5462 case TargetOpcode::G_OR:
5463 case TargetOpcode::G_XOR:
5464 case TargetOpcode::G_ADD:
5465 case TargetOpcode::G_SUB:
5466 case TargetOpcode::G_MUL:
5467 case TargetOpcode::G_FADD:
5468 case TargetOpcode::G_FSUB:
5469 case TargetOpcode::G_FMUL:
5470 case TargetOpcode::G_FDIV:
5471 case TargetOpcode::G_FCOPYSIGN:
5472 case TargetOpcode::G_UADDSAT:
5473 case TargetOpcode::G_USUBSAT:
5474 case TargetOpcode::G_SADDSAT:
5475 case TargetOpcode::G_SSUBSAT:
5476 case TargetOpcode::G_SMIN:
5477 case TargetOpcode::G_SMAX:
5478 case TargetOpcode::G_UMIN:
5479 case TargetOpcode::G_UMAX:
5480 case TargetOpcode::G_FMINNUM:
5481 case TargetOpcode::G_FMAXNUM:
5482 case TargetOpcode::G_FMINNUM_IEEE:
5483 case TargetOpcode::G_FMAXNUM_IEEE:
5484 case TargetOpcode::G_FMINIMUM:
5485 case TargetOpcode::G_FMAXIMUM:
5486 case TargetOpcode::G_STRICT_FADD:
5487 case TargetOpcode::G_STRICT_FSUB:
5488 case TargetOpcode::G_STRICT_FMUL:
5489 case TargetOpcode::G_SHL:
5490 case TargetOpcode::G_ASHR:
5491 case TargetOpcode::G_LSHR: {
5499 case TargetOpcode::G_FMA:
5500 case TargetOpcode::G_STRICT_FMA:
5501 case TargetOpcode::G_FSHR:
5502 case TargetOpcode::G_FSHL: {
5511 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5512 case TargetOpcode::G_EXTRACT:
5519 case TargetOpcode::G_INSERT:
5520 case TargetOpcode::G_INSERT_VECTOR_ELT:
5521 case TargetOpcode::G_FREEZE:
5522 case TargetOpcode::G_FNEG:
5523 case TargetOpcode::G_FABS:
5524 case TargetOpcode::G_FSQRT:
5525 case TargetOpcode::G_FCEIL:
5526 case TargetOpcode::G_FFLOOR:
5527 case TargetOpcode::G_FNEARBYINT:
5528 case TargetOpcode::G_FRINT:
5529 case TargetOpcode::G_INTRINSIC_ROUND:
5530 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5531 case TargetOpcode::G_INTRINSIC_TRUNC:
5532 case TargetOpcode::G_BSWAP:
5533 case TargetOpcode::G_FCANONICALIZE:
5534 case TargetOpcode::G_SEXT_INREG:
5535 case TargetOpcode::G_ABS:
5543 case TargetOpcode::G_SELECT: {
5544 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
5546 if (!CondTy.isScalar() ||
5554 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5559 if (CondTy.isVector())
5569 case TargetOpcode::G_UNMERGE_VALUES:
5571 case TargetOpcode::G_PHI:
5573 case TargetOpcode::G_SHUFFLE_VECTOR:
5575 case TargetOpcode::G_BUILD_VECTOR: {
5577 for (
auto Op :
MI.uses()) {
5587 MI.eraseFromParent();
5590 case TargetOpcode::G_SEXT:
5591 case TargetOpcode::G_ZEXT:
5592 case TargetOpcode::G_ANYEXT:
5593 case TargetOpcode::G_TRUNC:
5594 case TargetOpcode::G_FPTRUNC:
5595 case TargetOpcode::G_FPEXT:
5596 case TargetOpcode::G_FPTOSI:
5597 case TargetOpcode::G_FPTOUI:
5598 case TargetOpcode::G_SITOFP:
5599 case TargetOpcode::G_UITOFP: {
5619 case TargetOpcode::G_ICMP:
5620 case TargetOpcode::G_FCMP: {
5634 case TargetOpcode::G_BITCAST: {
5655 case TargetOpcode::G_VECREDUCE_FADD:
5656 case TargetOpcode::G_VECREDUCE_FMUL:
5657 case TargetOpcode::G_VECREDUCE_ADD:
5658 case TargetOpcode::G_VECREDUCE_MUL:
5659 case TargetOpcode::G_VECREDUCE_AND:
5660 case TargetOpcode::G_VECREDUCE_OR:
5661 case TargetOpcode::G_VECREDUCE_XOR:
5662 case TargetOpcode::G_VECREDUCE_SMAX:
5663 case TargetOpcode::G_VECREDUCE_SMIN:
5664 case TargetOpcode::G_VECREDUCE_UMAX:
5665 case TargetOpcode::G_VECREDUCE_UMIN: {
5669 auto NeutralElement = getNeutralElementForVecReduce(
5677 NeutralElement,
Idx);
5681 MO.
setReg(NewVec.getReg(0));
5693 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5695 unsigned MaskNumElts = Mask.size();
5699 if (MaskNumElts == SrcNumElts)
5702 if (MaskNumElts < SrcNumElts) {
5706 for (
unsigned I = MaskNumElts;
I < SrcNumElts; ++
I)
5712 MI.getOperand(1).getReg(),
5713 MI.getOperand(2).getReg(), NewMask);
5714 MI.eraseFromParent();
5719 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
5720 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5728 MOps1[0] =
MI.getOperand(1).getReg();
5729 MOps2[0] =
MI.getOperand(2).getReg();
5736 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
5738 if (
Idx >=
static_cast<int>(SrcNumElts))
5739 Idx += PaddedMaskNumElts - SrcNumElts;
5744 if (MaskNumElts != PaddedMaskNumElts) {
5749 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
5759 MI.eraseFromParent();
5765 unsigned int TypeIdx,
LLT MoreTy) {
5766 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
5768 unsigned NumElts = DstTy.getNumElements();
5771 if (DstTy.isVector() && Src1Ty.isVector() &&
5772 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5780 if (DstTy != Src1Ty || DstTy != Src2Ty)
5788 for (
unsigned I = 0;
I != NumElts; ++
I) {
5790 if (
Idx <
static_cast<int>(NumElts))
5795 for (
unsigned I = NumElts;
I != WidenNumElts; ++
I)
5800 MI.getOperand(1).getReg(),
5801 MI.getOperand(2).getReg(), NewMask);
5802 MI.eraseFromParent();
5811 unsigned SrcParts = Src1Regs.
size();
5812 unsigned DstParts = DstRegs.
size();
5814 unsigned DstIdx = 0;
5816 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5817 DstRegs[DstIdx] = FactorSum;
5819 unsigned CarrySumPrevDstIdx;
5822 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5824 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5825 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5827 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5831 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5832 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5834 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5844 if (DstIdx != DstParts - 1) {
5846 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
5847 FactorSum = Uaddo.
getReg(0);
5848 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).
getReg(0);
5849 for (
unsigned i = 2; i < Factors.
size(); ++i) {
5851 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
5852 FactorSum = Uaddo.
getReg(0);
5854 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5858 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5859 for (
unsigned i = 2; i < Factors.
size(); ++i)
5860 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5863 CarrySumPrevDstIdx = CarrySum;
5864 DstRegs[DstIdx] = FactorSum;
5881 unsigned Opcode =
MI.getOpcode();
5882 unsigned OpO, OpE, OpF;
5884 case TargetOpcode::G_SADDO:
5885 case TargetOpcode::G_SADDE:
5886 case TargetOpcode::G_UADDO:
5887 case TargetOpcode::G_UADDE:
5888 case TargetOpcode::G_ADD:
5889 OpO = TargetOpcode::G_UADDO;
5890 OpE = TargetOpcode::G_UADDE;
5891 OpF = TargetOpcode::G_UADDE;
5892 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5893 OpF = TargetOpcode::G_SADDE;
5895 case TargetOpcode::G_SSUBO:
5896 case TargetOpcode::G_SSUBE:
5897 case TargetOpcode::G_USUBO:
5898 case TargetOpcode::G_USUBE:
5899 case TargetOpcode::G_SUB:
5900 OpO = TargetOpcode::G_USUBO;
5901 OpE = TargetOpcode::G_USUBE;
5902 OpF = TargetOpcode::G_USUBE;
5903 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5904 OpF = TargetOpcode::G_SSUBE;
5911 unsigned NumDefs =
MI.getNumExplicitDefs();
5912 Register Src1 =
MI.getOperand(NumDefs).getReg();
5913 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
5916 CarryDst =
MI.getOperand(1).getReg();
5917 if (
MI.getNumOperands() == NumDefs + 3)
5918 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
5921 LLT LeftoverTy, DummyTy;
5923 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5928 int NarrowParts = Src1Regs.
size();
5929 for (
int I = 0, E = Src1Left.
size();
I != E; ++
I) {
5935 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
5940 if (i == e - 1 && CarryDst)
5941 CarryOut = CarryDst;
5945 {Src1Regs[i], Src2Regs[i]});
5946 }
else if (i == e - 1) {
5948 {Src1Regs[i], Src2Regs[i], CarryIn});
5951 {Src1Regs[i], Src2Regs[i], CarryIn});
5957 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
5958 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5959 ArrayRef(DstRegs).drop_front(NarrowParts));
5961 MI.eraseFromParent();
5967 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
5975 if (
Size % NarrowSize != 0)
5978 unsigned NumParts =
Size / NarrowSize;
5979 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
5980 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5986 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5991 MI.eraseFromParent();
6001 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
6015 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6031 if (SizeOp1 % NarrowSize != 0)
6033 int NumParts = SizeOp1 / NarrowSize;
6037 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6041 uint64_t OpStart =
MI.getOperand(2).getImm();
6043 for (
int i = 0; i < NumParts; ++i) {
6044 unsigned SrcStart = i * NarrowSize;
6046 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6049 }
else if (SrcStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6057 int64_t ExtractOffset;
6059 if (OpStart < SrcStart) {
6061 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6063 ExtractOffset = OpStart - SrcStart;
6064 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6068 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6080 else if (DstRegs.
size() > 1)
6084 MI.eraseFromParent();
6099 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6107 uint64_t OpStart =
MI.getOperand(3).getImm();
6109 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
6110 unsigned DstStart =
I * NarrowSize;
6112 if (DstStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6120 if (MRI.
getType(SrcRegs[
I]) == LeftoverTy) {
6126 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6134 int64_t ExtractOffset, InsertOffset;
6136 if (OpStart < DstStart) {
6138 ExtractOffset = DstStart - OpStart;
6139 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6141 InsertOffset = OpStart - DstStart;
6144 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6148 if (ExtractOffset != 0 || SegSize != OpSize) {
6168 MI.eraseFromParent();
6178 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
6184 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6185 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
6189 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6190 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6193 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6195 {Src0Regs[I], Src1Regs[I]});
6199 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6202 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6203 DstLeftoverRegs.
push_back(Inst.getReg(0));
6206 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6207 LeftoverTy, DstLeftoverRegs);
6209 MI.eraseFromParent();
6219 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
6226 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6227 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
6228 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6230 MI.eraseFromParent();
6240 Register CondReg =
MI.getOperand(1).getReg();
6252 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6253 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6257 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6258 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
6261 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6263 CondReg, Src1Regs[
I], Src2Regs[
I]);
6267 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6269 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
6273 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6274 LeftoverTy, DstLeftoverRegs);
6276 MI.eraseFromParent();
6286 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6290 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6293 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6295 auto C_0 =
B.buildConstant(NarrowTy, 0);
6297 UnmergeSrc.getReg(1), C_0);
6298 auto LoCTLZ = IsUndef ?
6299 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6300 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6301 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6302 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6303 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6304 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6306 MI.eraseFromParent();
6319 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6323 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6326 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6328 auto C_0 =
B.buildConstant(NarrowTy, 0);
6330 UnmergeSrc.getReg(0), C_0);
6331 auto HiCTTZ = IsUndef ?
6332 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6333 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6334 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6335 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6336 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6337 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6339 MI.eraseFromParent();
6352 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6362 MI.eraseFromParent();
6382 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
6383 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
6384 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
6385 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
6387 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
6389 MI.getOperand(2).setReg(Trunc.getReg(0));
6396 unsigned Opc =
MI.getOpcode();
6405 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6408 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
6412 case TargetOpcode::G_CTLZ: {
6413 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6416 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6424 MI.eraseFromParent();
6440 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6444 Op = MIBOp.getReg(0);
6449 MI.eraseFromParent();
6452 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6455 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
6459 case TargetOpcode::G_CTTZ: {
6460 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6463 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6472 MI.eraseFromParent();
6483 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6484 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6488 MI.eraseFromParent();
6492 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
6493 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6497 case TargetOpcode::G_CTPOP: {
6508 auto C_1 =
B.buildConstant(Ty, 1);
6509 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
6511 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
6512 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6513 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
6517 auto C_2 =
B.buildConstant(Ty, 2);
6518 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
6520 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
6521 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6522 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6523 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6530 auto C_4 =
B.buildConstant(Ty, 4);
6531 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
6532 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
6534 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
6535 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6537 assert(
Size<=128 &&
"Scalar size is too large for CTPOP lower algorithm");
6543 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
6545 auto IsMulSupported = [
this](
const LLT Ty) {
6546 auto Action = LI.
getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6549 if (IsMulSupported(Ty)) {
6550 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
6551 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6553 auto ResTmp = B8Count;
6554 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
6555 auto ShiftC =
B.buildConstant(Ty, Shift);
6556 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
6557 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
6559 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6561 MI.eraseFromParent();
6574 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
C);
6582 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
6591 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6592 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6615 MI.eraseFromParent();
6621 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
6626 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6670 MI.eraseFromParent();
6684 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6685 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6689 return lowerFunnelShiftAsShifts(
MI);
6693 if (Result == UnableToLegalize)
6694 return lowerFunnelShiftAsShifts(
MI);
6699 auto [Dst, Src] =
MI.getFirst2Regs();
6713 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6727 {UnmergeSrc.getReg(0)});
6729 {UnmergeSrc.getReg(1)});
6734 MI.eraseFromParent();
6751 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
6755 LLT DstTy =
MRI.getType(DstReg);
6756 LLT SrcTy =
MRI.getType(SrcReg);
6776 for (
unsigned I = 0;
I < SplitSrcs.
size(); ++
I) {
6790 MI.eraseFromParent();
6799 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
6801 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
6802 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6805 MI.eraseFromParent();
6810 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
6812 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6813 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
6818 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6821 return lowerRotateWithReverseRotate(
MI);
6824 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6825 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6826 bool IsFShLegal =
false;
6827 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6828 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6832 MI.eraseFromParent();
6837 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6840 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6845 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6846 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6847 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
6853 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
6854 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6856 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6862 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
6863 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
6865 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6867 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6871 MIRBuilder.
buildOr(Dst, ShVal, RevShiftVal);
6872 MI.eraseFromParent();
6880 auto [Dst, Src] =
MI.getFirst2Regs();
6930 MI.eraseFromParent();
6935 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6941 MI.eraseFromParent();
6960 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6970 MI.eraseFromParent();
6995 MI.eraseFromParent();
7003 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7007 if (SrcTy !=
S64 && SrcTy !=
S32)
7009 if (DstTy !=
S32 && DstTy !=
S64)
7038 MI.eraseFromParent();
7043 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7102 MI.eraseFromParent();
7112 auto [Dst, Src] =
MI.getFirst2Regs();
7120 unsigned Flags =
MI.getFlags();
7123 MI.eraseFromParent();
7127 const unsigned ExpMask = 0x7ff;
7128 const unsigned ExpBiasf64 = 1023;
7129 const unsigned ExpBiasf16 = 15;
7218 MI.eraseFromParent();
7224 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
7235 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7240 MI.eraseFromParent();
7246 case TargetOpcode::G_SMIN:
7248 case TargetOpcode::G_SMAX:
7250 case TargetOpcode::G_UMIN:
7252 case TargetOpcode::G_UMAX:
7260 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7268 MI.eraseFromParent();
7274 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
7275 const int Src0Size = Src0Ty.getScalarSizeInBits();
7276 const int Src1Size = Src1Ty.getScalarSizeInBits();
7286 if (Src0Ty == Src1Ty) {
7288 }
else if (Src0Size > Src1Size) {
7303 unsigned Flags =
MI.getFlags();
7310 MI.eraseFromParent();
7316 unsigned NewOp =
MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7317 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7319 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7339 MI.eraseFromParent();
7347 unsigned Flags =
MI.getFlags();
7352 MI.eraseFromParent();
7358 auto [DstReg,
X] =
MI.getFirst2Regs();
7359 const unsigned Flags =
MI.getFlags();
7386 MI.eraseFromParent();
7391 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7392 unsigned Flags =
MI.getFlags();
7404 SrcReg, Zero, Flags);
7406 SrcReg, Trunc, Flags);
7411 MI.eraseFromParent();
7417 const unsigned NumOps =
MI.getNumOperands();
7418 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
7419 unsigned PartSize = Src0Ty.getSizeInBits();
7424 for (
unsigned I = 2;
I != NumOps; ++
I) {
7425 const unsigned Offset = (
I - 1) * PartSize;
7430 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
7436 ResultReg = NextResult;
7439 if (DstTy.isPointer()) {
7441 DstTy.getAddressSpace())) {
7449 MI.eraseFromParent();
7455 const unsigned NumDst =
MI.getNumOperands() - 1;
7456 Register SrcReg =
MI.getOperand(NumDst).getReg();
7457 Register Dst0Reg =
MI.getOperand(0).getReg();
7472 unsigned Offset = DstSize;
7473 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
7479 MI.eraseFromParent();
7498 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7499 InsertVal =
MI.getOperand(2).getReg();
7513 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
7519 MI.eraseFromParent();
7524 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
7542 int64_t
Offset = IdxVal * EltBytes;
7561 MI.eraseFromParent();
7567 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7568 MI.getFirst3RegLLTs();
7576 for (
int Idx : Mask) {
7578 if (!Undef.isValid())
7584 if (Src0Ty.isScalar()) {
7587 int NumElts = Src0Ty.getNumElements();
7588 Register SrcVec =
Idx < NumElts ? Src0Reg : Src1Reg;
7589 int ExtractIdx =
Idx < NumElts ?
Idx :
Idx - NumElts;
7596 if (DstTy.isScalar())
7600 MI.eraseFromParent();
7606 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
7607 MI.getFirst4RegLLTs();
7609 if (VecTy.isScalableVector())
7634 std::optional<APInt> PassthruSplatVal =
7637 if (PassthruSplatVal.has_value()) {
7640 }
else if (HasPassthru) {
7652 unsigned NumElmts = VecTy.getNumElements();
7653 for (
unsigned I = 0;
I < NumElmts; ++
I) {
7668 if (HasPassthru &&
I == NumElmts - 1) {
7674 {OutPos, EndOfVector});
7687 MI.eraseFromParent();
7704 if (Alignment >
Align(1)) {
7716 const auto &MF = *
MI.getMF();
7717 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7722 Register AllocSize =
MI.getOperand(1).getReg();
7733 MI.eraseFromParent();
7744 MI.eraseFromParent();
7755 MI.eraseFromParent();
7761 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7762 unsigned Offset =
MI.getOperand(2).getImm();
7767 unsigned DstSize = DstTy.getSizeInBits();
7769 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7776 for (
unsigned Idx =
Offset / SrcEltSize;
7780 if (SubVectorElts.
size() == 1)
7785 MI.eraseFromParent();
7790 if (DstTy.isScalar() &&
7793 LLT SrcIntTy = SrcTy;
7807 MI.eraseFromParent();
7815 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
7827 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7840 for (
unsigned i = 0;
Idx < (
Offset + InsertSize) / EltSize;
7842 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
7855 MI.eraseFromParent();
7869 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
7873 LLT IntDstTy = DstTy;
7899 MI.eraseFromParent();
7905 auto [Dst0, Dst0Ty, Dst1, Dst1Ty,
LHS, LHSTy,
RHS, RHSTy] =
7906 MI.getFirst4RegLLTs();
7907 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
7910 LLT BoolTy = Dst1Ty;
7929 auto ResultLowerThanLHS =
7937 MI.eraseFromParent();
7944 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
7949 switch (
MI.getOpcode()) {
7952 case TargetOpcode::G_UADDSAT:
7955 BaseOp = TargetOpcode::G_ADD;
7957 case TargetOpcode::G_SADDSAT:
7960 BaseOp = TargetOpcode::G_ADD;
7962 case TargetOpcode::G_USUBSAT:
7965 BaseOp = TargetOpcode::G_SUB;
7967 case TargetOpcode::G_SSUBSAT:
7970 BaseOp = TargetOpcode::G_SUB;
8013 MI.eraseFromParent();
8019 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8024 unsigned OverflowOp;
8025 switch (
MI.getOpcode()) {
8028 case TargetOpcode::G_UADDSAT:
8031 OverflowOp = TargetOpcode::G_UADDO;
8033 case TargetOpcode::G_SADDSAT:
8036 OverflowOp = TargetOpcode::G_SADDO;
8038 case TargetOpcode::G_USUBSAT:
8041 OverflowOp = TargetOpcode::G_USUBO;
8043 case TargetOpcode::G_SSUBSAT:
8046 OverflowOp = TargetOpcode::G_SSUBO;
8052 Register Tmp = OverflowRes.getReg(0);
8053 Register Ov = OverflowRes.getReg(1);
8079 MI.eraseFromParent();
8085 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8086 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8087 "Expected shlsat opcode!");
8088 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8089 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8111 MI.eraseFromParent();
8116 auto [Dst, Src] =
MI.getFirst2Regs();
8119 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8128 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
8130 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8142 Res.getInstr()->getOperand(0).setReg(Dst);
8144 MI.eraseFromParent();
8151 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
8154 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8155 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8156 return B.buildOr(Dst,
LHS,
RHS);
8161 auto [Dst, Src] =
MI.getFirst2Regs();
8189 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
8209 MI.eraseFromParent();
8217 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8218 int NameOpIdx = IsRead ? 1 : 0;
8219 int ValRegIndex = IsRead ? 0 : 1;
8221 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
8223 const MDString *RegStr = cast<MDString>(
8224 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8235 MI.eraseFromParent();
8241 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
8242 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8251 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8257 MI.eraseFromParent();
8263 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8268 MI.eraseFromParent();
8273 MI.eraseFromParent();
8292 APInt ExpMask = Inf;
8310 LLT DstTyCopy = DstTy;
8325 Mask &= ~fcPosFinite;
8332 Mask &= ~fcNegFinite;
8343 Mask &= ~PartialCheck;
8352 else if (PartialCheck ==
fcZero)
8371 appendToRes(SubnormalRes);
8378 else if (PartialCheck ==
fcInf)
8391 if (PartialCheck ==
fcNan) {
8395 }
else if (PartialCheck ==
fcQNan) {
8405 Abs, InfWithQnanBitC);
8413 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
8416 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8427 appendToRes(NormalRes);
8431 MI.eraseFromParent();
8437 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8438 MI.getFirst4RegLLTs();
8440 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8449 if (MaskTy.isScalar()) {
8463 if (DstTy.isVector()) {
8466 MaskReg = ShufSplat.
getReg(0);
8471 }
else if (!DstTy.isVector()) {
8476 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8489 MI.eraseFromParent();
8495 unsigned Opcode =
MI.getOpcode();
8498 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8499 : TargetOpcode::G_UDIV,
8500 {
MI.getOperand(0).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
8502 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8503 : TargetOpcode::G_UREM,
8504 {
MI.getOperand(1).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
8505 MI.eraseFromParent();
8522 MI.eraseFromParent();
8537 MI.eraseFromParent();
8544 Register DestReg =
MI.getOperand(0).getReg();
8550 MI.eraseFromParent();
8577 Register ListPtr =
MI.getOperand(1).getReg();
8587 const Align A(
MI.getOperand(2).getImm());
8594 VAList = AndDst.
getReg(0);
8612 Align EltAlignment =
DL.getABITypeAlign(Ty);
8617 MI.eraseFromParent();
8632 unsigned Limit,
const MemOp &
Op,
8633 unsigned DstAS,
unsigned SrcAS,
8636 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
8646 if (
Op.isFixedDstAlign())
8654 unsigned NumMemOps = 0;
8658 while (TySize >
Size) {
8667 assert(NewTySize > 0 &&
"Could not find appropriate type");
8674 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
8676 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
8686 if (++NumMemOps > Limit)
8689 MemOps.push_back(Ty);
8701 if (!Ty.
isVector() && ValVRegAndVal) {
8702 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8710 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8733 auto &MF = *
MI.getParent()->getParent();
8734 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8735 auto &
DL = MF.getDataLayout();
8738 assert(KnownLen != 0 &&
"Have a zero length memset length!");
8740 bool DstAlignCanChange =
false;
8746 DstAlignCanChange =
true;
8749 std::vector<LLT> MemOps;
8751 const auto &DstMMO = **
MI.memoperands_begin();
8755 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8763 MF.getFunction().getAttributes(), TLI))
8766 if (DstAlignCanChange) {
8769 Align NewAlign =
DL.getABITypeAlign(IRTy);
8770 if (NewAlign > Alignment) {
8771 Alignment = NewAlign;
8781 LLT LargestTy = MemOps[0];
8782 for (
unsigned i = 1; i < MemOps.size(); i++)
8784 LargestTy = MemOps[i];
8797 unsigned DstOff = 0;
8798 unsigned Size = KnownLen;
8799 for (
unsigned I = 0;
I < MemOps.size();
I++) {
8802 if (TySize >
Size) {
8805 assert(
I == MemOps.size() - 1 &&
I != 0);
8806 DstOff -= TySize -
Size;
8817 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8824 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8830 Ptr = MIB.buildPtrAdd(PtrTy, Dst,
Offset).getReg(0);
8833 MIB.buildStore(
Value,
Ptr, *StoreMMO);
8838 MI.eraseFromParent();
8844 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8846 auto [Dst, Src, Len] =
MI.getFirst3Regs();
8848 const auto *MMOIt =
MI.memoperands_begin();
8850 bool IsVolatile =
MemOp->isVolatile();
8856 "inline memcpy with dynamic size is not yet supported");
8857 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8858 if (KnownLen == 0) {
8859 MI.eraseFromParent();
8863 const auto &DstMMO = **
MI.memoperands_begin();
8864 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8865 Align DstAlign = DstMMO.getBaseAlign();
8866 Align SrcAlign = SrcMMO.getBaseAlign();
8868 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8875 Align SrcAlign,
bool IsVolatile) {
8876 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8877 return lowerMemcpy(
MI, Dst, Src, KnownLen,
8878 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8885 Align SrcAlign,
bool IsVolatile) {
8886 auto &MF = *
MI.getParent()->getParent();
8887 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8888 auto &
DL = MF.getDataLayout();
8891 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
8893 bool DstAlignCanChange =
false;
8895 Align Alignment = std::min(DstAlign, SrcAlign);
8899 DstAlignCanChange =
true;
8905 std::vector<LLT> MemOps;
8907 const auto &DstMMO = **
MI.memoperands_begin();
8908 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8914 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8917 MF.getFunction().getAttributes(), TLI))
8920 if (DstAlignCanChange) {
8923 Align NewAlign =
DL.getABITypeAlign(IRTy);
8928 if (!
TRI->hasStackRealignment(MF))
8929 while (NewAlign > Alignment &&
DL.exceedsNaturalStackAlignment(NewAlign))
8932 if (NewAlign > Alignment) {
8933 Alignment = NewAlign;
8941 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
8949 unsigned CurrOffset = 0;
8950 unsigned Size = KnownLen;
8951 for (
auto CopyTy : MemOps) {
8954 if (CopyTy.getSizeInBytes() >
Size)
8955 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
8959 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8961 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8966 if (CurrOffset != 0) {
8970 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
8972 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8976 if (CurrOffset != 0) {
8978 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
8980 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8981 CurrOffset += CopyTy.getSizeInBytes();
8982 Size -= CopyTy.getSizeInBytes();
8985 MI.eraseFromParent();
8993 auto &MF = *
MI.getParent()->getParent();
8994 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8995 auto &
DL = MF.getDataLayout();
8998 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
9000 bool DstAlignCanChange =
false;
9003 Align Alignment = std::min(DstAlign, SrcAlign);
9007 DstAlignCanChange =
true;
9010 std::vector<LLT> MemOps;
9012 const auto &DstMMO = **
MI.memoperands_begin();
9013 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
9022 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9025 MF.getFunction().getAttributes(), TLI))
9028 if (DstAlignCanChange) {
9031 Align NewAlign =
DL.getABITypeAlign(IRTy);
9036 if (!
TRI->hasStackRealignment(MF))
9037 while (NewAlign > Alignment &&
DL.exceedsNaturalStackAlignment(NewAlign))
9040 if (NewAlign > Alignment) {
9041 Alignment = NewAlign;
9049 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
9055 unsigned CurrOffset = 0;
9057 for (
auto CopyTy : MemOps) {
9060 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9064 if (CurrOffset != 0) {
9068 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
9070 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9071 CurrOffset += CopyTy.getSizeInBytes();
9075 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
9076 LLT CopyTy = MemOps[
I];
9079 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.
getSizeInBytes());
9082 if (CurrOffset != 0) {
9086 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
9088 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
9091 MI.eraseFromParent();
9097 const unsigned Opc =
MI.getOpcode();
9100 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9101 Opc == TargetOpcode::G_MEMSET) &&
9102 "Expected memcpy like instruction");
9104 auto MMOIt =
MI.memoperands_begin();
9109 auto [Dst, Src, Len] =
MI.getFirst3Regs();
9111 if (Opc != TargetOpcode::G_MEMSET) {
9112 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
9114 SrcAlign =
MemOp->getBaseAlign();
9121 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9123 if (KnownLen == 0) {
9124 MI.eraseFromParent();
9128 bool IsVolatile =
MemOp->isVolatile();
9129 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9130 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9137 if (MaxLen && KnownLen > MaxLen)
9140 if (Opc == TargetOpcode::G_MEMCPY) {
9141 auto &MF = *
MI.getParent()->getParent();
9142 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9145 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9148 if (Opc == TargetOpcode::G_MEMMOVE)
9149 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9150 if (Opc == TargetOpcode::G_MEMSET)
9151 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ ICMP_ULT
unsigned less than
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal values.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal.
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)