42#define DEBUG_TYPE "legalizer"
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
55static std::pair<int, int>
61 unsigned NumParts =
Size / NarrowSize;
62 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
65 if (LeftoverSize == 0)
70 if (LeftoverSize % EltSize != 0)
80 return std::make_pair(NumParts, NumLeftover);
107 : MIRBuilder(Builder), Observer(Observer),
MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
114 : MIRBuilder(
B), Observer(Observer),
MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
124 if (isa<GIntrinsic>(
MI))
127 switch (Step.Action) {
142 return bitcast(
MI, Step.TypeIdx, Step.NewType);
145 return lower(
MI, Step.TypeIdx, Step.NewType);
162void LegalizerHelper::insertParts(
Register DstReg,
184 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
186 for (
auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
188 return mergeMixedSubvectors(DstReg, AllRegs);
193 for (
auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
209void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
212 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
219 appendVectorElts(AllElts, Leftover);
227 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
229 const int StartIdx = Regs.
size();
230 const int NumResults =
MI.getNumOperands() - 1;
232 for (
int I = 0;
I != NumResults; ++
I)
233 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
239 if (SrcTy == GCDTy) {
254 extractGCDType(Parts, GCDTy, SrcReg);
258LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
260 unsigned PadStrategy) {
265 int NumOrigSrc = VRegs.
size();
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 assert(PadStrategy == TargetOpcode::G_SEXT);
298 for (
int I = 0;
I != NumParts; ++
I) {
299 bool AllMergePartsArePadding =
true;
302 for (
int J = 0; J != NumSubParts; ++J) {
303 int Idx =
I * NumSubParts + J;
304 if (
Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
309 SubMerge[J] = VRegs[
Idx];
312 AllMergePartsArePadding =
false;
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
331 Remerge[
I] = AllPadReg;
335 if (NumSubParts == 1)
336 Remerge[
I] = SubMerge[0];
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[
I];
345 VRegs = std::move(Remerge);
349void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
356 if (DstTy == LCMTy) {
370 UnmergeDefs[0] = DstReg;
371 for (
unsigned I = 1;
I != NumDefs; ++
I)
383#define RTLIBCASE_INT(LibcallPrefix) \
387 return RTLIB::LibcallPrefix##32; \
389 return RTLIB::LibcallPrefix##64; \
391 return RTLIB::LibcallPrefix##128; \
393 llvm_unreachable("unexpected size"); \
397#define RTLIBCASE(LibcallPrefix) \
401 return RTLIB::LibcallPrefix##32; \
403 return RTLIB::LibcallPrefix##64; \
405 return RTLIB::LibcallPrefix##80; \
407 return RTLIB::LibcallPrefix##128; \
409 llvm_unreachable("unexpected size"); \
414 case TargetOpcode::G_MUL:
416 case TargetOpcode::G_SDIV:
418 case TargetOpcode::G_UDIV:
420 case TargetOpcode::G_SREM:
422 case TargetOpcode::G_UREM:
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
426 case TargetOpcode::G_FADD:
428 case TargetOpcode::G_FSUB:
430 case TargetOpcode::G_FMUL:
432 case TargetOpcode::G_FDIV:
434 case TargetOpcode::G_FEXP:
436 case TargetOpcode::G_FEXP2:
438 case TargetOpcode::G_FEXP10:
440 case TargetOpcode::G_FREM:
442 case TargetOpcode::G_FPOW:
444 case TargetOpcode::G_FPOWI:
446 case TargetOpcode::G_FMA:
448 case TargetOpcode::G_FSIN:
450 case TargetOpcode::G_FCOS:
452 case TargetOpcode::G_FTAN:
454 case TargetOpcode::G_FASIN:
456 case TargetOpcode::G_FACOS:
458 case TargetOpcode::G_FATAN:
460 case TargetOpcode::G_FSINH:
462 case TargetOpcode::G_FCOSH:
464 case TargetOpcode::G_FTANH:
466 case TargetOpcode::G_FLOG10:
468 case TargetOpcode::G_FLOG:
470 case TargetOpcode::G_FLOG2:
472 case TargetOpcode::G_FLDEXP:
474 case TargetOpcode::G_FCEIL:
476 case TargetOpcode::G_FFLOOR:
478 case TargetOpcode::G_FMINNUM:
480 case TargetOpcode::G_FMAXNUM:
482 case TargetOpcode::G_FSQRT:
484 case TargetOpcode::G_FRINT:
486 case TargetOpcode::G_FNEARBYINT:
488 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
490 case TargetOpcode::G_INTRINSIC_LRINT:
492 case TargetOpcode::G_INTRINSIC_LLRINT:
518 if (CallerAttrs.
hasRetAttr(Attribute::ZExt) ||
530 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
537 if (!VReg.
isVirtual() || VReg != Next->getOperand(1).getReg())
540 Register PReg = Next->getOperand(0).getReg();
548 if (Ret->getNumImplicitOperands() != 1)
551 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
575 Info.OrigRet = Result;
578 (Result.Ty->isVoidTy() ||
583 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
584 if (!CLI.lowerCall(MIRBuilder,
Info))
587 if (
MI &&
Info.LoweredTailCall) {
588 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
599 "Expected instr following MI to be return or debug inst?");
603 }
while (
MI->getNextNode());
633 Args.push_back({MO.getReg(), OpType, 0});
635 {
MI.getOperand(0).
getReg(), OpType, 0}, Args,
646 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
650 LLT OpLLT =
MRI.getType(Reg);
651 Type *OpTy =
nullptr;
656 Args.push_back({Reg, OpTy, 0});
662 unsigned Opc =
MI.getOpcode();
664 case TargetOpcode::G_BZERO:
665 RTLibcall = RTLIB::BZERO;
667 case TargetOpcode::G_MEMCPY:
668 RTLibcall = RTLIB::MEMCPY;
669 Args[0].Flags[0].setReturned();
671 case TargetOpcode::G_MEMMOVE:
672 RTLibcall = RTLIB::MEMMOVE;
673 Args[0].Flags[0].setReturned();
675 case TargetOpcode::G_MEMSET:
676 RTLibcall = RTLIB::MEMSET;
677 Args[0].Flags[0].setReturned();
682 const char *
Name = TLI.getLibcallName(RTLibcall);
692 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
696 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
699 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
700 if (!CLI.lowerCall(MIRBuilder,
Info))
703 if (
Info.LoweredTailCall) {
704 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
715 "Expected instr following MI to be return or debug inst?");
719 }
while (
MI.getNextNode());
729 unsigned Opc =
MI.getOpcode();
730 auto &AtomicMI = cast<GMemOperation>(
MI);
731 auto &MMO = AtomicMI.getMMO();
732 auto Ordering = MMO.getMergedOrdering();
733 LLT MemType = MMO.getMemoryType();
736 return RTLIB::UNKNOWN_LIBCALL;
738#define LCALLS(A, B) \
739 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
741 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
743 case TargetOpcode::G_ATOMIC_CMPXCHG:
744 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
746 return getOutlineAtomicHelper(LC, Ordering, MemSize);
748 case TargetOpcode::G_ATOMICRMW_XCHG: {
750 return getOutlineAtomicHelper(LC, Ordering, MemSize);
752 case TargetOpcode::G_ATOMICRMW_ADD:
753 case TargetOpcode::G_ATOMICRMW_SUB: {
755 return getOutlineAtomicHelper(LC, Ordering, MemSize);
757 case TargetOpcode::G_ATOMICRMW_AND: {
759 return getOutlineAtomicHelper(LC, Ordering, MemSize);
761 case TargetOpcode::G_ATOMICRMW_OR: {
763 return getOutlineAtomicHelper(LC, Ordering, MemSize);
765 case TargetOpcode::G_ATOMICRMW_XOR: {
767 return getOutlineAtomicHelper(LC, Ordering, MemSize);
770 return RTLIB::UNKNOWN_LIBCALL;
783 unsigned Opc =
MI.getOpcode();
785 case TargetOpcode::G_ATOMIC_CMPXCHG:
786 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
789 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
790 MI.getFirst4RegLLTs();
793 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
794 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
795 NewLLT) =
MI.getFirst5RegLLTs();
805 case TargetOpcode::G_ATOMICRMW_XCHG:
806 case TargetOpcode::G_ATOMICRMW_ADD:
807 case TargetOpcode::G_ATOMICRMW_SUB:
808 case TargetOpcode::G_ATOMICRMW_AND:
809 case TargetOpcode::G_ATOMICRMW_OR:
810 case TargetOpcode::G_ATOMICRMW_XOR: {
811 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
814 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
818 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
833 const char *
Name = TLI.getLibcallName(RTLibcall);
843 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
847 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
848 if (!CLI.lowerCall(MIRBuilder,
Info))
860 case TargetOpcode::G_FPEXT:
862 case TargetOpcode::G_FPTRUNC:
864 case TargetOpcode::G_FPTOSI:
866 case TargetOpcode::G_FPTOUI:
868 case TargetOpcode::G_SITOFP:
870 case TargetOpcode::G_UITOFP:
882 {{
MI.getOperand(1).
getReg(), FromType, 0}}, LocObserver, &
MI);
888 switch (
MI.getOpcode()) {
889 case TargetOpcode::G_GET_FPENV:
890 RTLibcall = RTLIB::FEGETENV;
892 case TargetOpcode::G_SET_FPENV:
893 case TargetOpcode::G_RESET_FPENV:
894 RTLibcall = RTLIB::FESETENV;
896 case TargetOpcode::G_GET_FPMODE:
897 RTLibcall = RTLIB::FEGETMODE;
899 case TargetOpcode::G_SET_FPMODE:
900 case TargetOpcode::G_RESET_FPMODE:
901 RTLibcall = RTLIB::FESETMODE;
930 auto &Ctx = MF.getFunction().getContext();
941 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
948 LocObserver,
nullptr);
970 auto &Ctx = MF.getFunction().getContext();
986 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
992 LocObserver,
nullptr);
1009 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1011 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1028 switch (
MI.getOpcode()) {
1031 case TargetOpcode::G_MUL:
1032 case TargetOpcode::G_SDIV:
1033 case TargetOpcode::G_UDIV:
1034 case TargetOpcode::G_SREM:
1035 case TargetOpcode::G_UREM:
1036 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1045 case TargetOpcode::G_FADD:
1046 case TargetOpcode::G_FSUB:
1047 case TargetOpcode::G_FMUL:
1048 case TargetOpcode::G_FDIV:
1049 case TargetOpcode::G_FMA:
1050 case TargetOpcode::G_FPOW:
1051 case TargetOpcode::G_FREM:
1052 case TargetOpcode::G_FCOS:
1053 case TargetOpcode::G_FSIN:
1054 case TargetOpcode::G_FTAN:
1055 case TargetOpcode::G_FACOS:
1056 case TargetOpcode::G_FASIN:
1057 case TargetOpcode::G_FATAN:
1058 case TargetOpcode::G_FCOSH:
1059 case TargetOpcode::G_FSINH:
1060 case TargetOpcode::G_FTANH:
1061 case TargetOpcode::G_FLOG10:
1062 case TargetOpcode::G_FLOG:
1063 case TargetOpcode::G_FLOG2:
1064 case TargetOpcode::G_FLDEXP:
1065 case TargetOpcode::G_FEXP:
1066 case TargetOpcode::G_FEXP2:
1067 case TargetOpcode::G_FEXP10:
1068 case TargetOpcode::G_FCEIL:
1069 case TargetOpcode::G_FFLOOR:
1070 case TargetOpcode::G_FMINNUM:
1071 case TargetOpcode::G_FMAXNUM:
1072 case TargetOpcode::G_FSQRT:
1073 case TargetOpcode::G_FRINT:
1074 case TargetOpcode::G_FNEARBYINT:
1075 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1080 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1088 case TargetOpcode::G_INTRINSIC_LRINT:
1089 case TargetOpcode::G_INTRINSIC_LLRINT: {
1096 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1102 {{
MI.getOperand(1).
getReg(), HLTy, 0}}, LocObserver, &
MI);
1105 MI.eraseFromParent();
1108 case TargetOpcode::G_FPOWI: {
1115 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1119 std::initializer_list<CallLowering::ArgInfo> Args = {
1120 {
MI.getOperand(1).getReg(), HLTy, 0},
1121 {
MI.getOperand(2).getReg(), ITy, 1}};
1124 Args, LocObserver, &
MI);
1129 case TargetOpcode::G_FPEXT:
1130 case TargetOpcode::G_FPTRUNC: {
1133 if (!FromTy || !ToTy)
1141 case TargetOpcode::G_FPTOSI:
1142 case TargetOpcode::G_FPTOUI: {
1147 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1155 case TargetOpcode::G_SITOFP:
1156 case TargetOpcode::G_UITOFP: {
1160 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1168 case TargetOpcode::G_ATOMICRMW_XCHG:
1169 case TargetOpcode::G_ATOMICRMW_ADD:
1170 case TargetOpcode::G_ATOMICRMW_SUB:
1171 case TargetOpcode::G_ATOMICRMW_AND:
1172 case TargetOpcode::G_ATOMICRMW_OR:
1173 case TargetOpcode::G_ATOMICRMW_XOR:
1174 case TargetOpcode::G_ATOMIC_CMPXCHG:
1175 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1181 case TargetOpcode::G_BZERO:
1182 case TargetOpcode::G_MEMCPY:
1183 case TargetOpcode::G_MEMMOVE:
1184 case TargetOpcode::G_MEMSET: {
1189 MI.eraseFromParent();
1192 case TargetOpcode::G_GET_FPENV:
1193 case TargetOpcode::G_GET_FPMODE: {
1199 case TargetOpcode::G_SET_FPENV:
1200 case TargetOpcode::G_SET_FPMODE: {
1206 case TargetOpcode::G_RESET_FPENV:
1207 case TargetOpcode::G_RESET_FPMODE: {
1216 MI.eraseFromParent();
1226 switch (
MI.getOpcode()) {
1229 case TargetOpcode::G_IMPLICIT_DEF: {
1239 if (SizeOp0 % NarrowSize != 0) {
1240 LLT ImplicitTy = NarrowTy;
1247 MI.eraseFromParent();
1251 int NumParts = SizeOp0 / NarrowSize;
1254 for (
int i = 0; i < NumParts; ++i)
1261 MI.eraseFromParent();
1264 case TargetOpcode::G_CONSTANT: {
1266 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1269 int NumParts = TotalSize / NarrowSize;
1272 for (
int I = 0;
I != NumParts; ++
I) {
1273 unsigned Offset =
I * NarrowSize;
1280 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1282 if (LeftoverBits != 0) {
1286 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1290 insertParts(
MI.getOperand(0).getReg(),
1291 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1293 MI.eraseFromParent();
1296 case TargetOpcode::G_SEXT:
1297 case TargetOpcode::G_ZEXT:
1298 case TargetOpcode::G_ANYEXT:
1300 case TargetOpcode::G_TRUNC: {
1306 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1312 MI.eraseFromParent();
1315 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1316 case TargetOpcode::G_FREEZE: {
1327 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1334 MI.eraseFromParent();
1337 case TargetOpcode::G_ADD:
1338 case TargetOpcode::G_SUB:
1339 case TargetOpcode::G_SADDO:
1340 case TargetOpcode::G_SSUBO:
1341 case TargetOpcode::G_SADDE:
1342 case TargetOpcode::G_SSUBE:
1343 case TargetOpcode::G_UADDO:
1344 case TargetOpcode::G_USUBO:
1345 case TargetOpcode::G_UADDE:
1346 case TargetOpcode::G_USUBE:
1348 case TargetOpcode::G_MUL:
1349 case TargetOpcode::G_UMULH:
1351 case TargetOpcode::G_EXTRACT:
1353 case TargetOpcode::G_INSERT:
1355 case TargetOpcode::G_LOAD: {
1356 auto &LoadMI = cast<GLoad>(
MI);
1357 Register DstReg = LoadMI.getDstReg();
1362 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1366 LoadMI.eraseFromParent();
1372 case TargetOpcode::G_ZEXTLOAD:
1373 case TargetOpcode::G_SEXTLOAD: {
1374 auto &LoadMI = cast<GExtLoad>(
MI);
1375 Register DstReg = LoadMI.getDstReg();
1376 Register PtrReg = LoadMI.getPointerReg();
1379 auto &MMO = LoadMI.getMMO();
1382 if (MemSize == NarrowSize) {
1384 }
else if (MemSize < NarrowSize) {
1386 }
else if (MemSize > NarrowSize) {
1391 if (isa<GZExtLoad>(LoadMI))
1396 LoadMI.eraseFromParent();
1399 case TargetOpcode::G_STORE: {
1400 auto &StoreMI = cast<GStore>(
MI);
1402 Register SrcReg = StoreMI.getValueReg();
1407 int NumParts = SizeOp0 / NarrowSize;
1409 unsigned LeftoverBits = SrcTy.
getSizeInBits() - HandledSize;
1410 if (SrcTy.
isVector() && LeftoverBits != 0)
1413 if (8 * StoreMI.getMemSize().getValue() != SrcTy.
getSizeInBits()) {
1417 StoreMI.eraseFromParent();
1423 case TargetOpcode::G_SELECT:
1425 case TargetOpcode::G_AND:
1426 case TargetOpcode::G_OR:
1427 case TargetOpcode::G_XOR: {
1439 case TargetOpcode::G_SHL:
1440 case TargetOpcode::G_LSHR:
1441 case TargetOpcode::G_ASHR:
1443 case TargetOpcode::G_CTLZ:
1444 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1445 case TargetOpcode::G_CTTZ:
1446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1447 case TargetOpcode::G_CTPOP:
1449 switch (
MI.getOpcode()) {
1450 case TargetOpcode::G_CTLZ:
1451 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1453 case TargetOpcode::G_CTTZ:
1454 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1456 case TargetOpcode::G_CTPOP:
1466 case TargetOpcode::G_INTTOPTR:
1474 case TargetOpcode::G_PTRTOINT:
1482 case TargetOpcode::G_PHI: {
1485 if (SizeOp0 % NarrowSize != 0)
1488 unsigned NumParts = SizeOp0 / NarrowSize;
1492 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1500 for (
unsigned i = 0; i < NumParts; ++i) {
1504 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1505 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1510 MI.eraseFromParent();
1513 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1514 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1518 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1524 case TargetOpcode::G_ICMP: {
1543 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1544 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1557 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1558 auto LHS = std::get<0>(LHSAndRHS);
1559 auto RHS = std::get<1>(LHSAndRHS);
1567 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1568 auto LHS = std::get<0>(LHSAndRHS);
1569 auto RHS = std::get<1>(LHSAndRHS);
1571 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1572 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1573 TargetOpcode::G_ZEXT);
1580 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1582 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1587 assert(LHSPartRegs.
size() == 2 &&
"Expected exactly 2 LHS part regs?");
1588 assert(RHSPartRegs.
size() == 2 &&
"Expected exactly 2 RHS part regs?");
1600 MI.eraseFromParent();
1603 case TargetOpcode::G_FCMP:
1612 case TargetOpcode::G_SEXT_INREG: {
1616 int64_t SizeInBits =
MI.getOperand(2).getImm();
1626 MO1.
setReg(TruncMIB.getReg(0));
1641 if (SizeOp0 % NarrowSize != 0)
1643 int NumParts = SizeOp0 / NarrowSize;
1651 for (
int i = 0; i < NumParts; ++i) {
1667 for (
int i = 0; i < NumParts; ++i) {
1670 PartialExtensionReg = DstRegs.
back();
1672 assert(PartialExtensionReg &&
1673 "Expected to visit partial extension before full");
1674 if (FullExtensionReg) {
1681 FullExtensionReg = DstRegs.
back();
1686 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1689 PartialExtensionReg = DstRegs.
back();
1696 MI.eraseFromParent();
1699 case TargetOpcode::G_BSWAP:
1700 case TargetOpcode::G_BITREVERSE: {
1701 if (SizeOp0 % NarrowSize != 0)
1706 unsigned NumParts = SizeOp0 / NarrowSize;
1707 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1710 for (
unsigned i = 0; i < NumParts; ++i) {
1712 {SrcRegs[NumParts - 1 - i]});
1719 MI.eraseFromParent();
1722 case TargetOpcode::G_PTR_ADD:
1723 case TargetOpcode::G_PTRMASK: {
1731 case TargetOpcode::G_FPTOUI:
1732 case TargetOpcode::G_FPTOSI:
1734 case TargetOpcode::G_FPEXT:
1741 case TargetOpcode::G_FLDEXP:
1742 case TargetOpcode::G_STRICT_FLDEXP:
1744 case TargetOpcode::G_VSCALE: {
1755 MI.eraseFromParent();
1783 unsigned OpIdx,
unsigned ExtOpcode) {
1786 MO.
setReg(ExtB.getReg(0));
1793 MO.
setReg(ExtB.getReg(0));
1797 unsigned OpIdx,
unsigned TruncOpcode) {
1806 unsigned OpIdx,
unsigned ExtOpcode) {
1845LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1850 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
1851 if (DstTy.isVector())
1858 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1860 unsigned NumOps =
MI.getNumOperands();
1861 unsigned NumSrc =
MI.getNumOperands() - 1;
1862 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1864 if (WideSize >= DstSize) {
1868 for (
unsigned I = 2;
I != NumOps; ++
I) {
1869 const unsigned Offset = (
I - 1) * PartSize;
1876 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
1882 ResultReg = NextResult;
1885 if (WideSize > DstSize)
1887 else if (DstTy.isPointer())
1890 MI.eraseFromParent();
1915 const int GCD = std::gcd(SrcSize, WideSize);
1926 if (GCD == SrcSize) {
1930 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1936 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
1938 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
1942 const int PartsPerGCD = WideSize / GCD;
1946 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1961 MI.eraseFromParent();
1966LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1971 int NumDst =
MI.getNumOperands() - 1;
1972 Register SrcReg =
MI.getOperand(NumDst).getReg();
1977 Register Dst0Reg =
MI.getOperand(0).getReg();
1987 dbgs() <<
"Not casting non-integral address space integer\n");
2008 for (
int I = 1;
I != NumDst; ++
I) {
2014 MI.eraseFromParent();
2025 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2050 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2055 if (PartsPerRemerge == 1) {
2058 for (
int I = 0;
I != NumUnmerge; ++
I) {
2061 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2062 int Idx =
I * PartsPerUnmerge + J;
2064 MIB.addDef(
MI.getOperand(
Idx).getReg());
2071 MIB.addUse(Unmerge.getReg(
I));
2075 for (
int J = 0; J != NumUnmerge; ++J)
2076 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2079 for (
int I = 0;
I != NumDst; ++
I) {
2080 for (
int J = 0; J < PartsPerRemerge; ++J) {
2081 const int Idx =
I * PartsPerRemerge + J;
2086 RemergeParts.
clear();
2090 MI.eraseFromParent();
2095LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2097 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2098 unsigned Offset =
MI.getOperand(2).getImm();
2101 if (SrcTy.
isVector() || DstTy.isVector())
2117 if (DstTy.isPointer())
2124 MI.eraseFromParent();
2129 LLT ShiftTy = SrcTy;
2138 MI.eraseFromParent();
2169LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2171 if (TypeIdx != 0 || WideTy.
isVector())
2181LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2185 std::optional<Register> CarryIn;
2186 switch (
MI.getOpcode()) {
2189 case TargetOpcode::G_SADDO:
2190 Opcode = TargetOpcode::G_ADD;
2191 ExtOpcode = TargetOpcode::G_SEXT;
2193 case TargetOpcode::G_SSUBO:
2194 Opcode = TargetOpcode::G_SUB;
2195 ExtOpcode = TargetOpcode::G_SEXT;
2197 case TargetOpcode::G_UADDO:
2198 Opcode = TargetOpcode::G_ADD;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2201 case TargetOpcode::G_USUBO:
2202 Opcode = TargetOpcode::G_SUB;
2203 ExtOpcode = TargetOpcode::G_ZEXT;
2205 case TargetOpcode::G_SADDE:
2206 Opcode = TargetOpcode::G_UADDE;
2207 ExtOpcode = TargetOpcode::G_SEXT;
2208 CarryIn =
MI.getOperand(4).getReg();
2210 case TargetOpcode::G_SSUBE:
2211 Opcode = TargetOpcode::G_USUBE;
2212 ExtOpcode = TargetOpcode::G_SEXT;
2213 CarryIn =
MI.getOperand(4).getReg();
2215 case TargetOpcode::G_UADDE:
2216 Opcode = TargetOpcode::G_UADDE;
2217 ExtOpcode = TargetOpcode::G_ZEXT;
2218 CarryIn =
MI.getOperand(4).getReg();
2220 case TargetOpcode::G_USUBE:
2221 Opcode = TargetOpcode::G_USUBE;
2222 ExtOpcode = TargetOpcode::G_ZEXT;
2223 CarryIn =
MI.getOperand(4).getReg();
2244 LLT CarryOutTy = MRI.
getType(
MI.getOperand(1).getReg());
2247 {LHSExt, RHSExt, *CarryIn})
2259 MI.eraseFromParent();
2264LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2266 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2267 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2268 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2269 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2270 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2295 {ShiftL, ShiftR},
MI.getFlags());
2303 MI.eraseFromParent();
2308LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2317 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2320 LLT OverflowTy = MRI.
getType(OriginalOverflow);
2327 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2336 WideMulCanOverflow ?
MI.getOpcode() : (
unsigned)TargetOpcode::G_MUL;
2339 if (WideMulCanOverflow)
2341 {LeftOperand, RightOperand});
2362 if (WideMulCanOverflow) {
2370 MI.eraseFromParent();
2376 switch (
MI.getOpcode()) {
2379 case TargetOpcode::G_ATOMICRMW_XCHG:
2380 case TargetOpcode::G_ATOMICRMW_ADD:
2381 case TargetOpcode::G_ATOMICRMW_SUB:
2382 case TargetOpcode::G_ATOMICRMW_AND:
2383 case TargetOpcode::G_ATOMICRMW_OR:
2384 case TargetOpcode::G_ATOMICRMW_XOR:
2385 case TargetOpcode::G_ATOMICRMW_MIN:
2386 case TargetOpcode::G_ATOMICRMW_MAX:
2387 case TargetOpcode::G_ATOMICRMW_UMIN:
2388 case TargetOpcode::G_ATOMICRMW_UMAX:
2389 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2395 case TargetOpcode::G_ATOMIC_CMPXCHG:
2396 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2403 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2413 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2418 case TargetOpcode::G_EXTRACT:
2419 return widenScalarExtract(
MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_INSERT:
2421 return widenScalarInsert(
MI, TypeIdx, WideTy);
2422 case TargetOpcode::G_MERGE_VALUES:
2423 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2424 case TargetOpcode::G_UNMERGE_VALUES:
2425 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2426 case TargetOpcode::G_SADDO:
2427 case TargetOpcode::G_SSUBO:
2428 case TargetOpcode::G_UADDO:
2429 case TargetOpcode::G_USUBO:
2430 case TargetOpcode::G_SADDE:
2431 case TargetOpcode::G_SSUBE:
2432 case TargetOpcode::G_UADDE:
2433 case TargetOpcode::G_USUBE:
2434 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2435 case TargetOpcode::G_UMULO:
2436 case TargetOpcode::G_SMULO:
2437 return widenScalarMulo(
MI, TypeIdx, WideTy);
2438 case TargetOpcode::G_SADDSAT:
2439 case TargetOpcode::G_SSUBSAT:
2440 case TargetOpcode::G_SSHLSAT:
2441 case TargetOpcode::G_UADDSAT:
2442 case TargetOpcode::G_USUBSAT:
2443 case TargetOpcode::G_USHLSAT:
2444 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2445 case TargetOpcode::G_CTTZ:
2446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2447 case TargetOpcode::G_CTLZ:
2448 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2449 case TargetOpcode::G_CTPOP: {
2460 unsigned ExtOpc =
MI.getOpcode() == TargetOpcode::G_CTTZ ||
2461 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2462 ? TargetOpcode::G_ANYEXT
2463 : TargetOpcode::G_ZEXT;
2466 unsigned NewOpc =
MI.getOpcode();
2467 if (NewOpc == TargetOpcode::G_CTTZ) {
2476 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2481 if (
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2493 if (
MI.getOpcode() == TargetOpcode::G_CTLZ) {
2500 MI.eraseFromParent();
2503 case TargetOpcode::G_BSWAP: {
2512 MI.getOperand(0).setReg(DstExt);
2525 case TargetOpcode::G_BITREVERSE: {
2534 MI.getOperand(0).setReg(DstExt);
2543 case TargetOpcode::G_FREEZE:
2544 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2551 case TargetOpcode::G_ABS:
2558 case TargetOpcode::G_ADD:
2559 case TargetOpcode::G_AND:
2560 case TargetOpcode::G_MUL:
2561 case TargetOpcode::G_OR:
2562 case TargetOpcode::G_XOR:
2563 case TargetOpcode::G_SUB:
2564 case TargetOpcode::G_SHUFFLE_VECTOR:
2575 case TargetOpcode::G_SBFX:
2576 case TargetOpcode::G_UBFX:
2590 case TargetOpcode::G_SHL:
2606 case TargetOpcode::G_ROTR:
2607 case TargetOpcode::G_ROTL:
2616 case TargetOpcode::G_SDIV:
2617 case TargetOpcode::G_SREM:
2618 case TargetOpcode::G_SMIN:
2619 case TargetOpcode::G_SMAX:
2627 case TargetOpcode::G_SDIVREM:
2636 case TargetOpcode::G_ASHR:
2637 case TargetOpcode::G_LSHR:
2641 unsigned CvtOp =
MI.getOpcode() == TargetOpcode::G_ASHR ?
2642 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2655 case TargetOpcode::G_UDIV:
2656 case TargetOpcode::G_UREM:
2657 case TargetOpcode::G_UMIN:
2658 case TargetOpcode::G_UMAX:
2666 case TargetOpcode::G_UDIVREM:
2675 case TargetOpcode::G_SELECT:
2692 case TargetOpcode::G_FPTOSI:
2693 case TargetOpcode::G_FPTOUI:
2694 case TargetOpcode::G_INTRINSIC_LRINT:
2695 case TargetOpcode::G_INTRINSIC_LLRINT:
2696 case TargetOpcode::G_IS_FPCLASS:
2706 case TargetOpcode::G_SITOFP:
2716 case TargetOpcode::G_UITOFP:
2726 case TargetOpcode::G_LOAD:
2727 case TargetOpcode::G_SEXTLOAD:
2728 case TargetOpcode::G_ZEXTLOAD:
2734 case TargetOpcode::G_STORE: {
2745 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2751 case TargetOpcode::G_CONSTANT: {
2755 MRI.
getType(
MI.getOperand(0).getReg()));
2756 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2757 ExtOpc == TargetOpcode::G_ANYEXT) &&
2760 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2764 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
2770 case TargetOpcode::G_FCONSTANT: {
2778 MI.eraseFromParent();
2781 case TargetOpcode::G_IMPLICIT_DEF: {
2787 case TargetOpcode::G_BRCOND:
2793 case TargetOpcode::G_FCMP:
2804 case TargetOpcode::G_ICMP:
2810 MI.getOperand(1).getPredicate()))
2811 ? TargetOpcode::G_SEXT
2812 : TargetOpcode::G_ZEXT;
2819 case TargetOpcode::G_PTR_ADD:
2820 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
2826 case TargetOpcode::G_PHI: {
2827 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
2830 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
2842 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2850 TargetOpcode::G_ANYEXT);
2865 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2901 case TargetOpcode::G_FADD:
2902 case TargetOpcode::G_FMUL:
2903 case TargetOpcode::G_FSUB:
2904 case TargetOpcode::G_FMA:
2905 case TargetOpcode::G_FMAD:
2906 case TargetOpcode::G_FNEG:
2907 case TargetOpcode::G_FABS:
2908 case TargetOpcode::G_FCANONICALIZE:
2909 case TargetOpcode::G_FMINNUM:
2910 case TargetOpcode::G_FMAXNUM:
2911 case TargetOpcode::G_FMINNUM_IEEE:
2912 case TargetOpcode::G_FMAXNUM_IEEE:
2913 case TargetOpcode::G_FMINIMUM:
2914 case TargetOpcode::G_FMAXIMUM:
2915 case TargetOpcode::G_FDIV:
2916 case TargetOpcode::G_FREM:
2917 case TargetOpcode::G_FCEIL:
2918 case TargetOpcode::G_FFLOOR:
2919 case TargetOpcode::G_FCOS:
2920 case TargetOpcode::G_FSIN:
2921 case TargetOpcode::G_FTAN:
2922 case TargetOpcode::G_FACOS:
2923 case TargetOpcode::G_FASIN:
2924 case TargetOpcode::G_FATAN:
2925 case TargetOpcode::G_FCOSH:
2926 case TargetOpcode::G_FSINH:
2927 case TargetOpcode::G_FTANH:
2928 case TargetOpcode::G_FLOG10:
2929 case TargetOpcode::G_FLOG:
2930 case TargetOpcode::G_FLOG2:
2931 case TargetOpcode::G_FRINT:
2932 case TargetOpcode::G_FNEARBYINT:
2933 case TargetOpcode::G_FSQRT:
2934 case TargetOpcode::G_FEXP:
2935 case TargetOpcode::G_FEXP2:
2936 case TargetOpcode::G_FEXP10:
2937 case TargetOpcode::G_FPOW:
2938 case TargetOpcode::G_INTRINSIC_TRUNC:
2939 case TargetOpcode::G_INTRINSIC_ROUND:
2940 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2944 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
2950 case TargetOpcode::G_FPOWI:
2951 case TargetOpcode::G_FLDEXP:
2952 case TargetOpcode::G_STRICT_FLDEXP: {
2954 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2975 case TargetOpcode::G_FFREXP: {
2988 case TargetOpcode::G_INTTOPTR:
2996 case TargetOpcode::G_PTRTOINT:
3004 case TargetOpcode::G_BUILD_VECTOR: {
3008 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3022 case TargetOpcode::G_SEXT_INREG:
3031 case TargetOpcode::G_PTRMASK: {
3039 case TargetOpcode::G_VECREDUCE_FADD:
3040 case TargetOpcode::G_VECREDUCE_FMUL:
3041 case TargetOpcode::G_VECREDUCE_FMIN:
3042 case TargetOpcode::G_VECREDUCE_FMAX:
3043 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3044 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3058 case TargetOpcode::G_VSCALE: {
3065 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3070 case TargetOpcode::G_SPLAT_VECTOR: {
3084 auto Unmerge =
B.buildUnmerge(Ty, Src);
3085 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
3094 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3096 LLT DstLLT =
MRI.getType(DstReg);
3117 MI.eraseFromParent();
3128 MI.eraseFromParent();
3135 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3140 if (DstTy.isVector()) {
3141 int NumDstElt = DstTy.getNumElements();
3145 LLT DstCastTy = DstEltTy;
3146 LLT SrcPartTy = SrcEltTy;
3150 if (NumSrcElt < NumDstElt) {
3160 SrcPartTy = SrcEltTy;
3161 }
else if (NumSrcElt > NumDstElt) {
3172 DstCastTy = DstEltTy;
3182 MI.eraseFromParent();
3186 if (DstTy.isVector()) {
3190 MI.eraseFromParent();
3206 unsigned NewEltSize,
3207 unsigned OldEltSize) {
3208 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3209 LLT IdxTy =
B.getMRI()->getType(
Idx);
3212 auto OffsetMask =
B.buildConstant(
3214 auto OffsetIdx =
B.buildAnd(IdxTy,
Idx, OffsetMask);
3215 return B.buildShl(IdxTy, OffsetIdx,
3216 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3231 auto [Dst, DstTy, SrcVec, SrcVecTy,
Idx, IdxTy] =
MI.getFirst3RegLLTs();
3235 unsigned OldNumElts = SrcVecTy.getNumElements();
3242 if (NewNumElts > OldNumElts) {
3253 if (NewNumElts % OldNumElts != 0)
3257 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3266 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3270 NewOps[
I] = Elt.getReg(0);
3275 MI.eraseFromParent();
3279 if (NewNumElts < OldNumElts) {
3280 if (NewEltSize % OldEltSize != 0)
3302 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3321 MI.eraseFromParent();
3335 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3336 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3337 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3338 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3341 auto EltMask =
B.buildConstant(
3345 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3346 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3349 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3353 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3367 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy,
Idx, IdxTy] =
3368 MI.getFirst4RegLLTs();
3380 if (NewNumElts < OldNumElts) {
3381 if (NewEltSize % OldEltSize != 0)
3390 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3410 CastTy, CastVec, InsertedElt, ScaledIdx).
getReg(0);
3414 MI.eraseFromParent();
3438 auto ConcatMI = dyn_cast<GConcatVectors>(&
MI);
3444 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3448 if (!LI.
isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3449 return UnableToLegalize;
3454 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3456 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3465 MI.eraseFromParent();
3481 if (MemSizeInBits != MemStoreSizeInBits) {
3501 if (isa<GSExtLoad>(LoadMI)) {
3504 }
else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3513 if (DstTy != LoadTy)
3539 uint64_t LargeSplitSize, SmallSplitSize;
3544 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3554 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3579 LargeSplitSize / 8);
3583 SmallPtr, *SmallMMO);
3588 if (AnyExtTy == DstTy)
3623 if (StoreWidth != StoreSizeInBits) {
3658 uint64_t LargeSplitSize, SmallSplitSize;
3661 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.
getSizeInBits());
3668 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3707 switch (
MI.getOpcode()) {
3708 case TargetOpcode::G_LOAD: {
3726 case TargetOpcode::G_STORE: {
3742 case TargetOpcode::G_SELECT: {
3748 dbgs() <<
"bitcast action not implemented for vector select\n");
3759 case TargetOpcode::G_AND:
3760 case TargetOpcode::G_OR:
3761 case TargetOpcode::G_XOR: {
3769 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3771 case TargetOpcode::G_INSERT_VECTOR_ELT:
3773 case TargetOpcode::G_CONCAT_VECTORS:
3781void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
3789 using namespace TargetOpcode;
3791 switch(
MI.getOpcode()) {
3794 case TargetOpcode::G_FCONSTANT:
3796 case TargetOpcode::G_BITCAST:
3798 case TargetOpcode::G_SREM:
3799 case TargetOpcode::G_UREM: {
3803 {MI.getOperand(1), MI.getOperand(2)});
3807 MI.eraseFromParent();
3810 case TargetOpcode::G_SADDO:
3811 case TargetOpcode::G_SSUBO:
3813 case TargetOpcode::G_UMULH:
3814 case TargetOpcode::G_SMULH:
3816 case TargetOpcode::G_SMULO:
3817 case TargetOpcode::G_UMULO: {
3820 auto [Res, Overflow,
LHS,
RHS] =
MI.getFirst4Regs();
3823 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
3824 ? TargetOpcode::G_SMULH
3825 : TargetOpcode::G_UMULH;
3829 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
3830 MI.removeOperand(1);
3841 if (Opcode == TargetOpcode::G_SMULH) {
3850 case TargetOpcode::G_FNEG: {
3851 auto [Res, SubByReg] =
MI.getFirst2Regs();
3861 MI.eraseFromParent();
3864 case TargetOpcode::G_FSUB:
3865 case TargetOpcode::G_STRICT_FSUB: {
3866 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
3872 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3877 MI.eraseFromParent();
3880 case TargetOpcode::G_FMAD:
3882 case TargetOpcode::G_FFLOOR:
3884 case TargetOpcode::G_LROUND:
3885 case TargetOpcode::G_LLROUND: {
3892 MI.eraseFromParent();
3895 case TargetOpcode::G_INTRINSIC_ROUND:
3897 case TargetOpcode::G_FRINT: {
3900 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3903 case TargetOpcode::G_INTRINSIC_LRINT:
3904 case TargetOpcode::G_INTRINSIC_LLRINT: {
3911 MI.eraseFromParent();
3914 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3915 auto [OldValRes, SuccessRes,
Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
3918 **
MI.memoperands_begin());
3921 MI.eraseFromParent();
3924 case TargetOpcode::G_LOAD:
3925 case TargetOpcode::G_SEXTLOAD:
3926 case TargetOpcode::G_ZEXTLOAD:
3928 case TargetOpcode::G_STORE:
3930 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3931 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3932 case TargetOpcode::G_CTLZ:
3933 case TargetOpcode::G_CTTZ:
3934 case TargetOpcode::G_CTPOP:
3937 auto [Res, CarryOut,
LHS,
RHS] =
MI.getFirst4Regs();
3946 MI.eraseFromParent();
3950 auto [Res, CarryOut,
LHS,
RHS, CarryIn] =
MI.getFirst5Regs();
3976 MI.eraseFromParent();
3980 auto [Res, BorrowOut,
LHS,
RHS] =
MI.getFirst4Regs();
3985 MI.eraseFromParent();
3989 auto [Res, BorrowOut,
LHS,
RHS, BorrowIn] =
MI.getFirst5Regs();
4011 MI.eraseFromParent();
4039 case G_MERGE_VALUES:
4041 case G_UNMERGE_VALUES:
4043 case TargetOpcode::G_SEXT_INREG: {
4044 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4045 int64_t SizeInBits =
MI.getOperand(2).getImm();
4047 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4054 MI.eraseFromParent();
4057 case G_EXTRACT_VECTOR_ELT:
4058 case G_INSERT_VECTOR_ELT:
4060 case G_SHUFFLE_VECTOR:
4062 case G_VECTOR_COMPRESS:
4064 case G_DYN_STACKALLOC:
4068 case G_STACKRESTORE:
4078 case G_READ_REGISTER:
4079 case G_WRITE_REGISTER:
4124 case G_MEMCPY_INLINE:
4125 return lowerMemcpyInline(
MI);
4156 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4165 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4177 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4180 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4191 "Converting bits to bytes lost precision");
4198 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4215 std::initializer_list<unsigned> NonVecOpIndices) {
4216 if (
MI.getNumMemOperands() != 0)
4219 LLT VecTy =
MRI.getType(
MI.getReg(0));
4224 for (
unsigned OpIdx = 1; OpIdx <
MI.getNumOperands(); ++OpIdx) {
4257 int NumParts, NumLeftover;
4258 std::tie(NumParts, NumLeftover) =
4261 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
4262 for (
int i = 0; i < NumParts; ++i) {
4267 assert(NumLeftover == 1 &&
"expected exactly one leftover");
4276 for (
unsigned i = 0; i <
N; ++i) {
4279 else if (
Op.isImm())
4281 else if (
Op.isPredicate())
4303 std::initializer_list<unsigned> NonVecOpIndices) {
4305 "Non-compatible opcode or not specified non-vector operands");
4308 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4309 unsigned NumDefs =
MI.getNumDefs();
4317 for (
unsigned i = 0; i < NumDefs; ++i) {
4326 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4327 ++UseIdx, ++UseNo) {
4330 MI.getOperand(UseIdx));
4335 for (
auto Reg : SplitPieces)
4340 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4344 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4346 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4347 Defs.
push_back(OutputOpsPieces[DstNo][i]);
4350 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4351 Uses.push_back(InputOpsPieces[InputNo][i]);
4354 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4355 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
4360 for (
unsigned i = 0; i < NumDefs; ++i)
4361 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
4363 for (
unsigned i = 0; i < NumDefs; ++i)
4367 MI.eraseFromParent();
4376 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4377 unsigned NumDefs =
MI.getNumDefs();
4386 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4387 UseIdx += 2, ++UseNo) {
4395 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4397 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4403 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
4404 Phi.addUse(InputOpsPieces[j][i]);
4405 Phi.add(
MI.getOperand(1 + j * 2 + 1));
4415 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
4420 MI.eraseFromParent();
4428 const int NumDst =
MI.getNumOperands() - 1;
4429 const Register SrcReg =
MI.getOperand(NumDst).getReg();
4433 if (TypeIdx != 1 || NarrowTy == DstTy)
4459 const int PartsPerUnmerge = NumDst / NumUnmerge;
4461 for (
int I = 0;
I != NumUnmerge; ++
I) {
4464 for (
int J = 0; J != PartsPerUnmerge; ++J)
4465 MIB.
addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
4466 MIB.
addUse(Unmerge.getReg(
I));
4469 MI.eraseFromParent();
4476 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
4480 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
4482 if (NarrowTy == SrcTy)
4492 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
4506 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
4508 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4514 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4515 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
4516 ++i,
Offset += NumNarrowTyElts) {
4523 MI.eraseFromParent();
4527 assert(TypeIdx == 0 &&
"Bad type index");
4543 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
4546 for (
unsigned i = 0; i < NumParts; ++i) {
4548 for (
unsigned j = 0; j < NumElts; ++j)
4549 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
4555 MI.eraseFromParent();
4563 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
4565 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4567 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
4569 InsertVal =
MI.getOperand(2).getReg();
4584 IdxVal = MaybeCst->Value.getSExtValue();
4588 MI.eraseFromParent();
4593 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4596 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4597 TargetOpcode::G_ANYEXT);
4602 int64_t PartIdx = IdxVal / NewNumElts;
4611 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4612 VecParts[PartIdx] = InsertPart.getReg(0);
4616 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4621 MI.eraseFromParent();
4645 bool IsLoad = isa<GLoad>(LdStMI);
4657 int NumLeftover = -1;
4663 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4665 NumParts = NarrowRegs.
size();
4666 NumLeftover = NarrowLeftoverRegs.
size();
4683 auto MMO = LdStMI.
getMMO();
4685 unsigned NumParts,
unsigned Offset) ->
unsigned {
4688 for (
unsigned Idx = 0, E = NumParts;
Idx != E &&
Offset < TotalSize;
4690 unsigned ByteOffset =
Offset / 8;
4700 ValRegs.push_back(Dst);
4712 unsigned HandledOffset =
4713 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
4717 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4720 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4721 LeftoverTy, NarrowLeftoverRegs);
4731 using namespace TargetOpcode;
4735 switch (
MI.getOpcode()) {
4736 case G_IMPLICIT_DEF:
4752 case G_FCANONICALIZE:
4769 case G_INTRINSIC_LRINT:
4770 case G_INTRINSIC_LLRINT:
4771 case G_INTRINSIC_ROUND:
4772 case G_INTRINSIC_ROUNDEVEN:
4773 case G_INTRINSIC_TRUNC:
4799 case G_FMINNUM_IEEE:
4800 case G_FMAXNUM_IEEE:
4820 case G_CTLZ_ZERO_UNDEF:
4822 case G_CTTZ_ZERO_UNDEF:
4836 case G_ADDRSPACE_CAST:
4849 case G_STRICT_FLDEXP:
4863 case G_UNMERGE_VALUES:
4865 case G_BUILD_VECTOR:
4866 assert(TypeIdx == 0 &&
"not a vector type index");
4868 case G_CONCAT_VECTORS:
4872 case G_EXTRACT_VECTOR_ELT:
4873 case G_INSERT_VECTOR_ELT:
4882 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4883 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4885 case G_SHUFFLE_VECTOR:
4891 case G_INTRINSIC_FPTRUNC_ROUND:
4901 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
4902 "Not a bitcast operation");
4907 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
4915 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4920 for (
unsigned i = 0; i < SrcVRegs.
size(); i++)
4925 MI.eraseFromParent();
4931 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4935 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4936 MI.getFirst3RegLLTs();
4939 if (DstTy != Src1Ty)
4941 if (DstTy != Src2Ty)
4956 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4972 unsigned InputUsed[2] = {-1U, -1U};
4973 unsigned FirstMaskIdx =
High * NewElts;
4974 bool UseBuildVector =
false;
4975 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4977 int Idx = Mask[FirstMaskIdx + MaskOffset];
4982 if (Input >= std::size(Inputs)) {
4989 Idx -= Input * NewElts;
4993 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4994 if (InputUsed[OpNo] == Input) {
4997 }
else if (InputUsed[OpNo] == -1U) {
4999 InputUsed[OpNo] = Input;
5004 if (OpNo >= std::size(InputUsed)) {
5007 UseBuildVector =
true;
5015 if (UseBuildVector) {
5020 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5022 int Idx = Mask[FirstMaskIdx + MaskOffset];
5027 if (Input >= std::size(Inputs)) {
5034 Idx -= Input * NewElts;
5038 .buildExtractVectorElement(
5039 EltTy, Inputs[Input],
5046 }
else if (InputUsed[0] == -1U) {
5050 Register Op0 = Inputs[InputUsed[0]];
5054 : Inputs[InputUsed[1]];
5063 MI.eraseFromParent();
5069 auto &RdxMI = cast<GVecReduce>(
MI);
5076 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5082 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5085 const unsigned NumParts =
5091 if (DstTy != NarrowTy)
5097 unsigned NumPartsLeft = NumParts;
5098 while (NumPartsLeft > 1) {
5099 for (
unsigned Idx = 0;
Idx < NumPartsLeft - 1;
Idx += 2) {
5102 .buildInstr(ScalarOpc, {NarrowTy},
5103 {SplitSrcs[
Idx], SplitSrcs[
Idx + 1]})
5106 SplitSrcs = PartialResults;
5107 PartialResults.
clear();
5108 NumPartsLeft = SplitSrcs.
size();
5112 MI.eraseFromParent();
5117 for (
unsigned Idx = 1;
Idx < NumParts; ++
Idx)
5121 MI.eraseFromParent();
5125 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5135 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5138 Register Acc = PartialReductions[0];
5139 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5140 if (Part == NumParts - 1) {
5142 {Acc, PartialReductions[Part]});
5145 .
buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5149 MI.eraseFromParent();
5155 unsigned int TypeIdx,
5157 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5158 MI.getFirst3RegLLTs();
5159 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5163 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5164 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5165 "Unexpected vecreduce opcode");
5166 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5167 ? TargetOpcode::G_FADD
5168 : TargetOpcode::G_FMUL;
5174 for (
unsigned i = 0; i < NumParts; i++)
5179 MI.eraseFromParent();
5186 unsigned ScalarOpc) {
5194 while (SplitSrcs.
size() > 1) {
5196 for (
unsigned Idx = 0;
Idx < SplitSrcs.
size()-1;
Idx += 2) {
5204 SplitSrcs = std::move(PartialRdxs);
5208 MI.getOperand(1).setReg(SplitSrcs[0]);
5215 const LLT HalfTy,
const LLT AmtTy) {
5223 MI.eraseFromParent();
5229 unsigned VTBits = 2 * NVTBits;
5232 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
5233 if (Amt.
ugt(VTBits)) {
5235 }
else if (Amt.
ugt(NVTBits)) {
5239 }
else if (Amt == NVTBits) {
5250 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5251 if (Amt.
ugt(VTBits)) {
5253 }
else if (Amt.
ugt(NVTBits)) {
5257 }
else if (Amt == NVTBits) {
5271 if (Amt.
ugt(VTBits)) {
5274 }
else if (Amt.
ugt(NVTBits)) {
5279 }
else if (Amt == NVTBits) {
5296 MI.eraseFromParent();
5320 if (DstEltSize % 2 != 0)
5326 const unsigned NewBitSize = DstEltSize / 2;
5352 switch (
MI.getOpcode()) {
5353 case TargetOpcode::G_SHL: {
5369 ResultRegs[0] =
Lo.getReg(0);
5370 ResultRegs[1] =
Hi.getReg(0);
5373 case TargetOpcode::G_LSHR:
5374 case TargetOpcode::G_ASHR: {
5384 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5398 ResultRegs[0] =
Lo.getReg(0);
5399 ResultRegs[1] =
Hi.getReg(0);
5407 MI.eraseFromParent();
5414 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
5417 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
5432 assert(Ty.
isScalar() &&
"Expected scalar type to make neutral element for");
5437 "getNeutralElementForVecReduce called with invalid opcode!");
5438 case TargetOpcode::G_VECREDUCE_ADD:
5439 case TargetOpcode::G_VECREDUCE_OR:
5440 case TargetOpcode::G_VECREDUCE_XOR:
5441 case TargetOpcode::G_VECREDUCE_UMAX:
5443 case TargetOpcode::G_VECREDUCE_MUL:
5445 case TargetOpcode::G_VECREDUCE_AND:
5446 case TargetOpcode::G_VECREDUCE_UMIN:
5449 case TargetOpcode::G_VECREDUCE_SMAX:
5452 case TargetOpcode::G_VECREDUCE_SMIN:
5455 case TargetOpcode::G_VECREDUCE_FADD:
5457 case TargetOpcode::G_VECREDUCE_FMUL:
5459 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5460 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5461 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
5462 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5470 unsigned Opc =
MI.getOpcode();
5472 case TargetOpcode::G_IMPLICIT_DEF:
5473 case TargetOpcode::G_LOAD: {
5481 case TargetOpcode::G_STORE:
5488 case TargetOpcode::G_AND:
5489 case TargetOpcode::G_OR:
5490 case TargetOpcode::G_XOR:
5491 case TargetOpcode::G_ADD:
5492 case TargetOpcode::G_SUB:
5493 case TargetOpcode::G_MUL:
5494 case TargetOpcode::G_FADD:
5495 case TargetOpcode::G_FSUB:
5496 case TargetOpcode::G_FMUL:
5497 case TargetOpcode::G_FDIV:
5498 case TargetOpcode::G_FCOPYSIGN:
5499 case TargetOpcode::G_UADDSAT:
5500 case TargetOpcode::G_USUBSAT:
5501 case TargetOpcode::G_SADDSAT:
5502 case TargetOpcode::G_SSUBSAT:
5503 case TargetOpcode::G_SMIN:
5504 case TargetOpcode::G_SMAX:
5505 case TargetOpcode::G_UMIN:
5506 case TargetOpcode::G_UMAX:
5507 case TargetOpcode::G_FMINNUM:
5508 case TargetOpcode::G_FMAXNUM:
5509 case TargetOpcode::G_FMINNUM_IEEE:
5510 case TargetOpcode::G_FMAXNUM_IEEE:
5511 case TargetOpcode::G_FMINIMUM:
5512 case TargetOpcode::G_FMAXIMUM:
5513 case TargetOpcode::G_STRICT_FADD:
5514 case TargetOpcode::G_STRICT_FSUB:
5515 case TargetOpcode::G_STRICT_FMUL:
5516 case TargetOpcode::G_SHL:
5517 case TargetOpcode::G_ASHR:
5518 case TargetOpcode::G_LSHR: {
5526 case TargetOpcode::G_FMA:
5527 case TargetOpcode::G_STRICT_FMA:
5528 case TargetOpcode::G_FSHR:
5529 case TargetOpcode::G_FSHL: {
5538 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5539 case TargetOpcode::G_EXTRACT:
5546 case TargetOpcode::G_INSERT:
5547 case TargetOpcode::G_INSERT_VECTOR_ELT:
5548 case TargetOpcode::G_FREEZE:
5549 case TargetOpcode::G_FNEG:
5550 case TargetOpcode::G_FABS:
5551 case TargetOpcode::G_FSQRT:
5552 case TargetOpcode::G_FCEIL:
5553 case TargetOpcode::G_FFLOOR:
5554 case TargetOpcode::G_FNEARBYINT:
5555 case TargetOpcode::G_FRINT:
5556 case TargetOpcode::G_INTRINSIC_ROUND:
5557 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5558 case TargetOpcode::G_INTRINSIC_TRUNC:
5559 case TargetOpcode::G_BSWAP:
5560 case TargetOpcode::G_FCANONICALIZE:
5561 case TargetOpcode::G_SEXT_INREG:
5562 case TargetOpcode::G_ABS:
5570 case TargetOpcode::G_SELECT: {
5571 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
5573 if (!CondTy.isScalar() ||
5581 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5586 if (CondTy.isVector())
5596 case TargetOpcode::G_UNMERGE_VALUES:
5598 case TargetOpcode::G_PHI:
5600 case TargetOpcode::G_SHUFFLE_VECTOR:
5602 case TargetOpcode::G_BUILD_VECTOR: {
5604 for (
auto Op :
MI.uses()) {
5614 MI.eraseFromParent();
5617 case TargetOpcode::G_SEXT:
5618 case TargetOpcode::G_ZEXT:
5619 case TargetOpcode::G_ANYEXT:
5620 case TargetOpcode::G_TRUNC:
5621 case TargetOpcode::G_FPTRUNC:
5622 case TargetOpcode::G_FPEXT:
5623 case TargetOpcode::G_FPTOSI:
5624 case TargetOpcode::G_FPTOUI:
5625 case TargetOpcode::G_SITOFP:
5626 case TargetOpcode::G_UITOFP: {
5646 case TargetOpcode::G_ICMP:
5647 case TargetOpcode::G_FCMP: {
5661 case TargetOpcode::G_BITCAST: {
5682 case TargetOpcode::G_VECREDUCE_FADD:
5683 case TargetOpcode::G_VECREDUCE_FMUL:
5684 case TargetOpcode::G_VECREDUCE_ADD:
5685 case TargetOpcode::G_VECREDUCE_MUL:
5686 case TargetOpcode::G_VECREDUCE_AND:
5687 case TargetOpcode::G_VECREDUCE_OR:
5688 case TargetOpcode::G_VECREDUCE_XOR:
5689 case TargetOpcode::G_VECREDUCE_SMAX:
5690 case TargetOpcode::G_VECREDUCE_SMIN:
5691 case TargetOpcode::G_VECREDUCE_UMAX:
5692 case TargetOpcode::G_VECREDUCE_UMIN: {
5696 auto NeutralElement = getNeutralElementForVecReduce(
5704 NeutralElement,
Idx);
5708 MO.
setReg(NewVec.getReg(0));
5720 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5722 unsigned MaskNumElts = Mask.size();
5726 if (MaskNumElts == SrcNumElts)
5729 if (MaskNumElts < SrcNumElts) {
5733 for (
unsigned I = MaskNumElts;
I < SrcNumElts; ++
I)
5739 MI.getOperand(1).getReg(),
5740 MI.getOperand(2).getReg(), NewMask);
5741 MI.eraseFromParent();
5746 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
5747 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5755 MOps1[0] =
MI.getOperand(1).getReg();
5756 MOps2[0] =
MI.getOperand(2).getReg();
5763 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
5765 if (
Idx >=
static_cast<int>(SrcNumElts))
5766 Idx += PaddedMaskNumElts - SrcNumElts;
5771 if (MaskNumElts != PaddedMaskNumElts) {
5776 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
5786 MI.eraseFromParent();
5792 unsigned int TypeIdx,
LLT MoreTy) {
5793 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
5795 unsigned NumElts = DstTy.getNumElements();
5798 if (DstTy.isVector() && Src1Ty.isVector() &&
5799 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5807 if (DstTy != Src1Ty || DstTy != Src2Ty)
5815 for (
unsigned I = 0;
I != NumElts; ++
I) {
5817 if (
Idx <
static_cast<int>(NumElts))
5822 for (
unsigned I = NumElts;
I != WidenNumElts; ++
I)
5827 MI.getOperand(1).getReg(),
5828 MI.getOperand(2).getReg(), NewMask);
5829 MI.eraseFromParent();
5838 unsigned SrcParts = Src1Regs.
size();
5839 unsigned DstParts = DstRegs.
size();
5841 unsigned DstIdx = 0;
5843 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5844 DstRegs[DstIdx] = FactorSum;
5846 unsigned CarrySumPrevDstIdx;
5849 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5851 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5852 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5854 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5858 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5859 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5861 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5871 if (DstIdx != DstParts - 1) {
5873 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
5874 FactorSum = Uaddo.
getReg(0);
5875 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).
getReg(0);
5876 for (
unsigned i = 2; i < Factors.
size(); ++i) {
5878 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
5879 FactorSum = Uaddo.
getReg(0);
5881 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5885 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5886 for (
unsigned i = 2; i < Factors.
size(); ++i)
5887 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5890 CarrySumPrevDstIdx = CarrySum;
5891 DstRegs[DstIdx] = FactorSum;
5908 unsigned Opcode =
MI.getOpcode();
5909 unsigned OpO, OpE, OpF;
5911 case TargetOpcode::G_SADDO:
5912 case TargetOpcode::G_SADDE:
5913 case TargetOpcode::G_UADDO:
5914 case TargetOpcode::G_UADDE:
5915 case TargetOpcode::G_ADD:
5916 OpO = TargetOpcode::G_UADDO;
5917 OpE = TargetOpcode::G_UADDE;
5918 OpF = TargetOpcode::G_UADDE;
5919 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5920 OpF = TargetOpcode::G_SADDE;
5922 case TargetOpcode::G_SSUBO:
5923 case TargetOpcode::G_SSUBE:
5924 case TargetOpcode::G_USUBO:
5925 case TargetOpcode::G_USUBE:
5926 case TargetOpcode::G_SUB:
5927 OpO = TargetOpcode::G_USUBO;
5928 OpE = TargetOpcode::G_USUBE;
5929 OpF = TargetOpcode::G_USUBE;
5930 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5931 OpF = TargetOpcode::G_SSUBE;
5938 unsigned NumDefs =
MI.getNumExplicitDefs();
5939 Register Src1 =
MI.getOperand(NumDefs).getReg();
5940 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
5943 CarryDst =
MI.getOperand(1).getReg();
5944 if (
MI.getNumOperands() == NumDefs + 3)
5945 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
5948 LLT LeftoverTy, DummyTy;
5950 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5955 int NarrowParts = Src1Regs.
size();
5956 for (
int I = 0, E = Src1Left.
size();
I != E; ++
I) {
5962 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
5967 if (i == e - 1 && CarryDst)
5968 CarryOut = CarryDst;
5972 {Src1Regs[i], Src2Regs[i]});
5973 }
else if (i == e - 1) {
5975 {Src1Regs[i], Src2Regs[i], CarryIn});
5978 {Src1Regs[i], Src2Regs[i], CarryIn});
5984 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
5985 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5986 ArrayRef(DstRegs).drop_front(NarrowParts));
5988 MI.eraseFromParent();
5994 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
6002 if (
Size % NarrowSize != 0)
6005 unsigned NumParts =
Size / NarrowSize;
6006 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
6007 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6013 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6018 MI.eraseFromParent();
6028 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
6042 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6058 if (SizeOp1 % NarrowSize != 0)
6060 int NumParts = SizeOp1 / NarrowSize;
6064 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6068 uint64_t OpStart =
MI.getOperand(2).getImm();
6070 for (
int i = 0; i < NumParts; ++i) {
6071 unsigned SrcStart = i * NarrowSize;
6073 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6076 }
else if (SrcStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6084 int64_t ExtractOffset;
6086 if (OpStart < SrcStart) {
6088 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6090 ExtractOffset = OpStart - SrcStart;
6091 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6095 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6107 else if (DstRegs.
size() > 1)
6111 MI.eraseFromParent();
6126 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6134 uint64_t OpStart =
MI.getOperand(3).getImm();
6136 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
6137 unsigned DstStart =
I * NarrowSize;
6139 if (DstStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6147 if (MRI.
getType(SrcRegs[
I]) == LeftoverTy) {
6153 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6161 int64_t ExtractOffset, InsertOffset;
6163 if (OpStart < DstStart) {
6165 ExtractOffset = DstStart - OpStart;
6166 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6168 InsertOffset = OpStart - DstStart;
6171 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6175 if (ExtractOffset != 0 || SegSize != OpSize) {
6195 MI.eraseFromParent();
6205 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
6211 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6212 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
6216 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6217 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6220 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6222 {Src0Regs[I], Src1Regs[I]});
6226 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6229 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6230 DstLeftoverRegs.
push_back(Inst.getReg(0));
6233 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6234 LeftoverTy, DstLeftoverRegs);
6236 MI.eraseFromParent();
6246 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
6253 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6254 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
6255 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6257 MI.eraseFromParent();
6267 Register CondReg =
MI.getOperand(1).getReg();
6279 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6280 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6284 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6285 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
6288 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6290 CondReg, Src1Regs[
I], Src2Regs[
I]);
6294 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6296 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
6300 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6301 LeftoverTy, DstLeftoverRegs);
6303 MI.eraseFromParent();
6313 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6317 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6320 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6322 auto C_0 =
B.buildConstant(NarrowTy, 0);
6324 UnmergeSrc.getReg(1), C_0);
6325 auto LoCTLZ = IsUndef ?
6326 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6327 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6328 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6329 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6330 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6331 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6333 MI.eraseFromParent();
6346 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6350 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6353 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6355 auto C_0 =
B.buildConstant(NarrowTy, 0);
6357 UnmergeSrc.getReg(0), C_0);
6358 auto HiCTTZ = IsUndef ?
6359 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6360 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6361 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6362 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6363 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6364 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6366 MI.eraseFromParent();
6379 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6389 MI.eraseFromParent();
6409 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
6410 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
6411 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
6412 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
6414 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
6416 MI.getOperand(2).setReg(Trunc.getReg(0));
6423 unsigned Opc =
MI.getOpcode();
6432 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6435 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
6439 case TargetOpcode::G_CTLZ: {
6440 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6443 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6451 MI.eraseFromParent();
6467 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6471 Op = MIBOp.getReg(0);
6476 MI.eraseFromParent();
6479 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6482 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
6486 case TargetOpcode::G_CTTZ: {
6487 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6490 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6499 MI.eraseFromParent();
6510 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6511 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6515 MI.eraseFromParent();
6519 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
6520 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6524 case TargetOpcode::G_CTPOP: {
6535 auto C_1 =
B.buildConstant(Ty, 1);
6536 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
6538 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
6539 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6540 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
6544 auto C_2 =
B.buildConstant(Ty, 2);
6545 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
6547 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
6548 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6549 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6550 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6557 auto C_4 =
B.buildConstant(Ty, 4);
6558 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
6559 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
6561 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
6562 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6564 assert(
Size<=128 &&
"Scalar size is too large for CTPOP lower algorithm");
6570 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
6572 auto IsMulSupported = [
this](
const LLT Ty) {
6573 auto Action = LI.
getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6576 if (IsMulSupported(Ty)) {
6577 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
6578 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6580 auto ResTmp = B8Count;
6581 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
6582 auto ShiftC =
B.buildConstant(Ty, Shift);
6583 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
6584 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
6586 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6588 MI.eraseFromParent();
6601 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
C);
6609 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
6618 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6619 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6642 MI.eraseFromParent();
6648 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
6653 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6697 MI.eraseFromParent();
6711 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6712 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6716 return lowerFunnelShiftAsShifts(
MI);
6720 if (Result == UnableToLegalize)
6721 return lowerFunnelShiftAsShifts(
MI);
6726 auto [Dst, Src] =
MI.getFirst2Regs();
6740 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6754 {UnmergeSrc.getReg(0)});
6756 {UnmergeSrc.getReg(1)});
6761 MI.eraseFromParent();
6778 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
6782 LLT DstTy =
MRI.getType(DstReg);
6783 LLT SrcTy =
MRI.getType(SrcReg);
6803 for (
unsigned I = 0;
I < SplitSrcs.
size(); ++
I) {
6817 MI.eraseFromParent();
6826 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
6828 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
6829 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6832 MI.eraseFromParent();
6837 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
6839 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6840 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
6845 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6848 return lowerRotateWithReverseRotate(
MI);
6851 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6852 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6853 bool IsFShLegal =
false;
6854 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6855 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6859 MI.eraseFromParent();
6864 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6867 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6872 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6873 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6874 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
6880 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
6881 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6883 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6889 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
6890 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
6892 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6894 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6898 MIRBuilder.
buildOr(Dst, ShVal, RevShiftVal);
6899 MI.eraseFromParent();
6907 auto [Dst, Src] =
MI.getFirst2Regs();
6957 MI.eraseFromParent();
6962 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6968 MI.eraseFromParent();
6987 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6997 MI.eraseFromParent();
7022 MI.eraseFromParent();
7030 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7034 if (SrcTy !=
S64 && SrcTy !=
S32)
7036 if (DstTy !=
S32 && DstTy !=
S64)
7065 MI.eraseFromParent();
7070 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7129 MI.eraseFromParent();
7139 auto [Dst, Src] =
MI.getFirst2Regs();
7147 unsigned Flags =
MI.getFlags();
7150 MI.eraseFromParent();
7154 const unsigned ExpMask = 0x7ff;
7155 const unsigned ExpBiasf64 = 1023;
7156 const unsigned ExpBiasf16 = 15;
7245 MI.eraseFromParent();
7251 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
7262 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7267 MI.eraseFromParent();
7273 case TargetOpcode::G_SMIN:
7275 case TargetOpcode::G_SMAX:
7277 case TargetOpcode::G_UMIN:
7279 case TargetOpcode::G_UMAX:
7287 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7295 MI.eraseFromParent();
7325 MI.eraseFromParent();
7331 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
7332 const int Src0Size = Src0Ty.getScalarSizeInBits();
7333 const int Src1Size = Src1Ty.getScalarSizeInBits();
7343 if (Src0Ty == Src1Ty) {
7345 }
else if (Src0Size > Src1Size) {
7360 unsigned Flags =
MI.getFlags();
7367 MI.eraseFromParent();
7373 unsigned NewOp =
MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7374 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7376 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7396 MI.eraseFromParent();
7404 unsigned Flags =
MI.getFlags();
7409 MI.eraseFromParent();
7415 auto [DstReg,
X] =
MI.getFirst2Regs();
7416 const unsigned Flags =
MI.getFlags();
7443 MI.eraseFromParent();
7448 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7449 unsigned Flags =
MI.getFlags();
7461 SrcReg, Zero, Flags);
7463 SrcReg, Trunc, Flags);
7468 MI.eraseFromParent();
7474 const unsigned NumOps =
MI.getNumOperands();
7475 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
7476 unsigned PartSize = Src0Ty.getSizeInBits();
7481 for (
unsigned I = 2;
I != NumOps; ++
I) {
7482 const unsigned Offset = (
I - 1) * PartSize;
7487 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
7493 ResultReg = NextResult;
7496 if (DstTy.isPointer()) {
7498 DstTy.getAddressSpace())) {
7506 MI.eraseFromParent();
7512 const unsigned NumDst =
MI.getNumOperands() - 1;
7513 Register SrcReg =
MI.getOperand(NumDst).getReg();
7514 Register Dst0Reg =
MI.getOperand(0).getReg();
7529 unsigned Offset = DstSize;
7530 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
7536 MI.eraseFromParent();
7555 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7556 InsertVal =
MI.getOperand(2).getReg();
7570 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
7576 MI.eraseFromParent();
7581 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
7599 int64_t
Offset = IdxVal * EltBytes;
7618 MI.eraseFromParent();
7624 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7625 MI.getFirst3RegLLTs();
7633 for (
int Idx : Mask) {
7635 if (!Undef.isValid())
7641 if (Src0Ty.isScalar()) {
7644 int NumElts = Src0Ty.getNumElements();
7645 Register SrcVec =
Idx < NumElts ? Src0Reg : Src1Reg;
7646 int ExtractIdx =
Idx < NumElts ?
Idx :
Idx - NumElts;
7653 if (DstTy.isScalar())
7657 MI.eraseFromParent();
7663 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
7664 MI.getFirst4RegLLTs();
7666 if (VecTy.isScalableVector())
7691 std::optional<APInt> PassthruSplatVal =
7694 if (PassthruSplatVal.has_value()) {
7697 }
else if (HasPassthru) {
7709 unsigned NumElmts = VecTy.getNumElements();
7710 for (
unsigned I = 0;
I < NumElmts; ++
I) {
7725 if (HasPassthru &&
I == NumElmts - 1) {
7731 {OutPos, EndOfVector});
7744 MI.eraseFromParent();
7761 if (Alignment >
Align(1)) {
7773 const auto &MF = *
MI.getMF();
7774 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7779 Register AllocSize =
MI.getOperand(1).getReg();
7790 MI.eraseFromParent();
7801 MI.eraseFromParent();
7812 MI.eraseFromParent();
7818 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7819 unsigned Offset =
MI.getOperand(2).getImm();
7824 unsigned DstSize = DstTy.getSizeInBits();
7826 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7833 for (
unsigned Idx =
Offset / SrcEltSize;
7837 if (SubVectorElts.
size() == 1)
7842 MI.eraseFromParent();
7847 if (DstTy.isScalar() &&
7850 LLT SrcIntTy = SrcTy;
7864 MI.eraseFromParent();
7872 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
7884 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7897 for (
unsigned i = 0;
Idx < (
Offset + InsertSize) / EltSize;
7899 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
7912 MI.eraseFromParent();
7926 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
7930 LLT IntDstTy = DstTy;
7956 MI.eraseFromParent();
7962 auto [Dst0, Dst0Ty, Dst1, Dst1Ty,
LHS, LHSTy,
RHS, RHSTy] =
7963 MI.getFirst4RegLLTs();
7964 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
7967 LLT BoolTy = Dst1Ty;
7986 auto ResultLowerThanLHS =
7994 MI.eraseFromParent();
8001 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8006 switch (
MI.getOpcode()) {
8009 case TargetOpcode::G_UADDSAT:
8012 BaseOp = TargetOpcode::G_ADD;
8014 case TargetOpcode::G_SADDSAT:
8017 BaseOp = TargetOpcode::G_ADD;
8019 case TargetOpcode::G_USUBSAT:
8022 BaseOp = TargetOpcode::G_SUB;
8024 case TargetOpcode::G_SSUBSAT:
8027 BaseOp = TargetOpcode::G_SUB;
8070 MI.eraseFromParent();
8076 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8081 unsigned OverflowOp;
8082 switch (
MI.getOpcode()) {
8085 case TargetOpcode::G_UADDSAT:
8088 OverflowOp = TargetOpcode::G_UADDO;
8090 case TargetOpcode::G_SADDSAT:
8093 OverflowOp = TargetOpcode::G_SADDO;
8095 case TargetOpcode::G_USUBSAT:
8098 OverflowOp = TargetOpcode::G_USUBO;
8100 case TargetOpcode::G_SSUBSAT:
8103 OverflowOp = TargetOpcode::G_SSUBO;
8109 Register Tmp = OverflowRes.getReg(0);
8110 Register Ov = OverflowRes.getReg(1);
8136 MI.eraseFromParent();
8142 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8143 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8144 "Expected shlsat opcode!");
8145 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8146 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8168 MI.eraseFromParent();
8173 auto [Dst, Src] =
MI.getFirst2Regs();
8176 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8185 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
8187 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8199 Res.getInstr()->getOperand(0).setReg(Dst);
8201 MI.eraseFromParent();
8208 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
8211 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8212 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8213 return B.buildOr(Dst,
LHS,
RHS);
8218 auto [Dst, Src] =
MI.getFirst2Regs();
8246 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
8266 MI.eraseFromParent();
8274 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8275 int NameOpIdx = IsRead ? 1 : 0;
8276 int ValRegIndex = IsRead ? 0 : 1;
8278 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
8280 const MDString *RegStr = cast<MDString>(
8281 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8292 MI.eraseFromParent();
8298 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
8299 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8308 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8314 MI.eraseFromParent();
8320 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8325 MI.eraseFromParent();
8330 MI.eraseFromParent();
8349 APInt ExpMask = Inf;
8367 LLT DstTyCopy = DstTy;
8382 Mask &= ~fcPosFinite;
8389 Mask &= ~fcNegFinite;
8400 Mask &= ~PartialCheck;
8409 else if (PartialCheck ==
fcZero)
8428 appendToRes(SubnormalRes);
8435 else if (PartialCheck ==
fcInf)
8448 if (PartialCheck ==
fcNan) {
8452 }
else if (PartialCheck ==
fcQNan) {
8462 Abs, InfWithQnanBitC);
8470 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
8473 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8484 appendToRes(NormalRes);
8488 MI.eraseFromParent();
8494 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8495 MI.getFirst4RegLLTs();
8497 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8506 if (MaskTy.isScalar()) {
8520 if (DstTy.isVector()) {
8523 MaskReg = ShufSplat.
getReg(0);
8528 }
else if (!DstTy.isVector()) {
8533 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8546 MI.eraseFromParent();
8552 unsigned Opcode =
MI.getOpcode();
8555 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8556 : TargetOpcode::G_UDIV,
8557 {
MI.getOperand(0).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
8559 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8560 : TargetOpcode::G_UREM,
8561 {
MI.getOperand(1).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
8562 MI.eraseFromParent();
8579 MI.eraseFromParent();
8594 MI.eraseFromParent();
8601 Register DestReg =
MI.getOperand(0).getReg();
8607 MI.eraseFromParent();
8634 Register ListPtr =
MI.getOperand(1).getReg();
8644 const Align A(
MI.getOperand(2).getImm());
8651 VAList = AndDst.
getReg(0);
8669 Align EltAlignment =
DL.getABITypeAlign(Ty);
8674 MI.eraseFromParent();
8689 unsigned Limit,
const MemOp &
Op,
8690 unsigned DstAS,
unsigned SrcAS,
8693 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
8703 if (
Op.isFixedDstAlign())
8711 unsigned NumMemOps = 0;
8715 while (TySize >
Size) {
8724 assert(NewTySize > 0 &&
"Could not find appropriate type");
8731 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
8733 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
8743 if (++NumMemOps > Limit)
8746 MemOps.push_back(Ty);
8758 if (!Ty.
isVector() && ValVRegAndVal) {
8759 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8767 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8790 auto &MF = *
MI.getParent()->getParent();
8791 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8792 auto &
DL = MF.getDataLayout();
8795 assert(KnownLen != 0 &&
"Have a zero length memset length!");
8797 bool DstAlignCanChange =
false;
8803 DstAlignCanChange =
true;
8806 std::vector<LLT> MemOps;
8808 const auto &DstMMO = **
MI.memoperands_begin();
8812 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8820 MF.getFunction().getAttributes(), TLI))
8823 if (DstAlignCanChange) {
8826 Align NewAlign =
DL.getABITypeAlign(IRTy);
8827 if (NewAlign > Alignment) {
8828 Alignment = NewAlign;
8838 LLT LargestTy = MemOps[0];
8839 for (
unsigned i = 1; i < MemOps.size(); i++)
8841 LargestTy = MemOps[i];
8854 unsigned DstOff = 0;
8855 unsigned Size = KnownLen;
8856 for (
unsigned I = 0;
I < MemOps.size();
I++) {
8859 if (TySize >
Size) {
8862 assert(
I == MemOps.size() - 1 &&
I != 0);
8863 DstOff -= TySize -
Size;
8874 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8881 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8887 Ptr = MIB.buildPtrAdd(PtrTy, Dst,
Offset).getReg(0);
8890 MIB.buildStore(
Value,
Ptr, *StoreMMO);
8895 MI.eraseFromParent();
8901 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8903 auto [Dst, Src, Len] =
MI.getFirst3Regs();
8905 const auto *MMOIt =
MI.memoperands_begin();
8907 bool IsVolatile =
MemOp->isVolatile();
8913 "inline memcpy with dynamic size is not yet supported");
8914 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8915 if (KnownLen == 0) {
8916 MI.eraseFromParent();
8920 const auto &DstMMO = **
MI.memoperands_begin();
8921 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8922 Align DstAlign = DstMMO.getBaseAlign();
8923 Align SrcAlign = SrcMMO.getBaseAlign();
8925 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8932 Align SrcAlign,
bool IsVolatile) {
8933 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8934 return lowerMemcpy(
MI, Dst, Src, KnownLen,
8935 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8942 Align SrcAlign,
bool IsVolatile) {
8943 auto &MF = *
MI.getParent()->getParent();
8944 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8945 auto &
DL = MF.getDataLayout();
8948 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
8950 bool DstAlignCanChange =
false;
8952 Align Alignment = std::min(DstAlign, SrcAlign);
8956 DstAlignCanChange =
true;
8962 std::vector<LLT> MemOps;
8964 const auto &DstMMO = **
MI.memoperands_begin();
8965 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8971 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8974 MF.getFunction().getAttributes(), TLI))
8977 if (DstAlignCanChange) {
8980 Align NewAlign =
DL.getABITypeAlign(IRTy);
8985 if (!
TRI->hasStackRealignment(MF))
8986 while (NewAlign > Alignment &&
DL.exceedsNaturalStackAlignment(NewAlign))
8989 if (NewAlign > Alignment) {
8990 Alignment = NewAlign;
8998 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
9006 unsigned CurrOffset = 0;
9007 unsigned Size = KnownLen;
9008 for (
auto CopyTy : MemOps) {
9011 if (CopyTy.getSizeInBytes() >
Size)
9012 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
9016 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9018 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9023 if (CurrOffset != 0) {
9027 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
9029 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9033 if (CurrOffset != 0) {
9035 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
9037 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9038 CurrOffset += CopyTy.getSizeInBytes();
9039 Size -= CopyTy.getSizeInBytes();
9042 MI.eraseFromParent();
9050 auto &MF = *
MI.getParent()->getParent();
9051 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9052 auto &
DL = MF.getDataLayout();
9055 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
9057 bool DstAlignCanChange =
false;
9060 Align Alignment = std::min(DstAlign, SrcAlign);
9064 DstAlignCanChange =
true;
9067 std::vector<LLT> MemOps;
9069 const auto &DstMMO = **
MI.memoperands_begin();
9070 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
9079 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9082 MF.getFunction().getAttributes(), TLI))
9085 if (DstAlignCanChange) {
9088 Align NewAlign =
DL.getABITypeAlign(IRTy);
9093 if (!
TRI->hasStackRealignment(MF))
9094 while (NewAlign > Alignment &&
DL.exceedsNaturalStackAlignment(NewAlign))
9097 if (NewAlign > Alignment) {
9098 Alignment = NewAlign;
9106 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
9112 unsigned CurrOffset = 0;
9114 for (
auto CopyTy : MemOps) {
9117 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9121 if (CurrOffset != 0) {
9125 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
9127 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9128 CurrOffset += CopyTy.getSizeInBytes();
9132 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
9133 LLT CopyTy = MemOps[
I];
9136 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.
getSizeInBytes());
9139 if (CurrOffset != 0) {
9143 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
9145 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
9148 MI.eraseFromParent();
9154 const unsigned Opc =
MI.getOpcode();
9157 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9158 Opc == TargetOpcode::G_MEMSET) &&
9159 "Expected memcpy like instruction");
9161 auto MMOIt =
MI.memoperands_begin();
9166 auto [Dst, Src, Len] =
MI.getFirst3Regs();
9168 if (Opc != TargetOpcode::G_MEMSET) {
9169 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
9171 SrcAlign =
MemOp->getBaseAlign();
9178 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9180 if (KnownLen == 0) {
9181 MI.eraseFromParent();
9185 bool IsVolatile =
MemOp->isVolatile();
9186 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9187 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9194 if (MaxLen && KnownLen > MaxLen)
9197 if (Opc == TargetOpcode::G_MEMCPY) {
9198 auto &MF = *
MI.getParent()->getParent();
9199 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9202 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9205 if (Opc == TargetOpcode::G_MEMMOVE)
9206 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9207 if (Opc == TargetOpcode::G_MEMSET)
9208 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ ICMP_ULT
unsigned less than
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a three-way compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)