42#define DEBUG_TYPE "legalizer"
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
55static std::pair<int, int>
61 unsigned NumParts =
Size / NarrowSize;
62 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
65 if (LeftoverSize == 0)
70 if (LeftoverSize % EltSize != 0)
79 return std::make_pair(NumParts, NumLeftover);
106 : MIRBuilder(Builder), Observer(Observer),
MRI(MF.getRegInfo()),
107 LI(*MF.getSubtarget().getLegalizerInfo()),
108 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
113 : MIRBuilder(
B), Observer(Observer),
MRI(MF.getRegInfo()), LI(LI),
114 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
123 if (isa<GIntrinsic>(
MI))
126 switch (Step.Action) {
141 return bitcast(
MI, Step.TypeIdx, Step.NewType);
144 return lower(
MI, Step.TypeIdx, Step.NewType);
161void LegalizerHelper::insertParts(
Register DstReg,
183 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
185 for (
auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 return mergeMixedSubvectors(DstReg, AllRegs);
192 for (
auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193 extractGCDType(GCDRegs, GCDTy, PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
208void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
211 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
212 appendVectorElts(AllElts, PartRegs[i]);
218 appendVectorElts(AllElts, Leftover);
226 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228 const int StartIdx = Regs.
size();
229 const int NumResults =
MI.getNumOperands() - 1;
231 for (
int I = 0;
I != NumResults; ++
I)
232 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
238 if (SrcTy == GCDTy) {
253 extractGCDType(Parts, GCDTy, SrcReg);
257LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
259 unsigned PadStrategy) {
264 int NumOrigSrc = VRegs.
size();
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
276 assert(PadStrategy == TargetOpcode::G_SEXT);
297 for (
int I = 0;
I != NumParts; ++
I) {
298 bool AllMergePartsArePadding =
true;
301 for (
int J = 0; J != NumSubParts; ++J) {
302 int Idx =
I * NumSubParts + J;
303 if (
Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
308 SubMerge[J] = VRegs[
Idx];
311 AllMergePartsArePadding =
false;
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
330 Remerge[
I] = AllPadReg;
334 if (NumSubParts == 1)
335 Remerge[
I] = SubMerge[0];
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[
I];
344 VRegs = std::move(Remerge);
348void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
355 if (DstTy == LCMTy) {
369 UnmergeDefs[0] = DstReg;
370 for (
unsigned I = 1;
I != NumDefs; ++
I)
382#define RTLIBCASE_INT(LibcallPrefix) \
386 return RTLIB::LibcallPrefix##32; \
388 return RTLIB::LibcallPrefix##64; \
390 return RTLIB::LibcallPrefix##128; \
392 llvm_unreachable("unexpected size"); \
396#define RTLIBCASE(LibcallPrefix) \
400 return RTLIB::LibcallPrefix##32; \
402 return RTLIB::LibcallPrefix##64; \
404 return RTLIB::LibcallPrefix##80; \
406 return RTLIB::LibcallPrefix##128; \
408 llvm_unreachable("unexpected size"); \
413 case TargetOpcode::G_MUL:
415 case TargetOpcode::G_SDIV:
417 case TargetOpcode::G_UDIV:
419 case TargetOpcode::G_SREM:
421 case TargetOpcode::G_UREM:
423 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 case TargetOpcode::G_FADD:
427 case TargetOpcode::G_FSUB:
429 case TargetOpcode::G_FMUL:
431 case TargetOpcode::G_FDIV:
433 case TargetOpcode::G_FEXP:
435 case TargetOpcode::G_FEXP2:
437 case TargetOpcode::G_FEXP10:
439 case TargetOpcode::G_FREM:
441 case TargetOpcode::G_FPOW:
443 case TargetOpcode::G_FPOWI:
445 case TargetOpcode::G_FMA:
447 case TargetOpcode::G_FSIN:
449 case TargetOpcode::G_FCOS:
451 case TargetOpcode::G_FLOG10:
453 case TargetOpcode::G_FLOG:
455 case TargetOpcode::G_FLOG2:
457 case TargetOpcode::G_FLDEXP:
459 case TargetOpcode::G_FCEIL:
461 case TargetOpcode::G_FFLOOR:
463 case TargetOpcode::G_FMINNUM:
465 case TargetOpcode::G_FMAXNUM:
467 case TargetOpcode::G_FSQRT:
469 case TargetOpcode::G_FRINT:
471 case TargetOpcode::G_FNEARBYINT:
473 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
475 case TargetOpcode::G_INTRINSIC_LRINT:
477 case TargetOpcode::G_INTRINSIC_LLRINT:
503 if (CallerAttrs.
hasRetAttr(Attribute::ZExt) ||
515 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
522 if (!VReg.
isVirtual() || VReg != Next->getOperand(1).getReg())
525 Register PReg = Next->getOperand(0).getReg();
533 if (Ret->getNumImplicitOperands() != 1)
536 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
560 Info.OrigRet = Result;
563 (Result.Ty->isVoidTy() ||
568 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
569 if (!CLI.lowerCall(MIRBuilder,
Info))
572 if (
MI &&
Info.LoweredTailCall) {
573 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
584 "Expected instr following MI to be return or debug inst?");
588 }
while (
MI->getNextNode());
618 Args.push_back({MO.getReg(), OpType, 0});
620 {
MI.getOperand(0).
getReg(), OpType, 0}, Args,
631 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
635 LLT OpLLT =
MRI.getType(Reg);
636 Type *OpTy =
nullptr;
641 Args.push_back({Reg, OpTy, 0});
647 unsigned Opc =
MI.getOpcode();
649 case TargetOpcode::G_BZERO:
650 RTLibcall = RTLIB::BZERO;
652 case TargetOpcode::G_MEMCPY:
653 RTLibcall = RTLIB::MEMCPY;
654 Args[0].Flags[0].setReturned();
656 case TargetOpcode::G_MEMMOVE:
657 RTLibcall = RTLIB::MEMMOVE;
658 Args[0].Flags[0].setReturned();
660 case TargetOpcode::G_MEMSET:
661 RTLibcall = RTLIB::MEMSET;
662 Args[0].Flags[0].setReturned();
667 const char *
Name = TLI.getLibcallName(RTLibcall);
677 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
681 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
684 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
685 if (!CLI.lowerCall(MIRBuilder,
Info))
688 if (
Info.LoweredTailCall) {
689 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
700 "Expected instr following MI to be return or debug inst?");
704 }
while (
MI.getNextNode());
714 unsigned Opc =
MI.getOpcode();
715 auto &AtomicMI = cast<GMemOperation>(
MI);
716 auto &MMO = AtomicMI.getMMO();
717 auto Ordering = MMO.getMergedOrdering();
718 LLT MemType = MMO.getMemoryType();
721 return RTLIB::UNKNOWN_LIBCALL;
723#define LCALLS(A, B) \
724 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
726 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
728 case TargetOpcode::G_ATOMIC_CMPXCHG:
729 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
731 return getOutlineAtomicHelper(LC, Ordering, MemSize);
733 case TargetOpcode::G_ATOMICRMW_XCHG: {
735 return getOutlineAtomicHelper(LC, Ordering, MemSize);
737 case TargetOpcode::G_ATOMICRMW_ADD:
738 case TargetOpcode::G_ATOMICRMW_SUB: {
740 return getOutlineAtomicHelper(LC, Ordering, MemSize);
742 case TargetOpcode::G_ATOMICRMW_AND: {
744 return getOutlineAtomicHelper(LC, Ordering, MemSize);
746 case TargetOpcode::G_ATOMICRMW_OR: {
748 return getOutlineAtomicHelper(LC, Ordering, MemSize);
750 case TargetOpcode::G_ATOMICRMW_XOR: {
752 return getOutlineAtomicHelper(LC, Ordering, MemSize);
755 return RTLIB::UNKNOWN_LIBCALL;
768 unsigned Opc =
MI.getOpcode();
770 case TargetOpcode::G_ATOMIC_CMPXCHG:
771 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
774 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
775 MI.getFirst4RegLLTs();
778 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
779 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
780 NewLLT) =
MI.getFirst5RegLLTs();
790 case TargetOpcode::G_ATOMICRMW_XCHG:
791 case TargetOpcode::G_ATOMICRMW_ADD:
792 case TargetOpcode::G_ATOMICRMW_SUB:
793 case TargetOpcode::G_ATOMICRMW_AND:
794 case TargetOpcode::G_ATOMICRMW_OR:
795 case TargetOpcode::G_ATOMICRMW_XOR: {
796 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
799 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
803 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
818 const char *
Name = TLI.getLibcallName(RTLibcall);
828 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
832 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
833 if (!CLI.lowerCall(MIRBuilder,
Info))
845 case TargetOpcode::G_FPEXT:
847 case TargetOpcode::G_FPTRUNC:
849 case TargetOpcode::G_FPTOSI:
851 case TargetOpcode::G_FPTOUI:
853 case TargetOpcode::G_SITOFP:
855 case TargetOpcode::G_UITOFP:
867 {{
MI.getOperand(1).
getReg(), FromType, 0}}, LocObserver, &
MI);
873 switch (
MI.getOpcode()) {
874 case TargetOpcode::G_GET_FPENV:
875 RTLibcall = RTLIB::FEGETENV;
877 case TargetOpcode::G_SET_FPENV:
878 case TargetOpcode::G_RESET_FPENV:
879 RTLibcall = RTLIB::FESETENV;
881 case TargetOpcode::G_GET_FPMODE:
882 RTLibcall = RTLIB::FEGETMODE;
884 case TargetOpcode::G_SET_FPMODE:
885 case TargetOpcode::G_RESET_FPMODE:
886 RTLibcall = RTLIB::FESETMODE;
915 auto &Ctx = MF.getFunction().getContext();
926 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
933 LocObserver,
nullptr);
955 auto &Ctx = MF.getFunction().getContext();
971 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
977 LocObserver,
nullptr);
994 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
996 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1013 switch (
MI.getOpcode()) {
1016 case TargetOpcode::G_MUL:
1017 case TargetOpcode::G_SDIV:
1018 case TargetOpcode::G_UDIV:
1019 case TargetOpcode::G_SREM:
1020 case TargetOpcode::G_UREM:
1021 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1030 case TargetOpcode::G_FADD:
1031 case TargetOpcode::G_FSUB:
1032 case TargetOpcode::G_FMUL:
1033 case TargetOpcode::G_FDIV:
1034 case TargetOpcode::G_FMA:
1035 case TargetOpcode::G_FPOW:
1036 case TargetOpcode::G_FREM:
1037 case TargetOpcode::G_FCOS:
1038 case TargetOpcode::G_FSIN:
1039 case TargetOpcode::G_FLOG10:
1040 case TargetOpcode::G_FLOG:
1041 case TargetOpcode::G_FLOG2:
1042 case TargetOpcode::G_FLDEXP:
1043 case TargetOpcode::G_FEXP:
1044 case TargetOpcode::G_FEXP2:
1045 case TargetOpcode::G_FEXP10:
1046 case TargetOpcode::G_FCEIL:
1047 case TargetOpcode::G_FFLOOR:
1048 case TargetOpcode::G_FMINNUM:
1049 case TargetOpcode::G_FMAXNUM:
1050 case TargetOpcode::G_FSQRT:
1051 case TargetOpcode::G_FRINT:
1052 case TargetOpcode::G_FNEARBYINT:
1053 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1058 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1066 case TargetOpcode::G_INTRINSIC_LRINT:
1067 case TargetOpcode::G_INTRINSIC_LLRINT: {
1074 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1080 {{
MI.getOperand(1).
getReg(), HLTy, 0}}, LocObserver, &
MI);
1083 MI.eraseFromParent();
1086 case TargetOpcode::G_FPOWI: {
1093 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1097 std::initializer_list<CallLowering::ArgInfo> Args = {
1098 {
MI.getOperand(1).getReg(), HLTy, 0},
1099 {
MI.getOperand(2).getReg(), ITy, 1}};
1102 Args, LocObserver, &
MI);
1107 case TargetOpcode::G_FPEXT:
1108 case TargetOpcode::G_FPTRUNC: {
1111 if (!FromTy || !ToTy)
1119 case TargetOpcode::G_FPTOSI:
1120 case TargetOpcode::G_FPTOUI: {
1124 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1135 case TargetOpcode::G_SITOFP:
1136 case TargetOpcode::G_UITOFP: {
1140 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1151 case TargetOpcode::G_ATOMICRMW_XCHG:
1152 case TargetOpcode::G_ATOMICRMW_ADD:
1153 case TargetOpcode::G_ATOMICRMW_SUB:
1154 case TargetOpcode::G_ATOMICRMW_AND:
1155 case TargetOpcode::G_ATOMICRMW_OR:
1156 case TargetOpcode::G_ATOMICRMW_XOR:
1157 case TargetOpcode::G_ATOMIC_CMPXCHG:
1158 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1164 case TargetOpcode::G_BZERO:
1165 case TargetOpcode::G_MEMCPY:
1166 case TargetOpcode::G_MEMMOVE:
1167 case TargetOpcode::G_MEMSET: {
1172 MI.eraseFromParent();
1175 case TargetOpcode::G_GET_FPENV:
1176 case TargetOpcode::G_GET_FPMODE: {
1182 case TargetOpcode::G_SET_FPENV:
1183 case TargetOpcode::G_SET_FPMODE: {
1189 case TargetOpcode::G_RESET_FPENV:
1190 case TargetOpcode::G_RESET_FPMODE: {
1199 MI.eraseFromParent();
1209 switch (
MI.getOpcode()) {
1212 case TargetOpcode::G_IMPLICIT_DEF: {
1222 if (SizeOp0 % NarrowSize != 0) {
1223 LLT ImplicitTy = NarrowTy;
1230 MI.eraseFromParent();
1234 int NumParts = SizeOp0 / NarrowSize;
1237 for (
int i = 0; i < NumParts; ++i)
1244 MI.eraseFromParent();
1247 case TargetOpcode::G_CONSTANT: {
1249 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1252 int NumParts = TotalSize / NarrowSize;
1255 for (
int I = 0;
I != NumParts; ++
I) {
1256 unsigned Offset =
I * NarrowSize;
1263 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1265 if (LeftoverBits != 0) {
1269 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1273 insertParts(
MI.getOperand(0).getReg(),
1274 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1276 MI.eraseFromParent();
1279 case TargetOpcode::G_SEXT:
1280 case TargetOpcode::G_ZEXT:
1281 case TargetOpcode::G_ANYEXT:
1283 case TargetOpcode::G_TRUNC: {
1289 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1295 MI.eraseFromParent();
1299 case TargetOpcode::G_FREEZE: {
1310 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1316 MI.eraseFromParent();
1319 case TargetOpcode::G_ADD:
1320 case TargetOpcode::G_SUB:
1321 case TargetOpcode::G_SADDO:
1322 case TargetOpcode::G_SSUBO:
1323 case TargetOpcode::G_SADDE:
1324 case TargetOpcode::G_SSUBE:
1325 case TargetOpcode::G_UADDO:
1326 case TargetOpcode::G_USUBO:
1327 case TargetOpcode::G_UADDE:
1328 case TargetOpcode::G_USUBE:
1330 case TargetOpcode::G_MUL:
1331 case TargetOpcode::G_UMULH:
1333 case TargetOpcode::G_EXTRACT:
1335 case TargetOpcode::G_INSERT:
1337 case TargetOpcode::G_LOAD: {
1338 auto &LoadMI = cast<GLoad>(
MI);
1339 Register DstReg = LoadMI.getDstReg();
1344 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1348 LoadMI.eraseFromParent();
1354 case TargetOpcode::G_ZEXTLOAD:
1355 case TargetOpcode::G_SEXTLOAD: {
1356 auto &LoadMI = cast<GExtLoad>(
MI);
1357 Register DstReg = LoadMI.getDstReg();
1358 Register PtrReg = LoadMI.getPointerReg();
1361 auto &MMO = LoadMI.getMMO();
1364 if (MemSize == NarrowSize) {
1366 }
else if (MemSize < NarrowSize) {
1368 }
else if (MemSize > NarrowSize) {
1373 if (isa<GZExtLoad>(LoadMI))
1378 LoadMI.eraseFromParent();
1381 case TargetOpcode::G_STORE: {
1382 auto &StoreMI = cast<GStore>(
MI);
1384 Register SrcReg = StoreMI.getValueReg();
1389 int NumParts = SizeOp0 / NarrowSize;
1391 unsigned LeftoverBits = SrcTy.
getSizeInBits() - HandledSize;
1392 if (SrcTy.
isVector() && LeftoverBits != 0)
1395 if (8 * StoreMI.getMemSize().getValue() != SrcTy.
getSizeInBits()) {
1399 StoreMI.eraseFromParent();
1405 case TargetOpcode::G_SELECT:
1407 case TargetOpcode::G_AND:
1408 case TargetOpcode::G_OR:
1409 case TargetOpcode::G_XOR: {
1421 case TargetOpcode::G_SHL:
1422 case TargetOpcode::G_LSHR:
1423 case TargetOpcode::G_ASHR:
1425 case TargetOpcode::G_CTLZ:
1426 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1427 case TargetOpcode::G_CTTZ:
1428 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1429 case TargetOpcode::G_CTPOP:
1431 switch (
MI.getOpcode()) {
1432 case TargetOpcode::G_CTLZ:
1433 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1435 case TargetOpcode::G_CTTZ:
1436 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1438 case TargetOpcode::G_CTPOP:
1448 case TargetOpcode::G_INTTOPTR:
1456 case TargetOpcode::G_PTRTOINT:
1464 case TargetOpcode::G_PHI: {
1467 if (SizeOp0 % NarrowSize != 0)
1470 unsigned NumParts = SizeOp0 / NarrowSize;
1474 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1482 for (
unsigned i = 0; i < NumParts; ++i) {
1486 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1487 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1492 MI.eraseFromParent();
1495 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1496 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1500 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1506 case TargetOpcode::G_ICMP: {
1525 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1526 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1539 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1540 auto LHS = std::get<0>(LHSAndRHS);
1541 auto RHS = std::get<1>(LHSAndRHS);
1549 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1550 auto LHS = std::get<0>(LHSAndRHS);
1551 auto RHS = std::get<1>(LHSAndRHS);
1553 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1554 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1555 TargetOpcode::G_ZEXT);
1562 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1564 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1569 assert(LHSPartRegs.
size() == 2 &&
"Expected exactly 2 LHS part regs?");
1570 assert(RHSPartRegs.
size() == 2 &&
"Expected exactly 2 RHS part regs?");
1582 MI.eraseFromParent();
1585 case TargetOpcode::G_FCMP:
1594 case TargetOpcode::G_SEXT_INREG: {
1598 int64_t SizeInBits =
MI.getOperand(2).getImm();
1608 MO1.
setReg(TruncMIB.getReg(0));
1623 if (SizeOp0 % NarrowSize != 0)
1625 int NumParts = SizeOp0 / NarrowSize;
1633 for (
int i = 0; i < NumParts; ++i) {
1649 for (
int i = 0; i < NumParts; ++i) {
1652 PartialExtensionReg = DstRegs.
back();
1654 assert(PartialExtensionReg &&
1655 "Expected to visit partial extension before full");
1656 if (FullExtensionReg) {
1663 FullExtensionReg = DstRegs.
back();
1668 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1671 PartialExtensionReg = DstRegs.
back();
1678 MI.eraseFromParent();
1681 case TargetOpcode::G_BSWAP:
1682 case TargetOpcode::G_BITREVERSE: {
1683 if (SizeOp0 % NarrowSize != 0)
1688 unsigned NumParts = SizeOp0 / NarrowSize;
1689 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1692 for (
unsigned i = 0; i < NumParts; ++i) {
1694 {SrcRegs[NumParts - 1 - i]});
1701 MI.eraseFromParent();
1704 case TargetOpcode::G_PTR_ADD:
1705 case TargetOpcode::G_PTRMASK: {
1713 case TargetOpcode::G_FPTOUI:
1714 case TargetOpcode::G_FPTOSI:
1716 case TargetOpcode::G_FPEXT:
1723 case TargetOpcode::G_FLDEXP:
1724 case TargetOpcode::G_STRICT_FLDEXP:
1726 case TargetOpcode::G_VSCALE: {
1737 MI.eraseFromParent();
1765 unsigned OpIdx,
unsigned ExtOpcode) {
1768 MO.
setReg(ExtB.getReg(0));
1775 MO.
setReg(ExtB.getReg(0));
1779 unsigned OpIdx,
unsigned TruncOpcode) {
1788 unsigned OpIdx,
unsigned ExtOpcode) {
1827LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1832 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
1833 if (DstTy.isVector())
1840 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1842 unsigned NumOps =
MI.getNumOperands();
1843 unsigned NumSrc =
MI.getNumOperands() - 1;
1844 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1846 if (WideSize >= DstSize) {
1850 for (
unsigned I = 2;
I != NumOps; ++
I) {
1851 const unsigned Offset = (
I - 1) * PartSize;
1858 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
1864 ResultReg = NextResult;
1867 if (WideSize > DstSize)
1869 else if (DstTy.isPointer())
1872 MI.eraseFromParent();
1897 const int GCD = std::gcd(SrcSize, WideSize);
1908 if (GCD == SrcSize) {
1912 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1918 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
1920 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
1924 const int PartsPerGCD = WideSize / GCD;
1928 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1943 MI.eraseFromParent();
1948LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1953 int NumDst =
MI.getNumOperands() - 1;
1954 Register SrcReg =
MI.getOperand(NumDst).getReg();
1959 Register Dst0Reg =
MI.getOperand(0).getReg();
1969 dbgs() <<
"Not casting non-integral address space integer\n");
1990 for (
int I = 1;
I != NumDst; ++
I) {
1996 MI.eraseFromParent();
2007 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2032 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2037 if (PartsPerRemerge == 1) {
2040 for (
int I = 0;
I != NumUnmerge; ++
I) {
2043 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2044 int Idx =
I * PartsPerUnmerge + J;
2046 MIB.addDef(
MI.getOperand(
Idx).getReg());
2053 MIB.addUse(Unmerge.getReg(
I));
2057 for (
int J = 0; J != NumUnmerge; ++J)
2058 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2061 for (
int I = 0;
I != NumDst; ++
I) {
2062 for (
int J = 0; J < PartsPerRemerge; ++J) {
2063 const int Idx =
I * PartsPerRemerge + J;
2068 RemergeParts.
clear();
2072 MI.eraseFromParent();
2077LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2079 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2080 unsigned Offset =
MI.getOperand(2).getImm();
2083 if (SrcTy.
isVector() || DstTy.isVector())
2099 if (DstTy.isPointer())
2106 MI.eraseFromParent();
2111 LLT ShiftTy = SrcTy;
2120 MI.eraseFromParent();
2151LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2153 if (TypeIdx != 0 || WideTy.
isVector())
2163LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2167 std::optional<Register> CarryIn;
2168 switch (
MI.getOpcode()) {
2171 case TargetOpcode::G_SADDO:
2172 Opcode = TargetOpcode::G_ADD;
2173 ExtOpcode = TargetOpcode::G_SEXT;
2175 case TargetOpcode::G_SSUBO:
2176 Opcode = TargetOpcode::G_SUB;
2177 ExtOpcode = TargetOpcode::G_SEXT;
2179 case TargetOpcode::G_UADDO:
2180 Opcode = TargetOpcode::G_ADD;
2181 ExtOpcode = TargetOpcode::G_ZEXT;
2183 case TargetOpcode::G_USUBO:
2184 Opcode = TargetOpcode::G_SUB;
2185 ExtOpcode = TargetOpcode::G_ZEXT;
2187 case TargetOpcode::G_SADDE:
2188 Opcode = TargetOpcode::G_UADDE;
2189 ExtOpcode = TargetOpcode::G_SEXT;
2190 CarryIn =
MI.getOperand(4).getReg();
2192 case TargetOpcode::G_SSUBE:
2193 Opcode = TargetOpcode::G_USUBE;
2194 ExtOpcode = TargetOpcode::G_SEXT;
2195 CarryIn =
MI.getOperand(4).getReg();
2197 case TargetOpcode::G_UADDE:
2198 Opcode = TargetOpcode::G_UADDE;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2200 CarryIn =
MI.getOperand(4).getReg();
2202 case TargetOpcode::G_USUBE:
2203 Opcode = TargetOpcode::G_USUBE;
2204 ExtOpcode = TargetOpcode::G_ZEXT;
2205 CarryIn =
MI.getOperand(4).getReg();
2226 LLT CarryOutTy = MRI.
getType(
MI.getOperand(1).getReg());
2229 {LHSExt, RHSExt, *CarryIn})
2241 MI.eraseFromParent();
2246LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2248 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2249 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2250 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2251 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2252 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2277 {ShiftL, ShiftR},
MI.getFlags());
2285 MI.eraseFromParent();
2290LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2299 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2302 LLT OverflowTy = MRI.
getType(OriginalOverflow);
2309 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2318 WideMulCanOverflow ?
MI.getOpcode() : (
unsigned)TargetOpcode::G_MUL;
2321 if (WideMulCanOverflow)
2323 {LeftOperand, RightOperand});
2344 if (WideMulCanOverflow) {
2352 MI.eraseFromParent();
2358 switch (
MI.getOpcode()) {
2361 case TargetOpcode::G_ATOMICRMW_XCHG:
2362 case TargetOpcode::G_ATOMICRMW_ADD:
2363 case TargetOpcode::G_ATOMICRMW_SUB:
2364 case TargetOpcode::G_ATOMICRMW_AND:
2365 case TargetOpcode::G_ATOMICRMW_OR:
2366 case TargetOpcode::G_ATOMICRMW_XOR:
2367 case TargetOpcode::G_ATOMICRMW_MIN:
2368 case TargetOpcode::G_ATOMICRMW_MAX:
2369 case TargetOpcode::G_ATOMICRMW_UMIN:
2370 case TargetOpcode::G_ATOMICRMW_UMAX:
2371 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2377 case TargetOpcode::G_ATOMIC_CMPXCHG:
2378 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2385 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2395 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2400 case TargetOpcode::G_EXTRACT:
2401 return widenScalarExtract(
MI, TypeIdx, WideTy);
2402 case TargetOpcode::G_INSERT:
2403 return widenScalarInsert(
MI, TypeIdx, WideTy);
2404 case TargetOpcode::G_MERGE_VALUES:
2405 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2406 case TargetOpcode::G_UNMERGE_VALUES:
2407 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2408 case TargetOpcode::G_SADDO:
2409 case TargetOpcode::G_SSUBO:
2410 case TargetOpcode::G_UADDO:
2411 case TargetOpcode::G_USUBO:
2412 case TargetOpcode::G_SADDE:
2413 case TargetOpcode::G_SSUBE:
2414 case TargetOpcode::G_UADDE:
2415 case TargetOpcode::G_USUBE:
2416 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2417 case TargetOpcode::G_UMULO:
2418 case TargetOpcode::G_SMULO:
2419 return widenScalarMulo(
MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_SADDSAT:
2421 case TargetOpcode::G_SSUBSAT:
2422 case TargetOpcode::G_SSHLSAT:
2423 case TargetOpcode::G_UADDSAT:
2424 case TargetOpcode::G_USUBSAT:
2425 case TargetOpcode::G_USHLSAT:
2426 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2427 case TargetOpcode::G_CTTZ:
2428 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2429 case TargetOpcode::G_CTLZ:
2430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2431 case TargetOpcode::G_CTPOP: {
2442 unsigned ExtOpc =
MI.getOpcode() == TargetOpcode::G_CTTZ ||
2443 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2444 ? TargetOpcode::G_ANYEXT
2445 : TargetOpcode::G_ZEXT;
2448 unsigned NewOpc =
MI.getOpcode();
2449 if (NewOpc == TargetOpcode::G_CTTZ) {
2458 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2464 if (
MI.getOpcode() == TargetOpcode::G_CTLZ ||
2465 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2473 MI.eraseFromParent();
2476 case TargetOpcode::G_BSWAP: {
2485 MI.getOperand(0).setReg(DstExt);
2498 case TargetOpcode::G_BITREVERSE: {
2507 MI.getOperand(0).setReg(DstExt);
2516 case TargetOpcode::G_FREEZE:
2523 case TargetOpcode::G_ABS:
2530 case TargetOpcode::G_ADD:
2531 case TargetOpcode::G_AND:
2532 case TargetOpcode::G_MUL:
2533 case TargetOpcode::G_OR:
2534 case TargetOpcode::G_XOR:
2535 case TargetOpcode::G_SUB:
2536 case TargetOpcode::G_SHUFFLE_VECTOR:
2547 case TargetOpcode::G_SBFX:
2548 case TargetOpcode::G_UBFX:
2562 case TargetOpcode::G_SHL:
2578 case TargetOpcode::G_ROTR:
2579 case TargetOpcode::G_ROTL:
2588 case TargetOpcode::G_SDIV:
2589 case TargetOpcode::G_SREM:
2590 case TargetOpcode::G_SMIN:
2591 case TargetOpcode::G_SMAX:
2599 case TargetOpcode::G_SDIVREM:
2608 case TargetOpcode::G_ASHR:
2609 case TargetOpcode::G_LSHR:
2613 unsigned CvtOp =
MI.getOpcode() == TargetOpcode::G_ASHR ?
2614 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2627 case TargetOpcode::G_UDIV:
2628 case TargetOpcode::G_UREM:
2629 case TargetOpcode::G_UMIN:
2630 case TargetOpcode::G_UMAX:
2638 case TargetOpcode::G_UDIVREM:
2647 case TargetOpcode::G_SELECT:
2664 case TargetOpcode::G_FPTOSI:
2665 case TargetOpcode::G_FPTOUI:
2666 case TargetOpcode::G_INTRINSIC_LRINT:
2667 case TargetOpcode::G_INTRINSIC_LLRINT:
2668 case TargetOpcode::G_IS_FPCLASS:
2678 case TargetOpcode::G_SITOFP:
2688 case TargetOpcode::G_UITOFP:
2698 case TargetOpcode::G_LOAD:
2699 case TargetOpcode::G_SEXTLOAD:
2700 case TargetOpcode::G_ZEXTLOAD:
2706 case TargetOpcode::G_STORE: {
2717 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2723 case TargetOpcode::G_CONSTANT: {
2727 MRI.
getType(
MI.getOperand(0).getReg()));
2728 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2729 ExtOpc == TargetOpcode::G_ANYEXT) &&
2732 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2736 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
2742 case TargetOpcode::G_FCONSTANT: {
2750 MI.eraseFromParent();
2753 case TargetOpcode::G_IMPLICIT_DEF: {
2759 case TargetOpcode::G_BRCOND:
2765 case TargetOpcode::G_FCMP:
2776 case TargetOpcode::G_ICMP:
2782 MI.getOperand(1).getPredicate()))
2783 ? TargetOpcode::G_SEXT
2784 : TargetOpcode::G_ZEXT;
2791 case TargetOpcode::G_PTR_ADD:
2792 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
2798 case TargetOpcode::G_PHI: {
2799 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
2802 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
2814 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2822 TargetOpcode::G_ANYEXT);
2837 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2873 case TargetOpcode::G_FADD:
2874 case TargetOpcode::G_FMUL:
2875 case TargetOpcode::G_FSUB:
2876 case TargetOpcode::G_FMA:
2877 case TargetOpcode::G_FMAD:
2878 case TargetOpcode::G_FNEG:
2879 case TargetOpcode::G_FABS:
2880 case TargetOpcode::G_FCANONICALIZE:
2881 case TargetOpcode::G_FMINNUM:
2882 case TargetOpcode::G_FMAXNUM:
2883 case TargetOpcode::G_FMINNUM_IEEE:
2884 case TargetOpcode::G_FMAXNUM_IEEE:
2885 case TargetOpcode::G_FMINIMUM:
2886 case TargetOpcode::G_FMAXIMUM:
2887 case TargetOpcode::G_FDIV:
2888 case TargetOpcode::G_FREM:
2889 case TargetOpcode::G_FCEIL:
2890 case TargetOpcode::G_FFLOOR:
2891 case TargetOpcode::G_FCOS:
2892 case TargetOpcode::G_FSIN:
2893 case TargetOpcode::G_FLOG10:
2894 case TargetOpcode::G_FLOG:
2895 case TargetOpcode::G_FLOG2:
2896 case TargetOpcode::G_FRINT:
2897 case TargetOpcode::G_FNEARBYINT:
2898 case TargetOpcode::G_FSQRT:
2899 case TargetOpcode::G_FEXP:
2900 case TargetOpcode::G_FEXP2:
2901 case TargetOpcode::G_FEXP10:
2902 case TargetOpcode::G_FPOW:
2903 case TargetOpcode::G_INTRINSIC_TRUNC:
2904 case TargetOpcode::G_INTRINSIC_ROUND:
2905 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2909 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
2915 case TargetOpcode::G_FPOWI:
2916 case TargetOpcode::G_FLDEXP:
2917 case TargetOpcode::G_STRICT_FLDEXP: {
2919 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2940 case TargetOpcode::G_FFREXP: {
2953 case TargetOpcode::G_INTTOPTR:
2961 case TargetOpcode::G_PTRTOINT:
2969 case TargetOpcode::G_BUILD_VECTOR: {
2973 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
2987 case TargetOpcode::G_SEXT_INREG:
2996 case TargetOpcode::G_PTRMASK: {
3004 case TargetOpcode::G_VECREDUCE_FADD:
3005 case TargetOpcode::G_VECREDUCE_FMUL:
3006 case TargetOpcode::G_VECREDUCE_FMIN:
3007 case TargetOpcode::G_VECREDUCE_FMAX:
3008 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3009 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3023 case TargetOpcode::G_VSCALE: {
3030 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3035 case TargetOpcode::G_SPLAT_VECTOR: {
3049 auto Unmerge =
B.buildUnmerge(Ty, Src);
3050 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
3059 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3061 LLT DstLLT =
MRI.getType(DstReg);
3082 MI.eraseFromParent();
3093 MI.eraseFromParent();
3100 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3105 if (DstTy.isVector()) {
3106 int NumDstElt = DstTy.getNumElements();
3110 LLT DstCastTy = DstEltTy;
3111 LLT SrcPartTy = SrcEltTy;
3115 if (NumSrcElt < NumDstElt) {
3125 SrcPartTy = SrcEltTy;
3126 }
else if (NumSrcElt > NumDstElt) {
3137 DstCastTy = DstEltTy;
3147 MI.eraseFromParent();
3151 if (DstTy.isVector()) {
3155 MI.eraseFromParent();
3171 unsigned NewEltSize,
3172 unsigned OldEltSize) {
3173 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3174 LLT IdxTy =
B.getMRI()->getType(
Idx);
3177 auto OffsetMask =
B.buildConstant(
3179 auto OffsetIdx =
B.buildAnd(IdxTy,
Idx, OffsetMask);
3180 return B.buildShl(IdxTy, OffsetIdx,
3181 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3196 auto [Dst, DstTy, SrcVec, SrcVecTy,
Idx, IdxTy] =
MI.getFirst3RegLLTs();
3200 unsigned OldNumElts = SrcVecTy.getNumElements();
3207 if (NewNumElts > OldNumElts) {
3218 if (NewNumElts % OldNumElts != 0)
3222 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3231 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3235 NewOps[
I] = Elt.getReg(0);
3240 MI.eraseFromParent();
3244 if (NewNumElts < OldNumElts) {
3245 if (NewEltSize % OldEltSize != 0)
3267 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3286 MI.eraseFromParent();
3300 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3301 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3302 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3303 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3306 auto EltMask =
B.buildConstant(
3310 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3311 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3314 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3318 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3332 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy,
Idx, IdxTy] =
3333 MI.getFirst4RegLLTs();
3345 if (NewNumElts < OldNumElts) {
3346 if (NewEltSize % OldEltSize != 0)
3355 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3375 CastTy, CastVec, InsertedElt, ScaledIdx).
getReg(0);
3379 MI.eraseFromParent();
3398 if (MemSizeInBits != MemStoreSizeInBits) {
3418 if (isa<GSExtLoad>(LoadMI)) {
3421 }
else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3430 if (DstTy != LoadTy)
3456 uint64_t LargeSplitSize, SmallSplitSize;
3461 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3471 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3496 LargeSplitSize / 8);
3500 SmallPtr, *SmallMMO);
3505 if (AnyExtTy == DstTy)
3540 if (StoreWidth != StoreSizeInBits) {
3575 uint64_t LargeSplitSize, SmallSplitSize;
3578 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.
getSizeInBits());
3585 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3624 switch (
MI.getOpcode()) {
3625 case TargetOpcode::G_LOAD: {
3640 case TargetOpcode::G_STORE: {
3656 case TargetOpcode::G_SELECT: {
3662 dbgs() <<
"bitcast action not implemented for vector select\n");
3673 case TargetOpcode::G_AND:
3674 case TargetOpcode::G_OR:
3675 case TargetOpcode::G_XOR: {
3683 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3685 case TargetOpcode::G_INSERT_VECTOR_ELT:
3693void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
3701 using namespace TargetOpcode;
3703 switch(
MI.getOpcode()) {
3706 case TargetOpcode::G_FCONSTANT:
3708 case TargetOpcode::G_BITCAST:
3710 case TargetOpcode::G_SREM:
3711 case TargetOpcode::G_UREM: {
3715 {MI.getOperand(1), MI.getOperand(2)});
3719 MI.eraseFromParent();
3722 case TargetOpcode::G_SADDO:
3723 case TargetOpcode::G_SSUBO:
3725 case TargetOpcode::G_UMULH:
3726 case TargetOpcode::G_SMULH:
3728 case TargetOpcode::G_SMULO:
3729 case TargetOpcode::G_UMULO: {
3732 auto [Res, Overflow,
LHS,
RHS] =
MI.getFirst4Regs();
3735 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
3736 ? TargetOpcode::G_SMULH
3737 : TargetOpcode::G_UMULH;
3741 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
3742 MI.removeOperand(1);
3753 if (Opcode == TargetOpcode::G_SMULH) {
3762 case TargetOpcode::G_FNEG: {
3763 auto [Res, SubByReg] =
MI.getFirst2Regs();
3773 MI.eraseFromParent();
3776 case TargetOpcode::G_FSUB:
3777 case TargetOpcode::G_STRICT_FSUB: {
3778 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
3784 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3789 MI.eraseFromParent();
3792 case TargetOpcode::G_FMAD:
3794 case TargetOpcode::G_FFLOOR:
3796 case TargetOpcode::G_INTRINSIC_ROUND:
3798 case TargetOpcode::G_FRINT: {
3801 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3804 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3805 auto [OldValRes, SuccessRes,
Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
3808 **
MI.memoperands_begin());
3811 MI.eraseFromParent();
3814 case TargetOpcode::G_LOAD:
3815 case TargetOpcode::G_SEXTLOAD:
3816 case TargetOpcode::G_ZEXTLOAD:
3818 case TargetOpcode::G_STORE:
3820 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3821 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3822 case TargetOpcode::G_CTLZ:
3823 case TargetOpcode::G_CTTZ:
3824 case TargetOpcode::G_CTPOP:
3827 auto [Res, CarryOut,
LHS,
RHS] =
MI.getFirst4Regs();
3836 MI.eraseFromParent();
3840 auto [Res, CarryOut,
LHS,
RHS, CarryIn] =
MI.getFirst5Regs();
3866 MI.eraseFromParent();
3870 auto [Res, BorrowOut,
LHS,
RHS] =
MI.getFirst4Regs();
3875 MI.eraseFromParent();
3879 auto [Res, BorrowOut,
LHS,
RHS, BorrowIn] =
MI.getFirst5Regs();
3901 MI.eraseFromParent();
3926 case G_MERGE_VALUES:
3928 case G_UNMERGE_VALUES:
3930 case TargetOpcode::G_SEXT_INREG: {
3931 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
3932 int64_t SizeInBits =
MI.getOperand(2).getImm();
3934 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
3941 MI.eraseFromParent();
3944 case G_EXTRACT_VECTOR_ELT:
3945 case G_INSERT_VECTOR_ELT:
3947 case G_SHUFFLE_VECTOR:
3949 case G_DYN_STACKALLOC:
3953 case G_STACKRESTORE:
3963 case G_READ_REGISTER:
3964 case G_WRITE_REGISTER:
4009 case G_MEMCPY_INLINE:
4010 return lowerMemcpyInline(
MI);
4041 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4050 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4062 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4065 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4076 "Converting bits to bytes lost precision");
4083 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4100 std::initializer_list<unsigned> NonVecOpIndices) {
4101 if (
MI.getNumMemOperands() != 0)
4104 LLT VecTy =
MRI.getType(
MI.getReg(0));
4109 for (
unsigned OpIdx = 1; OpIdx <
MI.getNumOperands(); ++OpIdx) {
4142 int NumParts, NumLeftover;
4143 std::tie(NumParts, NumLeftover) =
4146 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
4147 for (
int i = 0; i < NumParts; ++i) {
4152 assert(NumLeftover == 1 &&
"expected exactly one leftover");
4161 for (
unsigned i = 0; i <
N; ++i) {
4164 else if (
Op.isImm())
4166 else if (
Op.isPredicate())
4188 std::initializer_list<unsigned> NonVecOpIndices) {
4190 "Non-compatible opcode or not specified non-vector operands");
4193 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4194 unsigned NumDefs =
MI.getNumDefs();
4202 for (
unsigned i = 0; i < NumDefs; ++i) {
4211 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4212 ++UseIdx, ++UseNo) {
4215 MI.getOperand(UseIdx));
4220 for (
auto Reg : SplitPieces)
4225 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4229 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4231 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4232 Defs.
push_back(OutputOpsPieces[DstNo][i]);
4235 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4236 Uses.push_back(InputOpsPieces[InputNo][i]);
4239 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4240 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
4245 for (
unsigned i = 0; i < NumDefs; ++i)
4246 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
4248 for (
unsigned i = 0; i < NumDefs; ++i)
4252 MI.eraseFromParent();
4261 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4262 unsigned NumDefs =
MI.getNumDefs();
4271 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4272 UseIdx += 2, ++UseNo) {
4280 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4282 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4288 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
4289 Phi.addUse(InputOpsPieces[j][i]);
4290 Phi.add(
MI.getOperand(1 + j * 2 + 1));
4300 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
4305 MI.eraseFromParent();
4313 const int NumDst =
MI.getNumOperands() - 1;
4314 const Register SrcReg =
MI.getOperand(NumDst).getReg();
4318 if (TypeIdx != 1 || NarrowTy == DstTy)
4344 const int PartsPerUnmerge = NumDst / NumUnmerge;
4346 for (
int I = 0;
I != NumUnmerge; ++
I) {
4349 for (
int J = 0; J != PartsPerUnmerge; ++J)
4350 MIB.
addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
4351 MIB.
addUse(Unmerge.getReg(
I));
4354 MI.eraseFromParent();
4361 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
4365 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
4367 if (NarrowTy == SrcTy)
4377 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
4391 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
4393 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4399 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4400 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
4401 ++i,
Offset += NumNarrowTyElts) {
4408 MI.eraseFromParent();
4412 assert(TypeIdx == 0 &&
"Bad type index");
4428 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
4431 for (
unsigned i = 0; i < NumParts; ++i) {
4433 for (
unsigned j = 0; j < NumElts; ++j)
4434 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
4440 MI.eraseFromParent();
4448 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
4450 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4452 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
4454 InsertVal =
MI.getOperand(2).getReg();
4469 IdxVal = MaybeCst->Value.getSExtValue();
4473 MI.eraseFromParent();
4478 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4481 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4482 TargetOpcode::G_ANYEXT);
4487 int64_t PartIdx = IdxVal / NewNumElts;
4496 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4497 VecParts[PartIdx] = InsertPart.getReg(0);
4501 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4506 MI.eraseFromParent();
4530 bool IsLoad = isa<GLoad>(LdStMI);
4542 int NumLeftover = -1;
4548 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4550 NumParts = NarrowRegs.
size();
4551 NumLeftover = NarrowLeftoverRegs.
size();
4568 auto MMO = LdStMI.
getMMO();
4570 unsigned NumParts,
unsigned Offset) ->
unsigned {
4573 for (
unsigned Idx = 0, E = NumParts;
Idx != E &&
Offset < TotalSize;
4575 unsigned ByteOffset =
Offset / 8;
4585 ValRegs.push_back(Dst);
4597 unsigned HandledOffset =
4598 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
4602 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4605 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4606 LeftoverTy, NarrowLeftoverRegs);
4616 using namespace TargetOpcode;
4620 switch (
MI.getOpcode()) {
4621 case G_IMPLICIT_DEF:
4637 case G_FCANONICALIZE:
4654 case G_INTRINSIC_ROUND:
4655 case G_INTRINSIC_ROUNDEVEN:
4656 case G_INTRINSIC_TRUNC:
4675 case G_FMINNUM_IEEE:
4676 case G_FMAXNUM_IEEE:
4696 case G_CTLZ_ZERO_UNDEF:
4698 case G_CTTZ_ZERO_UNDEF:
4712 case G_ADDRSPACE_CAST:
4725 case G_STRICT_FLDEXP:
4739 case G_UNMERGE_VALUES:
4741 case G_BUILD_VECTOR:
4742 assert(TypeIdx == 0 &&
"not a vector type index");
4744 case G_CONCAT_VECTORS:
4748 case G_EXTRACT_VECTOR_ELT:
4749 case G_INSERT_VECTOR_ELT:
4758 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4759 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4761 case G_SHUFFLE_VECTOR:
4767 case G_INTRINSIC_FPTRUNC_ROUND:
4777 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
4778 "Not a bitcast operation");
4783 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
4791 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4796 for (
unsigned i = 0; i < SrcVRegs.
size(); i++)
4801 MI.eraseFromParent();
4807 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4811 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4812 MI.getFirst3RegLLTs();
4815 if (DstTy != Src1Ty)
4817 if (DstTy != Src2Ty)
4832 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4848 unsigned InputUsed[2] = {-1U, -1U};
4849 unsigned FirstMaskIdx =
High * NewElts;
4850 bool UseBuildVector =
false;
4851 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4853 int Idx = Mask[FirstMaskIdx + MaskOffset];
4858 if (Input >= std::size(Inputs)) {
4865 Idx -= Input * NewElts;
4869 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4870 if (InputUsed[OpNo] == Input) {
4873 }
else if (InputUsed[OpNo] == -1U) {
4875 InputUsed[OpNo] = Input;
4880 if (OpNo >= std::size(InputUsed)) {
4883 UseBuildVector =
true;
4891 if (UseBuildVector) {
4896 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4898 int Idx = Mask[FirstMaskIdx + MaskOffset];
4903 if (Input >= std::size(Inputs)) {
4910 Idx -= Input * NewElts;
4914 .buildExtractVectorElement(
4915 EltTy, Inputs[Input],
4922 }
else if (InputUsed[0] == -1U) {
4926 Register Op0 = Inputs[InputUsed[0]];
4930 : Inputs[InputUsed[1]];
4939 MI.eraseFromParent();
4945 auto &RdxMI = cast<GVecReduce>(
MI);
4952 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4958 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4961 const unsigned NumParts =
4967 if (DstTy != NarrowTy)
4973 unsigned NumPartsLeft = NumParts;
4974 while (NumPartsLeft > 1) {
4975 for (
unsigned Idx = 0;
Idx < NumPartsLeft - 1;
Idx += 2) {
4978 .buildInstr(ScalarOpc, {NarrowTy},
4979 {SplitSrcs[
Idx], SplitSrcs[
Idx + 1]})
4982 SplitSrcs = PartialResults;
4983 PartialResults.
clear();
4984 NumPartsLeft = SplitSrcs.
size();
4988 MI.eraseFromParent();
4993 for (
unsigned Idx = 1;
Idx < NumParts; ++
Idx)
4997 MI.eraseFromParent();
5001 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5011 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5014 Register Acc = PartialReductions[0];
5015 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5016 if (Part == NumParts - 1) {
5018 {Acc, PartialReductions[Part]});
5021 .
buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5025 MI.eraseFromParent();
5031 unsigned int TypeIdx,
5033 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5034 MI.getFirst3RegLLTs();
5035 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5039 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5040 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5041 "Unexpected vecreduce opcode");
5042 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5043 ? TargetOpcode::G_FADD
5044 : TargetOpcode::G_FMUL;
5050 for (
unsigned i = 0; i < NumParts; i++)
5055 MI.eraseFromParent();
5062 unsigned ScalarOpc) {
5070 while (SplitSrcs.
size() > 1) {
5072 for (
unsigned Idx = 0;
Idx < SplitSrcs.
size()-1;
Idx += 2) {
5080 SplitSrcs = std::move(PartialRdxs);
5084 MI.getOperand(1).setReg(SplitSrcs[0]);
5091 const LLT HalfTy,
const LLT AmtTy) {
5099 MI.eraseFromParent();
5105 unsigned VTBits = 2 * NVTBits;
5108 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
5109 if (Amt.
ugt(VTBits)) {
5111 }
else if (Amt.
ugt(NVTBits)) {
5115 }
else if (Amt == NVTBits) {
5126 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5127 if (Amt.
ugt(VTBits)) {
5129 }
else if (Amt.
ugt(NVTBits)) {
5133 }
else if (Amt == NVTBits) {
5147 if (Amt.
ugt(VTBits)) {
5150 }
else if (Amt.
ugt(NVTBits)) {
5155 }
else if (Amt == NVTBits) {
5172 MI.eraseFromParent();
5196 if (DstEltSize % 2 != 0)
5202 const unsigned NewBitSize = DstEltSize / 2;
5228 switch (
MI.getOpcode()) {
5229 case TargetOpcode::G_SHL: {
5245 ResultRegs[0] =
Lo.getReg(0);
5246 ResultRegs[1] =
Hi.getReg(0);
5249 case TargetOpcode::G_LSHR:
5250 case TargetOpcode::G_ASHR: {
5260 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5274 ResultRegs[0] =
Lo.getReg(0);
5275 ResultRegs[1] =
Hi.getReg(0);
5283 MI.eraseFromParent();
5290 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
5293 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
5308 assert(Ty.
isScalar() &&
"Expected scalar type to make neutral element for");
5313 "getNeutralElementForVecReduce called with invalid opcode!");
5314 case TargetOpcode::G_VECREDUCE_ADD:
5315 case TargetOpcode::G_VECREDUCE_OR:
5316 case TargetOpcode::G_VECREDUCE_XOR:
5317 case TargetOpcode::G_VECREDUCE_UMAX:
5319 case TargetOpcode::G_VECREDUCE_MUL:
5321 case TargetOpcode::G_VECREDUCE_AND:
5322 case TargetOpcode::G_VECREDUCE_UMIN:
5325 case TargetOpcode::G_VECREDUCE_SMAX:
5328 case TargetOpcode::G_VECREDUCE_SMIN:
5331 case TargetOpcode::G_VECREDUCE_FADD:
5333 case TargetOpcode::G_VECREDUCE_FMUL:
5335 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5336 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5337 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
5338 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5346 unsigned Opc =
MI.getOpcode();
5348 case TargetOpcode::G_IMPLICIT_DEF:
5349 case TargetOpcode::G_LOAD: {
5357 case TargetOpcode::G_STORE:
5364 case TargetOpcode::G_AND:
5365 case TargetOpcode::G_OR:
5366 case TargetOpcode::G_XOR:
5367 case TargetOpcode::G_ADD:
5368 case TargetOpcode::G_SUB:
5369 case TargetOpcode::G_MUL:
5370 case TargetOpcode::G_FADD:
5371 case TargetOpcode::G_FSUB:
5372 case TargetOpcode::G_FMUL:
5373 case TargetOpcode::G_FDIV:
5374 case TargetOpcode::G_FCOPYSIGN:
5375 case TargetOpcode::G_UADDSAT:
5376 case TargetOpcode::G_USUBSAT:
5377 case TargetOpcode::G_SADDSAT:
5378 case TargetOpcode::G_SSUBSAT:
5379 case TargetOpcode::G_SMIN:
5380 case TargetOpcode::G_SMAX:
5381 case TargetOpcode::G_UMIN:
5382 case TargetOpcode::G_UMAX:
5383 case TargetOpcode::G_FMINNUM:
5384 case TargetOpcode::G_FMAXNUM:
5385 case TargetOpcode::G_FMINNUM_IEEE:
5386 case TargetOpcode::G_FMAXNUM_IEEE:
5387 case TargetOpcode::G_FMINIMUM:
5388 case TargetOpcode::G_FMAXIMUM:
5389 case TargetOpcode::G_STRICT_FADD:
5390 case TargetOpcode::G_STRICT_FSUB:
5391 case TargetOpcode::G_STRICT_FMUL:
5392 case TargetOpcode::G_SHL:
5393 case TargetOpcode::G_ASHR:
5394 case TargetOpcode::G_LSHR: {
5402 case TargetOpcode::G_FMA:
5403 case TargetOpcode::G_STRICT_FMA:
5404 case TargetOpcode::G_FSHR:
5405 case TargetOpcode::G_FSHL: {
5414 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5415 case TargetOpcode::G_EXTRACT:
5422 case TargetOpcode::G_INSERT:
5423 case TargetOpcode::G_INSERT_VECTOR_ELT:
5424 case TargetOpcode::G_FREEZE:
5425 case TargetOpcode::G_FNEG:
5426 case TargetOpcode::G_FABS:
5427 case TargetOpcode::G_FSQRT:
5428 case TargetOpcode::G_FCEIL:
5429 case TargetOpcode::G_FFLOOR:
5430 case TargetOpcode::G_FNEARBYINT:
5431 case TargetOpcode::G_FRINT:
5432 case TargetOpcode::G_INTRINSIC_ROUND:
5433 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5434 case TargetOpcode::G_INTRINSIC_TRUNC:
5435 case TargetOpcode::G_BSWAP:
5436 case TargetOpcode::G_FCANONICALIZE:
5437 case TargetOpcode::G_SEXT_INREG:
5438 case TargetOpcode::G_ABS:
5446 case TargetOpcode::G_SELECT: {
5447 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
5449 if (!CondTy.isScalar() ||
5457 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5462 if (CondTy.isVector())
5472 case TargetOpcode::G_UNMERGE_VALUES:
5474 case TargetOpcode::G_PHI:
5476 case TargetOpcode::G_SHUFFLE_VECTOR:
5478 case TargetOpcode::G_BUILD_VECTOR: {
5480 for (
auto Op :
MI.uses()) {
5490 MI.eraseFromParent();
5493 case TargetOpcode::G_SEXT:
5494 case TargetOpcode::G_ZEXT:
5495 case TargetOpcode::G_ANYEXT:
5496 case TargetOpcode::G_TRUNC:
5497 case TargetOpcode::G_FPTRUNC:
5498 case TargetOpcode::G_FPEXT:
5499 case TargetOpcode::G_FPTOSI:
5500 case TargetOpcode::G_FPTOUI:
5501 case TargetOpcode::G_SITOFP:
5502 case TargetOpcode::G_UITOFP: {
5522 case TargetOpcode::G_ICMP:
5523 case TargetOpcode::G_FCMP: {
5537 case TargetOpcode::G_BITCAST: {
5558 case TargetOpcode::G_VECREDUCE_FADD:
5559 case TargetOpcode::G_VECREDUCE_FMUL:
5560 case TargetOpcode::G_VECREDUCE_ADD:
5561 case TargetOpcode::G_VECREDUCE_MUL:
5562 case TargetOpcode::G_VECREDUCE_AND:
5563 case TargetOpcode::G_VECREDUCE_OR:
5564 case TargetOpcode::G_VECREDUCE_XOR:
5565 case TargetOpcode::G_VECREDUCE_SMAX:
5566 case TargetOpcode::G_VECREDUCE_SMIN:
5567 case TargetOpcode::G_VECREDUCE_UMAX:
5568 case TargetOpcode::G_VECREDUCE_UMIN: {
5572 auto NeutralElement = getNeutralElementForVecReduce(
5580 NeutralElement,
Idx);
5584 MO.
setReg(NewVec.getReg(0));
5596 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5598 unsigned MaskNumElts = Mask.size();
5602 if (MaskNumElts == SrcNumElts)
5605 if (MaskNumElts < SrcNumElts) {
5609 for (
unsigned I = MaskNumElts;
I < SrcNumElts; ++
I)
5615 MI.getOperand(1).getReg(),
5616 MI.getOperand(2).getReg(), NewMask);
5617 MI.eraseFromParent();
5622 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
5623 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5631 MOps1[0] =
MI.getOperand(1).getReg();
5632 MOps2[0] =
MI.getOperand(2).getReg();
5639 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
5641 if (
Idx >=
static_cast<int>(SrcNumElts))
5642 Idx += PaddedMaskNumElts - SrcNumElts;
5647 if (MaskNumElts != PaddedMaskNumElts) {
5652 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
5662 MI.eraseFromParent();
5668 unsigned int TypeIdx,
LLT MoreTy) {
5669 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
5671 unsigned NumElts = DstTy.getNumElements();
5674 if (DstTy.isVector() && Src1Ty.isVector() &&
5675 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5683 if (DstTy != Src1Ty || DstTy != Src2Ty)
5691 for (
unsigned I = 0;
I != NumElts; ++
I) {
5693 if (
Idx <
static_cast<int>(NumElts))
5698 for (
unsigned I = NumElts;
I != WidenNumElts; ++
I)
5703 MI.getOperand(1).getReg(),
5704 MI.getOperand(2).getReg(), NewMask);
5705 MI.eraseFromParent();
5714 unsigned SrcParts = Src1Regs.
size();
5715 unsigned DstParts = DstRegs.
size();
5717 unsigned DstIdx = 0;
5719 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5720 DstRegs[DstIdx] = FactorSum;
5722 unsigned CarrySumPrevDstIdx;
5725 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5727 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5728 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5730 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5734 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5735 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5737 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5747 if (DstIdx != DstParts - 1) {
5749 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
5750 FactorSum = Uaddo.
getReg(0);
5751 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).
getReg(0);
5752 for (
unsigned i = 2; i < Factors.
size(); ++i) {
5754 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
5755 FactorSum = Uaddo.
getReg(0);
5757 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5761 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5762 for (
unsigned i = 2; i < Factors.
size(); ++i)
5763 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5766 CarrySumPrevDstIdx = CarrySum;
5767 DstRegs[DstIdx] = FactorSum;
5784 unsigned Opcode =
MI.getOpcode();
5785 unsigned OpO, OpE, OpF;
5787 case TargetOpcode::G_SADDO:
5788 case TargetOpcode::G_SADDE:
5789 case TargetOpcode::G_UADDO:
5790 case TargetOpcode::G_UADDE:
5791 case TargetOpcode::G_ADD:
5792 OpO = TargetOpcode::G_UADDO;
5793 OpE = TargetOpcode::G_UADDE;
5794 OpF = TargetOpcode::G_UADDE;
5795 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5796 OpF = TargetOpcode::G_SADDE;
5798 case TargetOpcode::G_SSUBO:
5799 case TargetOpcode::G_SSUBE:
5800 case TargetOpcode::G_USUBO:
5801 case TargetOpcode::G_USUBE:
5802 case TargetOpcode::G_SUB:
5803 OpO = TargetOpcode::G_USUBO;
5804 OpE = TargetOpcode::G_USUBE;
5805 OpF = TargetOpcode::G_USUBE;
5806 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5807 OpF = TargetOpcode::G_SSUBE;
5814 unsigned NumDefs =
MI.getNumExplicitDefs();
5815 Register Src1 =
MI.getOperand(NumDefs).getReg();
5816 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
5819 CarryDst =
MI.getOperand(1).getReg();
5820 if (
MI.getNumOperands() == NumDefs + 3)
5821 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
5824 LLT LeftoverTy, DummyTy;
5826 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5831 int NarrowParts = Src1Regs.
size();
5832 for (
int I = 0, E = Src1Left.
size();
I != E; ++
I) {
5838 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
5843 if (i == e - 1 && CarryDst)
5844 CarryOut = CarryDst;
5848 {Src1Regs[i], Src2Regs[i]});
5849 }
else if (i == e - 1) {
5851 {Src1Regs[i], Src2Regs[i], CarryIn});
5854 {Src1Regs[i], Src2Regs[i], CarryIn});
5860 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
5861 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5862 ArrayRef(DstRegs).drop_front(NarrowParts));
5864 MI.eraseFromParent();
5870 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
5878 if (
Size % NarrowSize != 0)
5881 unsigned NumParts =
Size / NarrowSize;
5882 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
5883 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5889 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5894 MI.eraseFromParent();
5904 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
5918 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5934 if (SizeOp1 % NarrowSize != 0)
5936 int NumParts = SizeOp1 / NarrowSize;
5940 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
5944 uint64_t OpStart =
MI.getOperand(2).getImm();
5946 for (
int i = 0; i < NumParts; ++i) {
5947 unsigned SrcStart = i * NarrowSize;
5949 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5952 }
else if (SrcStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
5960 int64_t ExtractOffset;
5962 if (OpStart < SrcStart) {
5964 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5966 ExtractOffset = OpStart - SrcStart;
5967 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5971 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5983 else if (DstRegs.
size() > 1)
5987 MI.eraseFromParent();
6002 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6010 uint64_t OpStart =
MI.getOperand(3).getImm();
6012 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
6013 unsigned DstStart =
I * NarrowSize;
6015 if (DstStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6023 if (MRI.
getType(SrcRegs[
I]) == LeftoverTy) {
6029 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6037 int64_t ExtractOffset, InsertOffset;
6039 if (OpStart < DstStart) {
6041 ExtractOffset = DstStart - OpStart;
6042 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6044 InsertOffset = OpStart - DstStart;
6047 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6051 if (ExtractOffset != 0 || SegSize != OpSize) {
6071 MI.eraseFromParent();
6081 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
6087 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6088 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
6092 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6093 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6096 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6098 {Src0Regs[I], Src1Regs[I]});
6102 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6105 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6106 DstLeftoverRegs.
push_back(Inst.getReg(0));
6109 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6110 LeftoverTy, DstLeftoverRegs);
6112 MI.eraseFromParent();
6122 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
6129 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6130 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
6131 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6133 MI.eraseFromParent();
6143 Register CondReg =
MI.getOperand(1).getReg();
6155 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6156 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6160 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6161 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
6164 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6166 CondReg, Src1Regs[
I], Src2Regs[
I]);
6170 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6172 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
6176 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6177 LeftoverTy, DstLeftoverRegs);
6179 MI.eraseFromParent();
6189 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6193 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6196 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6198 auto C_0 =
B.buildConstant(NarrowTy, 0);
6200 UnmergeSrc.getReg(1), C_0);
6201 auto LoCTLZ = IsUndef ?
6202 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6203 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6204 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6205 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6206 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6207 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6209 MI.eraseFromParent();
6222 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6226 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6229 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6231 auto C_0 =
B.buildConstant(NarrowTy, 0);
6233 UnmergeSrc.getReg(0), C_0);
6234 auto HiCTTZ = IsUndef ?
6235 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6236 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6237 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6238 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6239 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6240 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6242 MI.eraseFromParent();
6255 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6265 MI.eraseFromParent();
6285 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
6286 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
6287 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
6288 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
6290 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
6292 MI.getOperand(2).setReg(Trunc.getReg(0));
6299 unsigned Opc =
MI.getOpcode();
6308 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6311 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
6315 case TargetOpcode::G_CTLZ: {
6316 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6319 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6327 MI.eraseFromParent();
6343 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6347 Op = MIBOp.getReg(0);
6352 MI.eraseFromParent();
6355 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6358 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
6362 case TargetOpcode::G_CTTZ: {
6363 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6366 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6375 MI.eraseFromParent();
6386 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6387 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6391 MI.eraseFromParent();
6395 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
6396 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6400 case TargetOpcode::G_CTPOP: {
6411 auto C_1 =
B.buildConstant(Ty, 1);
6412 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
6414 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
6415 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6416 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
6420 auto C_2 =
B.buildConstant(Ty, 2);
6421 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
6423 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
6424 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6425 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6426 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6433 auto C_4 =
B.buildConstant(Ty, 4);
6434 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
6435 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
6437 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
6438 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6440 assert(
Size<=128 &&
"Scalar size is too large for CTPOP lower algorithm");
6446 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
6448 auto IsMulSupported = [
this](
const LLT Ty) {
6449 auto Action = LI.
getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6452 if (IsMulSupported(Ty)) {
6453 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
6454 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6456 auto ResTmp = B8Count;
6457 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
6458 auto ShiftC =
B.buildConstant(Ty, Shift);
6459 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
6460 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
6462 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6464 MI.eraseFromParent();
6477 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
C);
6485 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
6494 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6495 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6518 MI.eraseFromParent();
6524 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
6529 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6573 MI.eraseFromParent();
6587 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
6588 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6592 return lowerFunnelShiftAsShifts(
MI);
6596 if (Result == UnableToLegalize)
6597 return lowerFunnelShiftAsShifts(
MI);
6602 auto [Dst, Src] =
MI.getFirst2Regs();
6616 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6630 {UnmergeSrc.getReg(0)});
6632 {UnmergeSrc.getReg(1)});
6637 MI.eraseFromParent();
6654 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
6658 LLT DstTy =
MRI.getType(DstReg);
6659 LLT SrcTy =
MRI.getType(SrcReg);
6679 for (
unsigned I = 0;
I < SplitSrcs.
size(); ++
I) {
6693 MI.eraseFromParent();
6702 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
6704 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
6705 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6708 MI.eraseFromParent();
6713 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
6715 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6716 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
6721 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6724 return lowerRotateWithReverseRotate(
MI);
6727 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6728 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6729 bool IsFShLegal =
false;
6730 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6731 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6735 MI.eraseFromParent();
6740 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6743 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6748 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6749 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6750 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
6756 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
6757 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6759 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6765 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
6766 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
6768 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6770 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6774 MIRBuilder.
buildOr(Dst, ShVal, RevShiftVal);
6775 MI.eraseFromParent();
6783 auto [Dst, Src] =
MI.getFirst2Regs();
6833 MI.eraseFromParent();
6838 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6844 MI.eraseFromParent();
6863 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6873 MI.eraseFromParent();
6898 MI.eraseFromParent();
6906 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
6910 if (SrcTy !=
S64 && SrcTy !=
S32)
6912 if (DstTy !=
S32 && DstTy !=
S64)
6941 MI.eraseFromParent();
6946 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7005 MI.eraseFromParent();
7015 auto [Dst, Src] =
MI.getFirst2Regs();
7023 unsigned Flags =
MI.getFlags();
7026 MI.eraseFromParent();
7030 const unsigned ExpMask = 0x7ff;
7031 const unsigned ExpBiasf64 = 1023;
7032 const unsigned ExpBiasf16 = 15;
7121 MI.eraseFromParent();
7127 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
7140 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7145 MI.eraseFromParent();
7151 case TargetOpcode::G_SMIN:
7153 case TargetOpcode::G_SMAX:
7155 case TargetOpcode::G_UMIN:
7157 case TargetOpcode::G_UMAX:
7165 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7173 MI.eraseFromParent();
7179 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
7180 const int Src0Size = Src0Ty.getScalarSizeInBits();
7181 const int Src1Size = Src1Ty.getScalarSizeInBits();
7191 if (Src0Ty == Src1Ty) {
7193 }
else if (Src0Size > Src1Size) {
7208 unsigned Flags =
MI.getFlags();
7211 MI.eraseFromParent();
7217 unsigned NewOp =
MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7218 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7220 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
7240 MI.eraseFromParent();
7248 unsigned Flags =
MI.getFlags();
7253 MI.eraseFromParent();
7259 auto [DstReg,
X] =
MI.getFirst2Regs();
7260 const unsigned Flags =
MI.getFlags();
7287 MI.eraseFromParent();
7292 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7293 unsigned Flags =
MI.getFlags();
7305 SrcReg, Zero, Flags);
7307 SrcReg, Trunc, Flags);
7312 MI.eraseFromParent();
7318 const unsigned NumOps =
MI.getNumOperands();
7319 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
7320 unsigned PartSize = Src0Ty.getSizeInBits();
7325 for (
unsigned I = 2;
I != NumOps; ++
I) {
7326 const unsigned Offset = (
I - 1) * PartSize;
7331 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
7337 ResultReg = NextResult;
7340 if (DstTy.isPointer()) {
7342 DstTy.getAddressSpace())) {
7350 MI.eraseFromParent();
7356 const unsigned NumDst =
MI.getNumOperands() - 1;
7357 Register SrcReg =
MI.getOperand(NumDst).getReg();
7358 Register Dst0Reg =
MI.getOperand(0).getReg();
7373 unsigned Offset = DstSize;
7374 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
7380 MI.eraseFromParent();
7399 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7400 InsertVal =
MI.getOperand(2).getReg();
7414 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
7420 MI.eraseFromParent();
7425 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
7443 int64_t
Offset = IdxVal * EltBytes;
7462 MI.eraseFromParent();
7468 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7469 MI.getFirst3RegLLTs();
7477 for (
int Idx : Mask) {
7479 if (!Undef.isValid())
7485 if (Src0Ty.isScalar()) {
7488 int NumElts = Src0Ty.getNumElements();
7489 Register SrcVec =
Idx < NumElts ? Src0Reg : Src1Reg;
7490 int ExtractIdx =
Idx < NumElts ?
Idx :
Idx - NumElts;
7497 if (DstTy.isScalar())
7501 MI.eraseFromParent();
7518 if (Alignment >
Align(1)) {
7530 const auto &MF = *
MI.getMF();
7531 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7536 Register AllocSize =
MI.getOperand(1).getReg();
7547 MI.eraseFromParent();
7558 MI.eraseFromParent();
7569 MI.eraseFromParent();
7575 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7576 unsigned Offset =
MI.getOperand(2).getImm();
7581 unsigned DstSize = DstTy.getSizeInBits();
7583 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7590 for (
unsigned Idx =
Offset / SrcEltSize;
7594 if (SubVectorElts.
size() == 1)
7599 MI.eraseFromParent();
7604 if (DstTy.isScalar() &&
7607 LLT SrcIntTy = SrcTy;
7621 MI.eraseFromParent();
7629 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
7641 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7654 for (
unsigned i = 0;
Idx < (
Offset + InsertSize) / EltSize;
7656 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
7669 MI.eraseFromParent();
7683 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
7687 LLT IntDstTy = DstTy;
7713 MI.eraseFromParent();
7719 auto [Dst0, Dst0Ty, Dst1, Dst1Ty,
LHS, LHSTy,
RHS, RHSTy] =
7720 MI.getFirst4RegLLTs();
7721 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
7724 LLT BoolTy = Dst1Ty;
7743 auto ResultLowerThanLHS =
7751 MI.eraseFromParent();
7758 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
7763 switch (
MI.getOpcode()) {
7766 case TargetOpcode::G_UADDSAT:
7769 BaseOp = TargetOpcode::G_ADD;
7771 case TargetOpcode::G_SADDSAT:
7774 BaseOp = TargetOpcode::G_ADD;
7776 case TargetOpcode::G_USUBSAT:
7779 BaseOp = TargetOpcode::G_SUB;
7781 case TargetOpcode::G_SSUBSAT:
7784 BaseOp = TargetOpcode::G_SUB;
7827 MI.eraseFromParent();
7833 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
7838 unsigned OverflowOp;
7839 switch (
MI.getOpcode()) {
7842 case TargetOpcode::G_UADDSAT:
7845 OverflowOp = TargetOpcode::G_UADDO;
7847 case TargetOpcode::G_SADDSAT:
7850 OverflowOp = TargetOpcode::G_SADDO;
7852 case TargetOpcode::G_USUBSAT:
7855 OverflowOp = TargetOpcode::G_USUBO;
7857 case TargetOpcode::G_SSUBSAT:
7860 OverflowOp = TargetOpcode::G_SSUBO;
7866 Register Tmp = OverflowRes.getReg(0);
7867 Register Ov = OverflowRes.getReg(1);
7893 MI.eraseFromParent();
7899 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7900 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7901 "Expected shlsat opcode!");
7902 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7903 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
7925 MI.eraseFromParent();
7930 auto [Dst, Src] =
MI.getFirst2Regs();
7933 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7942 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
7944 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7956 Res.getInstr()->getOperand(0).setReg(Dst);
7958 MI.eraseFromParent();
7965 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
7968 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7969 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7970 return B.buildOr(Dst,
LHS,
RHS);
7975 auto [Dst, Src] =
MI.getFirst2Regs();
7999 MI.eraseFromParent();
8007 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8008 int NameOpIdx = IsRead ? 1 : 0;
8009 int ValRegIndex = IsRead ? 0 : 1;
8011 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
8013 const MDString *RegStr = cast<MDString>(
8014 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8025 MI.eraseFromParent();
8031 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
8032 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8041 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8047 MI.eraseFromParent();
8053 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8058 MI.eraseFromParent();
8063 MI.eraseFromParent();
8082 APInt ExpMask = Inf;
8100 LLT DstTyCopy = DstTy;
8115 Mask &= ~fcPosFinite;
8122 Mask &= ~fcNegFinite;
8133 Mask &= ~PartialCheck;
8142 else if (PartialCheck ==
fcZero)
8161 appendToRes(SubnormalRes);
8168 else if (PartialCheck ==
fcInf)
8181 if (PartialCheck ==
fcNan) {
8185 }
else if (PartialCheck ==
fcQNan) {
8195 Abs, InfWithQnanBitC);
8203 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
8206 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8217 appendToRes(NormalRes);
8221 MI.eraseFromParent();
8227 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8228 MI.getFirst4RegLLTs();
8230 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8239 if (MaskTy.isScalar()) {
8253 if (DstTy.isVector()) {
8256 MaskReg = ShufSplat.
getReg(0);
8261 }
else if (!DstTy.isVector()) {
8266 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8279 MI.eraseFromParent();
8285 unsigned Opcode =
MI.getOpcode();
8288 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8289 : TargetOpcode::G_UDIV,
8290 {
MI.getOperand(0).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
8292 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8293 : TargetOpcode::G_UREM,
8294 {
MI.getOperand(1).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
8295 MI.eraseFromParent();
8312 MI.eraseFromParent();
8327 MI.eraseFromParent();
8334 Register DestReg =
MI.getOperand(0).getReg();
8340 MI.eraseFromParent();
8369 Register ListPtr =
MI.getOperand(1).getReg();
8379 const Align A(
MI.getOperand(2).getImm());
8386 VAList = AndDst.
getReg(0);
8404 Align EltAlignment =
DL.getABITypeAlign(Ty);
8409 MI.eraseFromParent();
8424 unsigned Limit,
const MemOp &
Op,
8425 unsigned DstAS,
unsigned SrcAS,
8428 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
8438 if (
Op.isFixedDstAlign())
8446 unsigned NumMemOps = 0;
8450 while (TySize >
Size) {
8459 assert(NewTySize > 0 &&
"Could not find appropriate type");
8466 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
8468 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
8478 if (++NumMemOps > Limit)
8481 MemOps.push_back(Ty);
8500 if (!Ty.
isVector() && ValVRegAndVal) {
8501 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8509 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8532 auto &MF = *
MI.getParent()->getParent();
8533 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8534 auto &
DL = MF.getDataLayout();
8537 assert(KnownLen != 0 &&
"Have a zero length memset length!");
8539 bool DstAlignCanChange =
false;
8545 DstAlignCanChange =
true;
8548 std::vector<LLT> MemOps;
8550 const auto &DstMMO = **
MI.memoperands_begin();
8554 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8562 MF.getFunction().getAttributes(), TLI))
8565 if (DstAlignCanChange) {
8568 Align NewAlign =
DL.getABITypeAlign(IRTy);
8569 if (NewAlign > Alignment) {
8570 Alignment = NewAlign;
8580 LLT LargestTy = MemOps[0];
8581 for (
unsigned i = 1; i < MemOps.size(); i++)
8583 LargestTy = MemOps[i];
8596 unsigned DstOff = 0;
8597 unsigned Size = KnownLen;
8598 for (
unsigned I = 0;
I < MemOps.size();
I++) {
8601 if (TySize >
Size) {
8604 assert(
I == MemOps.size() - 1 &&
I != 0);
8605 DstOff -= TySize -
Size;
8616 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8623 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8629 Ptr = MIB.buildPtrAdd(PtrTy, Dst,
Offset).getReg(0);
8632 MIB.buildStore(
Value,
Ptr, *StoreMMO);
8637 MI.eraseFromParent();
8643 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8645 auto [Dst, Src, Len] =
MI.getFirst3Regs();
8647 const auto *MMOIt =
MI.memoperands_begin();
8649 bool IsVolatile =
MemOp->isVolatile();
8655 "inline memcpy with dynamic size is not yet supported");
8656 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8657 if (KnownLen == 0) {
8658 MI.eraseFromParent();
8662 const auto &DstMMO = **
MI.memoperands_begin();
8663 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8664 Align DstAlign = DstMMO.getBaseAlign();
8665 Align SrcAlign = SrcMMO.getBaseAlign();
8667 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8674 Align SrcAlign,
bool IsVolatile) {
8675 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8676 return lowerMemcpy(
MI, Dst, Src, KnownLen,
8677 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8684 Align SrcAlign,
bool IsVolatile) {
8685 auto &MF = *
MI.getParent()->getParent();
8686 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8687 auto &
DL = MF.getDataLayout();
8690 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
8692 bool DstAlignCanChange =
false;
8694 Align Alignment = std::min(DstAlign, SrcAlign);
8698 DstAlignCanChange =
true;
8704 std::vector<LLT> MemOps;
8706 const auto &DstMMO = **
MI.memoperands_begin();
8707 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8713 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8716 MF.getFunction().getAttributes(), TLI))
8719 if (DstAlignCanChange) {
8722 Align NewAlign =
DL.getABITypeAlign(IRTy);
8727 if (!
TRI->hasStackRealignment(MF))
8728 while (NewAlign > Alignment &&
DL.exceedsNaturalStackAlignment(NewAlign))
8731 if (NewAlign > Alignment) {
8732 Alignment = NewAlign;
8740 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
8748 unsigned CurrOffset = 0;
8749 unsigned Size = KnownLen;
8750 for (
auto CopyTy : MemOps) {
8753 if (CopyTy.getSizeInBytes() >
Size)
8754 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
8758 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8760 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8765 if (CurrOffset != 0) {
8769 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
8771 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8775 if (CurrOffset != 0) {
8777 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
8779 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8780 CurrOffset += CopyTy.getSizeInBytes();
8781 Size -= CopyTy.getSizeInBytes();
8784 MI.eraseFromParent();
8792 auto &MF = *
MI.getParent()->getParent();
8793 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8794 auto &
DL = MF.getDataLayout();
8797 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
8799 bool DstAlignCanChange =
false;
8802 Align Alignment = std::min(DstAlign, SrcAlign);
8806 DstAlignCanChange =
true;
8809 std::vector<LLT> MemOps;
8811 const auto &DstMMO = **
MI.memoperands_begin();
8812 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
8821 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8824 MF.getFunction().getAttributes(), TLI))
8827 if (DstAlignCanChange) {
8830 Align NewAlign =
DL.getABITypeAlign(IRTy);
8835 if (!
TRI->hasStackRealignment(MF))
8836 while (NewAlign > Alignment &&
DL.exceedsNaturalStackAlignment(NewAlign))
8839 if (NewAlign > Alignment) {
8840 Alignment = NewAlign;
8848 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
8854 unsigned CurrOffset = 0;
8856 for (
auto CopyTy : MemOps) {
8859 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8863 if (CurrOffset != 0) {
8867 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
8869 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8870 CurrOffset += CopyTy.getSizeInBytes();
8874 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
8875 LLT CopyTy = MemOps[
I];
8878 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.
getSizeInBytes());
8881 if (CurrOffset != 0) {
8885 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
8887 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
8890 MI.eraseFromParent();
8896 const unsigned Opc =
MI.getOpcode();
8899 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8900 Opc == TargetOpcode::G_MEMSET) &&
8901 "Expected memcpy like instruction");
8903 auto MMOIt =
MI.memoperands_begin();
8908 auto [Dst, Src, Len] =
MI.getFirst3Regs();
8910 if (Opc != TargetOpcode::G_MEMSET) {
8911 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
8913 SrcAlign =
MemOp->getBaseAlign();
8920 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8922 if (KnownLen == 0) {
8923 MI.eraseFromParent();
8927 bool IsVolatile =
MemOp->isVolatile();
8928 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8929 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8936 if (MaxLen && KnownLen > MaxLen)
8939 if (Opc == TargetOpcode::G_MEMCPY) {
8940 auto &MF = *
MI.getParent()->getParent();
8941 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8944 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8947 if (Opc == TargetOpcode::G_MEMMOVE)
8948 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8949 if (Opc == TargetOpcode::G_MEMSET)
8950 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ ICMP_ULT
unsigned less than
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFreeze(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_FREEZE Src.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements or scalar bitwidth.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)