LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
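// Illustrative example (not in the original source): breaking OrigTy = s88
// into NarrowTy = s64 pieces yields one s64 part plus an s24 leftover, so the
// function returns {1, 1} and sets LeftoverTy to s24. If NarrowTy were a
// vector whose element size does not divide the leftover bits, it would
// return {-1, -1} instead.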
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different numbers of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 AllRegs.push_back(Reg);
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
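// Illustrative example (assumed values): rebuilding a v3s64 destination from
// one v2s64 part plus a single s64 leftover takes the ResultTy.isVector()
// path above; all registers are collected and mergeMixedSubvectors emits one
// merge-like instruction over the individual elements.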
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
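// Illustrative example (assumed values): with DstTy = s64, NarrowTy = s32 and
// GCDTy = s16, LCMTy is s64, NumParts = 2 and NumSubParts = 2. Three s16
// source pieces plus one pad piece (chosen per PadStrategy) are merged
// pairwise into two s32 values, which replace the contents of VRegs.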
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FTAN:
453 RTLIBCASE(TAN_F);
454 case TargetOpcode::G_FASIN:
455 RTLIBCASE(ASIN_F);
456 case TargetOpcode::G_FACOS:
457 RTLIBCASE(ACOS_F);
458 case TargetOpcode::G_FATAN:
459 RTLIBCASE(ATAN_F);
460 case TargetOpcode::G_FSINH:
461 RTLIBCASE(SINH_F);
462 case TargetOpcode::G_FCOSH:
463 RTLIBCASE(COSH_F);
464 case TargetOpcode::G_FTANH:
465 RTLIBCASE(TANH_F);
466 case TargetOpcode::G_FLOG10:
467 RTLIBCASE(LOG10_F);
468 case TargetOpcode::G_FLOG:
469 RTLIBCASE(LOG_F);
470 case TargetOpcode::G_FLOG2:
471 RTLIBCASE(LOG2_F);
472 case TargetOpcode::G_FLDEXP:
473 RTLIBCASE(LDEXP_F);
474 case TargetOpcode::G_FCEIL:
475 RTLIBCASE(CEIL_F);
476 case TargetOpcode::G_FFLOOR:
477 RTLIBCASE(FLOOR_F);
478 case TargetOpcode::G_FMINNUM:
479 RTLIBCASE(FMIN_F);
480 case TargetOpcode::G_FMAXNUM:
481 RTLIBCASE(FMAX_F);
482 case TargetOpcode::G_FSQRT:
483 RTLIBCASE(SQRT_F);
484 case TargetOpcode::G_FRINT:
485 RTLIBCASE(RINT_F);
486 case TargetOpcode::G_FNEARBYINT:
487 RTLIBCASE(NEARBYINT_F);
488 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
489 RTLIBCASE(ROUNDEVEN_F);
490 case TargetOpcode::G_INTRINSIC_LRINT:
491 RTLIBCASE(LRINT_F);
492 case TargetOpcode::G_INTRINSIC_LLRINT:
493 RTLIBCASE(LLRINT_F);
494 }
495 llvm_unreachable("Unknown libcall function");
496}
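// Illustrative example: for G_FSIN with Size = 32 the RTLIBCASE macro above
// yields RTLIB::SIN_F32, which targets typically map to the "sinf" libcall.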
497
498/// True if an instruction is in tail position in its caller. Intended for
499/// legalizing libcalls as tail calls when possible.
500static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
501 const MachineInstr &MI,
502 const TargetInstrInfo &TII,
503 MachineRegisterInfo &MRI) {
504 MachineBasicBlock &MBB = *MI.getParent();
505 const Function &F = MBB.getParent()->getFunction();
506
507 // Conservatively require the attributes of the call to match those of
508 // the return. Ignore NoAlias and NonNull because they don't affect the
509 // call sequence.
510 AttributeList CallerAttrs = F.getAttributes();
511 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
512 .removeAttribute(Attribute::NoAlias)
513 .removeAttribute(Attribute::NonNull)
514 .hasAttributes())
515 return false;
516
517 // It's not safe to eliminate the sign / zero extension of the return value.
518 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
519 CallerAttrs.hasRetAttr(Attribute::SExt))
520 return false;
521
522 // Only tail call if the following instruction is a standard return or if we
523 // have a `thisreturn` callee, and a sequence like:
524 //
525 // G_MEMCPY %0, %1, %2
526 // $x0 = COPY %0
527 // RET_ReallyLR implicit $x0
528 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
529 if (Next != MBB.instr_end() && Next->isCopy()) {
530 if (MI.getOpcode() == TargetOpcode::G_BZERO)
531 return false;
532
533 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
534 // memcpy/etc routines return the same parameter. For others it will be the
535 // returned value.
536 Register VReg = MI.getOperand(0).getReg();
537 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
538 return false;
539
540 Register PReg = Next->getOperand(0).getReg();
541 if (!PReg.isPhysical())
542 return false;
543
544 auto Ret = next_nodbg(Next, MBB.instr_end());
545 if (Ret == MBB.instr_end() || !Ret->isReturn())
546 return false;
547
548 if (Ret->getNumImplicitOperands() != 1)
549 return false;
550
551 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
552 return false;
553
554 // Skip over the COPY that we just validated.
555 Next = Ret;
556 }
557
558 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
559 return false;
560
561 return true;
562}
563
564LegalizerHelper::LegalizeResult
565llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
566 const CallLowering::ArgInfo &Result,
567 ArrayRef<CallLowering::ArgInfo> Args,
568 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
569 MachineInstr *MI) {
570 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
571
573 Info.CallConv = CC;
575 Info.OrigRet = Result;
576 if (MI)
577 Info.IsTailCall =
578 (Result.Ty->isVoidTy() ||
579 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
580 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
581 *MIRBuilder.getMRI());
582
583 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
584 if (!CLI.lowerCall(MIRBuilder, Info))
585 return LegalizerHelper::UnableToLegalize;
586
587 if (MI && Info.LoweredTailCall) {
588 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
589
590 // Check debug locations before removing the return.
591 LocObserver.checkpoint(true);
592
593 // We must have a return following the call (or debug insts) to get past
594 // isLibCallInTailPosition.
595 do {
596 MachineInstr *Next = MI->getNextNode();
597 assert(Next &&
598 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
599 "Expected instr following MI to be return or debug inst?");
600 // We lowered a tail call, so the call is now the return from the block.
601 // Delete the old return.
602 Next->eraseFromParent();
603 } while (MI->getNextNode());
604
605 // We expect to lose the debug location from the return.
606 LocObserver.checkpoint(false);
607 }
608 return LegalizerHelper::Legalized;
609}
610
611LegalizerHelper::LegalizeResult
612llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
613 const CallLowering::ArgInfo &Result,
614 ArrayRef<CallLowering::ArgInfo> Args,
615 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
616 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
617 const char *Name = TLI.getLibcallName(Libcall);
618 if (!Name)
619 return LegalizerHelper::UnableToLegalize;
620 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
621 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
622}
623
624// Useful for libcalls where all operands have the same type.
625static LegalizerHelper::LegalizeResult
626simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
627 Type *OpType, LostDebugLocObserver &LocObserver) {
628 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
629
630 // FIXME: What does the original arg index mean here?
632 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
633 Args.push_back({MO.getReg(), OpType, 0});
634 return createLibcall(MIRBuilder, Libcall,
635 {MI.getOperand(0).getReg(), OpType, 0}, Args,
636 LocObserver, &MI);
637}
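// Illustrative example (assumed MIR): legalizing
//   %dst:_(s64) = G_FREM %a, %b
// through simpleLibcall produces a call to the double-precision fmod routine
// with %a and %b as arguments and %dst bound to its return value.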
638
639LegalizerHelper::LegalizeResult
640llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
641 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
642 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
643
645 // Add all the args, except for the last which is an imm denoting 'tail'.
646 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
647 Register Reg = MI.getOperand(i).getReg();
648
649 // Need to derive an IR type for call lowering.
650 LLT OpLLT = MRI.getType(Reg);
651 Type *OpTy = nullptr;
652 if (OpLLT.isPointer())
653 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
654 else
655 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
656 Args.push_back({Reg, OpTy, 0});
657 }
658
659 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
660 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
661 RTLIB::Libcall RTLibcall;
662 unsigned Opc = MI.getOpcode();
663 switch (Opc) {
664 case TargetOpcode::G_BZERO:
665 RTLibcall = RTLIB::BZERO;
666 break;
667 case TargetOpcode::G_MEMCPY:
668 RTLibcall = RTLIB::MEMCPY;
669 Args[0].Flags[0].setReturned();
670 break;
671 case TargetOpcode::G_MEMMOVE:
672 RTLibcall = RTLIB::MEMMOVE;
673 Args[0].Flags[0].setReturned();
674 break;
675 case TargetOpcode::G_MEMSET:
676 RTLibcall = RTLIB::MEMSET;
677 Args[0].Flags[0].setReturned();
678 break;
679 default:
680 llvm_unreachable("unsupported opcode");
681 }
682 const char *Name = TLI.getLibcallName(RTLibcall);
683
684 // Unsupported libcall on the target.
685 if (!Name) {
686 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
687 << MIRBuilder.getTII().getName(Opc) << "\n");
689 }
690
692 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
694 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
695 Info.IsTailCall =
696 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
697 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
698
699 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
700 if (!CLI.lowerCall(MIRBuilder, Info))
702
703 if (Info.LoweredTailCall) {
704 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
705
706 // Check debug locations before removing the return.
707 LocObserver.checkpoint(true);
708
709 // We must have a return following the call (or debug insts) to get past
710 // isLibCallInTailPosition.
711 do {
712 MachineInstr *Next = MI.getNextNode();
713 assert(Next &&
714 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
715 "Expected instr following MI to be return or debug inst?");
716 // We lowered a tail call, so the call is now the return from the block.
717 // Delete the old return.
718 Next->eraseFromParent();
719 } while (MI.getNextNode());
720
721 // We expect to lose the debug location from the return.
722 LocObserver.checkpoint(false);
723 }
724
726}
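// Illustrative example (assumed MIR): a G_MEMCPY %dst(p0), %src(p0), %len(s64)
// whose trailing immediate requests a tail call is lowered to a call to the
// memcpy libcall; when it is in tail position, the trailing COPY/RET sequence
// is deleted and the lowered call terminates the block.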
727
728static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
729 unsigned Opc = MI.getOpcode();
730 auto &AtomicMI = cast<GMemOperation>(MI);
731 auto &MMO = AtomicMI.getMMO();
732 auto Ordering = MMO.getMergedOrdering();
733 LLT MemType = MMO.getMemoryType();
734 uint64_t MemSize = MemType.getSizeInBytes();
735 if (MemType.isVector())
736 return RTLIB::UNKNOWN_LIBCALL;
737
738#define LCALLS(A, B) \
739 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
740#define LCALL5(A) \
741 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
742 switch (Opc) {
743 case TargetOpcode::G_ATOMIC_CMPXCHG:
744 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
745 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
746 return getOutlineAtomicHelper(LC, Ordering, MemSize);
747 }
748 case TargetOpcode::G_ATOMICRMW_XCHG: {
749 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
750 return getOutlineAtomicHelper(LC, Ordering, MemSize);
751 }
752 case TargetOpcode::G_ATOMICRMW_ADD:
753 case TargetOpcode::G_ATOMICRMW_SUB: {
754 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
755 return getOutlineAtomicHelper(LC, Ordering, MemSize);
756 }
757 case TargetOpcode::G_ATOMICRMW_AND: {
758 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
759 return getOutlineAtomicHelper(LC, Ordering, MemSize);
760 }
761 case TargetOpcode::G_ATOMICRMW_OR: {
762 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
763 return getOutlineAtomicHelper(LC, Ordering, MemSize);
764 }
765 case TargetOpcode::G_ATOMICRMW_XOR: {
766 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
767 return getOutlineAtomicHelper(LC, Ordering, MemSize);
768 }
769 default:
770 return RTLIB::UNKNOWN_LIBCALL;
771 }
772#undef LCALLS
773#undef LCALL5
774}
775
776static LegalizerHelper::LegalizeResult
777createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
778 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
779
780 Type *RetTy;
781 SmallVector<Register> RetRegs;
783 unsigned Opc = MI.getOpcode();
784 switch (Opc) {
785 case TargetOpcode::G_ATOMIC_CMPXCHG:
786 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
788 LLT SuccessLLT;
789 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
790 MI.getFirst4RegLLTs();
791 RetRegs.push_back(Ret);
792 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
793 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
794 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
795 NewLLT) = MI.getFirst5RegLLTs();
796 RetRegs.push_back(Success);
798 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
799 }
800 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
801 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
802 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
803 break;
804 }
805 case TargetOpcode::G_ATOMICRMW_XCHG:
806 case TargetOpcode::G_ATOMICRMW_ADD:
807 case TargetOpcode::G_ATOMICRMW_SUB:
808 case TargetOpcode::G_ATOMICRMW_AND:
809 case TargetOpcode::G_ATOMICRMW_OR:
810 case TargetOpcode::G_ATOMICRMW_XOR: {
811 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
812 RetRegs.push_back(Ret);
813 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
814 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
815 Val =
816 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
817 .getReg(0);
818 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
819 Val =
820 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
821 .getReg(0);
822 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
823 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
824 break;
825 }
826 default:
827 llvm_unreachable("unsupported opcode");
828 }
829
830 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
831 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
833 const char *Name = TLI.getLibcallName(RTLibcall);
834
835 // Unsupported libcall on the target.
836 if (!Name) {
837 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
838 << MIRBuilder.getTII().getName(Opc) << "\n");
840 }
841
843 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
845 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
846
847 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
848 if (!CLI.lowerCall(MIRBuilder, Info))
850
852}
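// Illustrative example (assumed MIR and AArch64-style outline atomics): an
//   %old:_(s32) = G_ATOMICRMW_SUB %ptr(p0), %val(s32)
// is rewritten above as an add of the negated value, so the emitted call is
// the 4-byte OUTLINE_ATOMIC_LDADD helper selected by the memory ordering.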
853
854static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
855 Type *FromType) {
856 auto ToMVT = MVT::getVT(ToType);
857 auto FromMVT = MVT::getVT(FromType);
858
859 switch (Opcode) {
860 case TargetOpcode::G_FPEXT:
861 return RTLIB::getFPEXT(FromMVT, ToMVT);
862 case TargetOpcode::G_FPTRUNC:
863 return RTLIB::getFPROUND(FromMVT, ToMVT);
864 case TargetOpcode::G_FPTOSI:
865 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
866 case TargetOpcode::G_FPTOUI:
867 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
868 case TargetOpcode::G_SITOFP:
869 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
870 case TargetOpcode::G_UITOFP:
871 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
872 }
873 llvm_unreachable("Unsupported libcall function");
874}
875
876static LegalizerHelper::LegalizeResult
877conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
878 Type *FromType, LostDebugLocObserver &LocObserver) {
879 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
880 return createLibcall(
881 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
882 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
883}
884
885static RTLIB::Libcall
887 RTLIB::Libcall RTLibcall;
888 switch (MI.getOpcode()) {
889 case TargetOpcode::G_GET_FPENV:
890 RTLibcall = RTLIB::FEGETENV;
891 break;
892 case TargetOpcode::G_SET_FPENV:
893 case TargetOpcode::G_RESET_FPENV:
894 RTLibcall = RTLIB::FESETENV;
895 break;
896 case TargetOpcode::G_GET_FPMODE:
897 RTLibcall = RTLIB::FEGETMODE;
898 break;
899 case TargetOpcode::G_SET_FPMODE:
900 case TargetOpcode::G_RESET_FPMODE:
901 RTLibcall = RTLIB::FESETMODE;
902 break;
903 default:
904 llvm_unreachable("Unexpected opcode");
905 }
906 return RTLibcall;
907}
908
909// Some library functions that read FP state (fegetmode, fegetenv) write the
910// state into a region in memory. IR intrinsics that do the same operations
911// (get_fpmode, get_fpenv) return the state as an integer value. To implement
912// these intrinsics via the library functions, we need to use a temporary
913// variable, for example:
914//
915// %0:_(s32) = G_GET_FPMODE
916//
917// is transformed to:
918//
919// %1:_(p0) = G_FRAME_INDEX %stack.0
920// BL &fegetmode
921// %0:_(s32) = G_LOAD % 1
922//
924LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
926 LostDebugLocObserver &LocObserver) {
928 auto &MF = MIRBuilder.getMF();
929 auto &MRI = *MIRBuilder.getMRI();
930 auto &Ctx = MF.getFunction().getContext();
931
932 // Create a temporary where the library function will put the read state.
933 Register Dst = MI.getOperand(0).getReg();
934 LLT StateTy = MRI.getType(Dst);
935 TypeSize StateSize = StateTy.getSizeInBytes();
937 MachinePointerInfo TempPtrInfo;
938 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
939
940 // Create a call to library function, with the temporary as an argument.
941 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
942 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
944 auto Res =
945 createLibcall(MIRBuilder, RTLibcall,
947 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
948 LocObserver, nullptr);
950 return Res;
951
952 // Create a load from the temporary.
953 MachineMemOperand *MMO = MF.getMachineMemOperand(
954 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
955 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
956
958}
959
960// Similar to `createGetStateLibcall`, this function calls a library function
961// using transient space on the stack. In this case the library function reads
962// the content of the memory region.
964LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
966 LostDebugLocObserver &LocObserver) {
968 auto &MF = MIRBuilder.getMF();
969 auto &MRI = *MIRBuilder.getMRI();
970 auto &Ctx = MF.getFunction().getContext();
971
972 // Create a temporary from which the library function will read the new state.
973 Register Src = MI.getOperand(0).getReg();
974 LLT StateTy = MRI.getType(Src);
975 TypeSize StateSize = StateTy.getSizeInBytes();
977 MachinePointerInfo TempPtrInfo;
978 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
979
980 // Put the new state into the temporary.
981 MachineMemOperand *MMO = MF.getMachineMemOperand(
982 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
983 MIRBuilder.buildStore(Src, Temp, *MMO);
984
985 // Create a call to library function, with the temporary as an argument.
986 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
987 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
989 return createLibcall(MIRBuilder, RTLibcall,
991 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
992 LocObserver, nullptr);
993}
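// Illustrative example (assumed MIR): G_SET_FPMODE %0:_(s32) becomes a store
// of %0 into a stack temporary followed by a call to fesetmode with the
// temporary's address as the only argument.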
994
995// This function is used to legalize operations that set the default FP
996// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
997// used for that. On most targets supported by glibc, FE_DFL_MODE is defined
998// as `((const femode_t *) -1)`; that assumption is used here. If it does not
999// hold for some target, the target must provide custom lowering.
1001LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1003 LostDebugLocObserver &LocObserver) {
1005 auto &MF = MIRBuilder.getMF();
1006 auto &Ctx = MF.getFunction().getContext();
1007
1008 // Create an argument for the library function.
1009 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1010 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1011 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1012 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1013 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1014 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1015 MIRBuilder.buildIntToPtr(Dest, DefValue);
1016
1018 return createLibcall(MIRBuilder, RTLibcall,
1020 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1021 LocObserver, &MI);
1022}
1023
1024LegalizerHelper::LegalizeResult
1025LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1026 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1027
1028 switch (MI.getOpcode()) {
1029 default:
1030 return UnableToLegalize;
1031 case TargetOpcode::G_MUL:
1032 case TargetOpcode::G_SDIV:
1033 case TargetOpcode::G_UDIV:
1034 case TargetOpcode::G_SREM:
1035 case TargetOpcode::G_UREM:
1036 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1037 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1038 unsigned Size = LLTy.getSizeInBits();
1039 Type *HLTy = IntegerType::get(Ctx, Size);
1040 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1041 if (Status != Legalized)
1042 return Status;
1043 break;
1044 }
1045 case TargetOpcode::G_FADD:
1046 case TargetOpcode::G_FSUB:
1047 case TargetOpcode::G_FMUL:
1048 case TargetOpcode::G_FDIV:
1049 case TargetOpcode::G_FMA:
1050 case TargetOpcode::G_FPOW:
1051 case TargetOpcode::G_FREM:
1052 case TargetOpcode::G_FCOS:
1053 case TargetOpcode::G_FSIN:
1054 case TargetOpcode::G_FTAN:
1055 case TargetOpcode::G_FACOS:
1056 case TargetOpcode::G_FASIN:
1057 case TargetOpcode::G_FATAN:
1058 case TargetOpcode::G_FCOSH:
1059 case TargetOpcode::G_FSINH:
1060 case TargetOpcode::G_FTANH:
1061 case TargetOpcode::G_FLOG10:
1062 case TargetOpcode::G_FLOG:
1063 case TargetOpcode::G_FLOG2:
1064 case TargetOpcode::G_FLDEXP:
1065 case TargetOpcode::G_FEXP:
1066 case TargetOpcode::G_FEXP2:
1067 case TargetOpcode::G_FEXP10:
1068 case TargetOpcode::G_FCEIL:
1069 case TargetOpcode::G_FFLOOR:
1070 case TargetOpcode::G_FMINNUM:
1071 case TargetOpcode::G_FMAXNUM:
1072 case TargetOpcode::G_FSQRT:
1073 case TargetOpcode::G_FRINT:
1074 case TargetOpcode::G_FNEARBYINT:
1075 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1076 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1077 unsigned Size = LLTy.getSizeInBits();
1078 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1079 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1080 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1081 return UnableToLegalize;
1082 }
1083 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1084 if (Status != Legalized)
1085 return Status;
1086 break;
1087 }
1088 case TargetOpcode::G_INTRINSIC_LRINT:
1089 case TargetOpcode::G_INTRINSIC_LLRINT: {
1090 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1091 unsigned Size = LLTy.getSizeInBits();
1092 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1093 Type *ITy = IntegerType::get(
1094 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1095 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1096 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1097 return UnableToLegalize;
1098 }
1099 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1101 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1102 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1103 if (Status != Legalized)
1104 return Status;
1105 MI.eraseFromParent();
1106 return Legalized;
1107 }
1108 case TargetOpcode::G_FPOWI: {
1109 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1110 unsigned Size = LLTy.getSizeInBits();
1111 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1112 Type *ITy = IntegerType::get(
1113 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1114 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1115 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1116 return UnableToLegalize;
1117 }
1118 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1119 std::initializer_list<CallLowering::ArgInfo> Args = {
1120 {MI.getOperand(1).getReg(), HLTy, 0},
1121 {MI.getOperand(2).getReg(), ITy, 1}};
1123 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1124 Args, LocObserver, &MI);
1125 if (Status != Legalized)
1126 return Status;
1127 break;
1128 }
1129 case TargetOpcode::G_FPEXT:
1130 case TargetOpcode::G_FPTRUNC: {
1131 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1132 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1133 if (!FromTy || !ToTy)
1134 return UnableToLegalize;
1136 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1137 if (Status != Legalized)
1138 return Status;
1139 break;
1140 }
1141 case TargetOpcode::G_FPTOSI:
1142 case TargetOpcode::G_FPTOUI: {
1143 // FIXME: Support other types
1144 Type *FromTy =
1145 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1146 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1147 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1148 return UnableToLegalize;
1150 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
1151 if (Status != Legalized)
1152 return Status;
1153 break;
1154 }
1155 case TargetOpcode::G_SITOFP:
1156 case TargetOpcode::G_UITOFP: {
1157 // FIXME: Support other types
1158 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1159 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1160 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1161 return UnableToLegalize;
1163 MI, MIRBuilder,
1164 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1165 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1166 LocObserver);
1167 if (Status != Legalized)
1168 return Status;
1169 break;
1170 }
1171 case TargetOpcode::G_ATOMICRMW_XCHG:
1172 case TargetOpcode::G_ATOMICRMW_ADD:
1173 case TargetOpcode::G_ATOMICRMW_SUB:
1174 case TargetOpcode::G_ATOMICRMW_AND:
1175 case TargetOpcode::G_ATOMICRMW_OR:
1176 case TargetOpcode::G_ATOMICRMW_XOR:
1177 case TargetOpcode::G_ATOMIC_CMPXCHG:
1178 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1180 if (Status != Legalized)
1181 return Status;
1182 break;
1183 }
1184 case TargetOpcode::G_BZERO:
1185 case TargetOpcode::G_MEMCPY:
1186 case TargetOpcode::G_MEMMOVE:
1187 case TargetOpcode::G_MEMSET: {
1188 LegalizeResult Result =
1189 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1190 if (Result != Legalized)
1191 return Result;
1192 MI.eraseFromParent();
1193 return Result;
1194 }
1195 case TargetOpcode::G_GET_FPENV:
1196 case TargetOpcode::G_GET_FPMODE: {
1197 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1198 if (Result != Legalized)
1199 return Result;
1200 break;
1201 }
1202 case TargetOpcode::G_SET_FPENV:
1203 case TargetOpcode::G_SET_FPMODE: {
1204 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1205 if (Result != Legalized)
1206 return Result;
1207 break;
1208 }
1209 case TargetOpcode::G_RESET_FPENV:
1210 case TargetOpcode::G_RESET_FPMODE: {
1211 LegalizeResult Result =
1212 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1213 if (Result != Legalized)
1214 return Result;
1215 break;
1216 }
1217 }
1218
1219 MI.eraseFromParent();
1220 return Legalized;
1221}
1222
1223LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1224 unsigned TypeIdx,
1225 LLT NarrowTy) {
1226 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1227 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1228
1229 switch (MI.getOpcode()) {
1230 default:
1231 return UnableToLegalize;
1232 case TargetOpcode::G_IMPLICIT_DEF: {
1233 Register DstReg = MI.getOperand(0).getReg();
1234 LLT DstTy = MRI.getType(DstReg);
1235
1236 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1237 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1238 // FIXME: Although this would also be legal for the general case, it causes
1239 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1240 // combines not being hit). This seems to be a problem related to the
1241 // artifact combiner.
1242 if (SizeOp0 % NarrowSize != 0) {
1243 LLT ImplicitTy = NarrowTy;
1244 if (DstTy.isVector())
1245 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1246
1247 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1248 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1249
1250 MI.eraseFromParent();
1251 return Legalized;
1252 }
1253
1254 int NumParts = SizeOp0 / NarrowSize;
1255
1257 for (int i = 0; i < NumParts; ++i)
1258 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1259
1260 if (DstTy.isVector())
1261 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1262 else
1263 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1264 MI.eraseFromParent();
1265 return Legalized;
1266 }
1267 case TargetOpcode::G_CONSTANT: {
1268 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1269 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1270 unsigned TotalSize = Ty.getSizeInBits();
1271 unsigned NarrowSize = NarrowTy.getSizeInBits();
1272 int NumParts = TotalSize / NarrowSize;
1273
1274 SmallVector<Register, 4> PartRegs;
1275 for (int I = 0; I != NumParts; ++I) {
1276 unsigned Offset = I * NarrowSize;
1277 auto K = MIRBuilder.buildConstant(NarrowTy,
1278 Val.lshr(Offset).trunc(NarrowSize));
1279 PartRegs.push_back(K.getReg(0));
1280 }
1281
1282 LLT LeftoverTy;
1283 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1284 SmallVector<Register, 1> LeftoverRegs;
1285 if (LeftoverBits != 0) {
1286 LeftoverTy = LLT::scalar(LeftoverBits);
1287 auto K = MIRBuilder.buildConstant(
1288 LeftoverTy,
1289 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1290 LeftoverRegs.push_back(K.getReg(0));
1291 }
1292
1293 insertParts(MI.getOperand(0).getReg(),
1294 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1295
1296 MI.eraseFromParent();
1297 return Legalized;
1298 }
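 // Illustrative example (assumed MIR): narrowing %c:_(s96) = G_CONSTANT with
 // NarrowTy = s32 emits three s32 G_CONSTANTs holding bits [0,32), [32,64)
 // and [64,96) of the value, which insertParts then merges back into %c.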
1299 case TargetOpcode::G_SEXT:
1300 case TargetOpcode::G_ZEXT:
1301 case TargetOpcode::G_ANYEXT:
1302 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1303 case TargetOpcode::G_TRUNC: {
1304 if (TypeIdx != 1)
1305 return UnableToLegalize;
1306
1307 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1308 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1309 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1310 return UnableToLegalize;
1311 }
1312
1313 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1314 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1315 MI.eraseFromParent();
1316 return Legalized;
1317 }
1318 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1319 case TargetOpcode::G_FREEZE: {
1320 if (TypeIdx != 0)
1321 return UnableToLegalize;
1322
1323 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1324 // Should widen scalar first
1325 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1326 return UnableToLegalize;
1327
1328 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1330 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1331 Parts.push_back(
1332 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1333 .getReg(0));
1334 }
1335
1336 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1337 MI.eraseFromParent();
1338 return Legalized;
1339 }
1340 case TargetOpcode::G_ADD:
1341 case TargetOpcode::G_SUB:
1342 case TargetOpcode::G_SADDO:
1343 case TargetOpcode::G_SSUBO:
1344 case TargetOpcode::G_SADDE:
1345 case TargetOpcode::G_SSUBE:
1346 case TargetOpcode::G_UADDO:
1347 case TargetOpcode::G_USUBO:
1348 case TargetOpcode::G_UADDE:
1349 case TargetOpcode::G_USUBE:
1350 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1351 case TargetOpcode::G_MUL:
1352 case TargetOpcode::G_UMULH:
1353 return narrowScalarMul(MI, NarrowTy);
1354 case TargetOpcode::G_EXTRACT:
1355 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1356 case TargetOpcode::G_INSERT:
1357 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1358 case TargetOpcode::G_LOAD: {
1359 auto &LoadMI = cast<GLoad>(MI);
1360 Register DstReg = LoadMI.getDstReg();
1361 LLT DstTy = MRI.getType(DstReg);
1362 if (DstTy.isVector())
1363 return UnableToLegalize;
1364
1365 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1366 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1367 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1368 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1369 LoadMI.eraseFromParent();
1370 return Legalized;
1371 }
1372
1373 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1374 }
1375 case TargetOpcode::G_ZEXTLOAD:
1376 case TargetOpcode::G_SEXTLOAD: {
1377 auto &LoadMI = cast<GExtLoad>(MI);
1378 Register DstReg = LoadMI.getDstReg();
1379 Register PtrReg = LoadMI.getPointerReg();
1380
1381 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1382 auto &MMO = LoadMI.getMMO();
1383 unsigned MemSize = MMO.getSizeInBits().getValue();
1384
1385 if (MemSize == NarrowSize) {
1386 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1387 } else if (MemSize < NarrowSize) {
1388 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1389 } else if (MemSize > NarrowSize) {
1390 // FIXME: Need to split the load.
1391 return UnableToLegalize;
1392 }
1393
1394 if (isa<GZExtLoad>(LoadMI))
1395 MIRBuilder.buildZExt(DstReg, TmpReg);
1396 else
1397 MIRBuilder.buildSExt(DstReg, TmpReg);
1398
1399 LoadMI.eraseFromParent();
1400 return Legalized;
1401 }
1402 case TargetOpcode::G_STORE: {
1403 auto &StoreMI = cast<GStore>(MI);
1404
1405 Register SrcReg = StoreMI.getValueReg();
1406 LLT SrcTy = MRI.getType(SrcReg);
1407 if (SrcTy.isVector())
1408 return UnableToLegalize;
1409
1410 int NumParts = SizeOp0 / NarrowSize;
1411 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1412 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1413 if (SrcTy.isVector() && LeftoverBits != 0)
1414 return UnableToLegalize;
1415
1416 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1417 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1418 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1419 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1420 StoreMI.eraseFromParent();
1421 return Legalized;
1422 }
1423
1424 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1425 }
1426 case TargetOpcode::G_SELECT:
1427 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1428 case TargetOpcode::G_AND:
1429 case TargetOpcode::G_OR:
1430 case TargetOpcode::G_XOR: {
1431 // Legalize bitwise operation:
1432 // A = BinOp<Ty> B, C
1433 // into:
1434 // B1, ..., BN = G_UNMERGE_VALUES B
1435 // C1, ..., CN = G_UNMERGE_VALUES C
1436 // A1 = BinOp<Ty/N> B1, C1
1437 // ...
1438 // AN = BinOp<Ty/N> BN, CN
1439 // A = G_MERGE_VALUES A1, ..., AN
1440 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1441 }
1442 case TargetOpcode::G_SHL:
1443 case TargetOpcode::G_LSHR:
1444 case TargetOpcode::G_ASHR:
1445 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1446 case TargetOpcode::G_CTLZ:
1447 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1448 case TargetOpcode::G_CTTZ:
1449 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1450 case TargetOpcode::G_CTPOP:
1451 if (TypeIdx == 1)
1452 switch (MI.getOpcode()) {
1453 case TargetOpcode::G_CTLZ:
1454 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1455 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1456 case TargetOpcode::G_CTTZ:
1457 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1458 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1459 case TargetOpcode::G_CTPOP:
1460 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1461 default:
1462 return UnableToLegalize;
1463 }
1464
1466 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1468 return Legalized;
1469 case TargetOpcode::G_INTTOPTR:
1470 if (TypeIdx != 1)
1471 return UnableToLegalize;
1472
1474 narrowScalarSrc(MI, NarrowTy, 1);
1476 return Legalized;
1477 case TargetOpcode::G_PTRTOINT:
1478 if (TypeIdx != 0)
1479 return UnableToLegalize;
1480
1482 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1484 return Legalized;
1485 case TargetOpcode::G_PHI: {
1486 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1487 // NarrowSize.
1488 if (SizeOp0 % NarrowSize != 0)
1489 return UnableToLegalize;
1490
1491 unsigned NumParts = SizeOp0 / NarrowSize;
1492 SmallVector<Register, 2> DstRegs(NumParts);
1493 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1495 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1496 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1498 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1499 SrcRegs[i / 2], MIRBuilder, MRI);
1500 }
1501 MachineBasicBlock &MBB = *MI.getParent();
1503 for (unsigned i = 0; i < NumParts; ++i) {
1504 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1506 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1507 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1508 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1509 }
1511 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1513 MI.eraseFromParent();
1514 return Legalized;
1515 }
1516 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1517 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1518 if (TypeIdx != 2)
1519 return UnableToLegalize;
1520
1521 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1523 narrowScalarSrc(MI, NarrowTy, OpIdx);
1525 return Legalized;
1526 }
1527 case TargetOpcode::G_ICMP: {
1528 Register LHS = MI.getOperand(2).getReg();
1529 LLT SrcTy = MRI.getType(LHS);
1530 uint64_t SrcSize = SrcTy.getSizeInBits();
1531 CmpInst::Predicate Pred =
1532 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1533
1534 // TODO: Handle the non-equality case for weird sizes.
1535 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1536 return UnableToLegalize;
1537
1538 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1539 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1540 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1541 LHSLeftoverRegs, MIRBuilder, MRI))
1542 return UnableToLegalize;
1543
1544 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1545 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1546 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1547 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1548 return UnableToLegalize;
1549
1550 // We now have the LHS and RHS of the compare split into narrow-type
1551 // registers, plus potentially some leftover type.
1552 Register Dst = MI.getOperand(0).getReg();
1553 LLT ResTy = MRI.getType(Dst);
1554 if (ICmpInst::isEquality(Pred)) {
1555 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1556 // them together. For each equal part, the result should be all 0s. For
1557 // each non-equal part, we'll get at least one 1.
1558 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1560 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1561 auto LHS = std::get<0>(LHSAndRHS);
1562 auto RHS = std::get<1>(LHSAndRHS);
1563 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1564 Xors.push_back(Xor);
1565 }
1566
1567 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1568 // to the desired narrow type so that we can OR them together later.
1569 SmallVector<Register, 4> WidenedXors;
1570 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1571 auto LHS = std::get<0>(LHSAndRHS);
1572 auto RHS = std::get<1>(LHSAndRHS);
1573 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1574 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1575 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1576 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1577 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1578 }
1579
1580 // Now, for each part we broke up, we know if they are equal/not equal
1581 // based off the G_XOR. We can OR these all together and compare against
1582 // 0 to get the result.
1583 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1584 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1585 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1586 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1587 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1588 } else {
1589 // TODO: Handle non-power-of-two types.
1590 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1591 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1592 Register LHSL = LHSPartRegs[0];
1593 Register LHSH = LHSPartRegs[1];
1594 Register RHSL = RHSPartRegs[0];
1595 Register RHSH = RHSPartRegs[1];
1596 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1597 MachineInstrBuilder CmpHEQ =
1600 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1601 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1602 }
1603 MI.eraseFromParent();
1604 return Legalized;
1605 }
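 // Illustrative example (assumed MIR): an equality G_ICMP of two s128 values
 // narrowed with s64 becomes two G_XORs of the corresponding halves, a G_OR
 // of the XOR results, and a final G_ICMP of that G_OR against zero.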
1606 case TargetOpcode::G_FCMP:
1607 if (TypeIdx != 0)
1608 return UnableToLegalize;
1609
1611 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1613 return Legalized;
1614
1615 case TargetOpcode::G_SEXT_INREG: {
1616 if (TypeIdx != 0)
1617 return UnableToLegalize;
1618
1619 int64_t SizeInBits = MI.getOperand(2).getImm();
1620
1621 // So long as the new type has more bits than the bits we're extending we
1622 // don't need to break it apart.
1623 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1625 // We don't lose any non-extension bits by truncating the src and
1626 // sign-extending the dst.
1627 MachineOperand &MO1 = MI.getOperand(1);
1628 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1629 MO1.setReg(TruncMIB.getReg(0));
1630
1631 MachineOperand &MO2 = MI.getOperand(0);
1632 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1634 MIRBuilder.buildSExt(MO2, DstExt);
1635 MO2.setReg(DstExt);
1637 return Legalized;
1638 }
1639
1640 // Break it apart. Components below the extension point are unmodified. The
1641 // component containing the extension point becomes a narrower SEXT_INREG.
1642 // Components above it are ashr'd from the component containing the
1643 // extension point.
1644 if (SizeOp0 % NarrowSize != 0)
1645 return UnableToLegalize;
1646 int NumParts = SizeOp0 / NarrowSize;
1647
1648 // List the registers where the destination will be scattered.
1650 // List the registers where the source will be split.
1652
1653 // Create all the temporary registers.
1654 for (int i = 0; i < NumParts; ++i) {
1655 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1656
1657 SrcRegs.push_back(SrcReg);
1658 }
1659
1660 // Explode the big arguments into smaller chunks.
1661 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1662
1663 Register AshrCstReg =
1664 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1665 .getReg(0);
1666 Register FullExtensionReg;
1667 Register PartialExtensionReg;
1668
1669 // Do the operation on each small part.
1670 for (int i = 0; i < NumParts; ++i) {
1671 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1672 DstRegs.push_back(SrcRegs[i]);
1673 PartialExtensionReg = DstRegs.back();
1674 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1675 assert(PartialExtensionReg &&
1676 "Expected to visit partial extension before full");
1677 if (FullExtensionReg) {
1678 DstRegs.push_back(FullExtensionReg);
1679 continue;
1680 }
1681 DstRegs.push_back(
1682 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1683 .getReg(0));
1684 FullExtensionReg = DstRegs.back();
1685 } else {
1686 DstRegs.push_back(
1688 .buildInstr(
1689 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1690 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1691 .getReg(0));
1692 PartialExtensionReg = DstRegs.back();
1693 }
1694 }
1695
1696 // Gather the destination registers into the final destination.
1697 Register DstReg = MI.getOperand(0).getReg();
1698 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1699 MI.eraseFromParent();
1700 return Legalized;
1701 }
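 // Illustrative example (assumed MIR): narrowing %d:_(s128) = G_SEXT_INREG %s, 33
 // with NarrowTy = s32 keeps the lowest part unchanged, applies G_SEXT_INREG 1
 // to the part holding the extension point, and fills the two upper parts with
 // a G_ASHR of that partially extended part by 31.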
1702 case TargetOpcode::G_BSWAP:
1703 case TargetOpcode::G_BITREVERSE: {
1704 if (SizeOp0 % NarrowSize != 0)
1705 return UnableToLegalize;
1706
1708 SmallVector<Register, 2> SrcRegs, DstRegs;
1709 unsigned NumParts = SizeOp0 / NarrowSize;
1710 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1711 MIRBuilder, MRI);
1712
1713 for (unsigned i = 0; i < NumParts; ++i) {
1714 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1715 {SrcRegs[NumParts - 1 - i]});
1716 DstRegs.push_back(DstPart.getReg(0));
1717 }
1718
1719 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1720
1722 MI.eraseFromParent();
1723 return Legalized;
1724 }
1725 case TargetOpcode::G_PTR_ADD:
1726 case TargetOpcode::G_PTRMASK: {
1727 if (TypeIdx != 1)
1728 return UnableToLegalize;
1730 narrowScalarSrc(MI, NarrowTy, 2);
1732 return Legalized;
1733 }
1734 case TargetOpcode::G_FPTOUI:
1735 case TargetOpcode::G_FPTOSI:
1736 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1737 case TargetOpcode::G_FPEXT:
1738 if (TypeIdx != 0)
1739 return UnableToLegalize;
1741 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1743 return Legalized;
1744 case TargetOpcode::G_FLDEXP:
1745 case TargetOpcode::G_STRICT_FLDEXP:
1746 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1747 case TargetOpcode::G_VSCALE: {
1748 Register Dst = MI.getOperand(0).getReg();
1749 LLT Ty = MRI.getType(Dst);
1750
1751 // Assume VSCALE(1) fits into a legal integer
1752 const APInt One(NarrowTy.getSizeInBits(), 1);
1753 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1754 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1755 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1756 MIRBuilder.buildMul(Dst, ZExt, C);
1757
1758 MI.eraseFromParent();
1759 return Legalized;
1760 }
1761 }
1762}
1763
1764Register LegalizerHelper::coerceToScalar(Register Val) {
1765 LLT Ty = MRI.getType(Val);
1766 if (Ty.isScalar())
1767 return Val;
1768
1769 const DataLayout &DL = MIRBuilder.getDataLayout();
1770 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1771 if (Ty.isPointer()) {
1772 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1773 return Register();
1774 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1775 }
1776
1777 Register NewVal = Val;
1778
1779 assert(Ty.isVector());
1780 if (Ty.isPointerVector())
1781 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1782 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1783}
1784
1785void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1786 unsigned OpIdx, unsigned ExtOpcode) {
1787 MachineOperand &MO = MI.getOperand(OpIdx);
1788 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1789 MO.setReg(ExtB.getReg(0));
1790}
1791
1792void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1793 unsigned OpIdx) {
1794 MachineOperand &MO = MI.getOperand(OpIdx);
1795 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1796 MO.setReg(ExtB.getReg(0));
1797}
1798
1799void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1800 unsigned OpIdx, unsigned TruncOpcode) {
1801 MachineOperand &MO = MI.getOperand(OpIdx);
1802 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1803 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1804 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1805 MO.setReg(DstExt);
1806}
1807
1808void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1809 unsigned OpIdx, unsigned ExtOpcode) {
1810 MachineOperand &MO = MI.getOperand(OpIdx);
1811 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1812 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1813 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1814 MO.setReg(DstTrunc);
1815}
1816
1817void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1818 unsigned OpIdx) {
1819 MachineOperand &MO = MI.getOperand(OpIdx);
1820 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1821 Register Dst = MO.getReg();
1822 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1823 MO.setReg(DstExt);
1824 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1825}
1826
1827void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1828 unsigned OpIdx) {
1829 MachineOperand &MO = MI.getOperand(OpIdx);
1831 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1832}
1833
1834void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1835 MachineOperand &Op = MI.getOperand(OpIdx);
1836 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1837}
1838
1839void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1840 MachineOperand &MO = MI.getOperand(OpIdx);
1841 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1843 MIRBuilder.buildBitcast(MO, CastDst);
1844 MO.setReg(CastDst);
1845}
1846
1847LegalizerHelper::LegalizeResult
1848LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1849 LLT WideTy) {
1850 if (TypeIdx != 1)
1851 return UnableToLegalize;
1852
1853 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1854 if (DstTy.isVector())
1855 return UnableToLegalize;
1856
1857 LLT SrcTy = MRI.getType(Src1Reg);
1858 const int DstSize = DstTy.getSizeInBits();
1859 const int SrcSize = SrcTy.getSizeInBits();
1860 const int WideSize = WideTy.getSizeInBits();
1861 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1862
1863 unsigned NumOps = MI.getNumOperands();
1864 unsigned NumSrc = MI.getNumOperands() - 1;
1865 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1866
1867 if (WideSize >= DstSize) {
1868 // Directly pack the bits in the target type.
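    // Illustrative example (not part of the original source): widening
    // %dst:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8) with WideTy = s32 packs
    // the sources with shifts and ORs:
    //   %za:_(s32) = G_ZEXT %a
    //   %zb:_(s32) = G_ZEXT %b
    //   %sh:_(s32) = G_SHL %zb, 8
    //   %or:_(s32) = G_OR %za, %sh
    //   %dst:_(s16) = G_TRUNC %or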
1869 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1870
1871 for (unsigned I = 2; I != NumOps; ++I) {
1872 const unsigned Offset = (I - 1) * PartSize;
1873
1874 Register SrcReg = MI.getOperand(I).getReg();
1875 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1876
1877 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1878
1879 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1880 MRI.createGenericVirtualRegister(WideTy);
1881
1882 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1883 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1884 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1885 ResultReg = NextResult;
1886 }
1887
1888 if (WideSize > DstSize)
1889 MIRBuilder.buildTrunc(DstReg, ResultReg);
1890 else if (DstTy.isPointer())
1891 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1892
1893 MI.eraseFromParent();
1894 return Legalized;
1895 }
1896
1897 // Unmerge the original values to the GCD type, and recombine to the next
1898 // multiple greater than the original type.
1899 //
1900 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1901 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1902 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1903 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1904 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1905 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1906 // %12:_(s12) = G_MERGE_VALUES %10, %11
1907 //
1908 // Padding with undef if necessary:
1909 //
1910 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1911 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1912 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1913 // %7:_(s2) = G_IMPLICIT_DEF
1914 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1915 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1916 // %10:_(s12) = G_MERGE_VALUES %8, %9
1917
1918 const int GCD = std::gcd(SrcSize, WideSize);
1919 LLT GCDTy = LLT::scalar(GCD);
1920
1922 SmallVector<Register, 8> NewMergeRegs;
1923 SmallVector<Register, 8> Unmerges;
1924 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1925
1926 // Decompose the original operands if they don't evenly divide.
1927 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1928 Register SrcReg = MO.getReg();
1929 if (GCD == SrcSize) {
1930 Unmerges.push_back(SrcReg);
1931 } else {
1932 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1933 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1934 Unmerges.push_back(Unmerge.getReg(J));
1935 }
1936 }
1937
1938 // Pad with undef to the next size that is a multiple of the requested size.
1939 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1940 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1941 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1942 Unmerges.push_back(UndefReg);
1943 }
1944
1945 const int PartsPerGCD = WideSize / GCD;
1946
1947 // Build merges of each piece.
1948 ArrayRef<Register> Slicer(Unmerges);
1949 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1950 auto Merge =
1951 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1952 NewMergeRegs.push_back(Merge.getReg(0));
1953 }
1954
1955 // A truncate may be necessary if the requested type doesn't evenly divide the
1956 // original result type.
1957 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1958 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1959 } else {
1960 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1961 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1962 }
1963
1964 MI.eraseFromParent();
1965 return Legalized;
1966}
1967
1968LegalizerHelper::LegalizeResult
1969LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1970 LLT WideTy) {
1971 if (TypeIdx != 0)
1972 return UnableToLegalize;
1973
1974 int NumDst = MI.getNumOperands() - 1;
1975 Register SrcReg = MI.getOperand(NumDst).getReg();
1976 LLT SrcTy = MRI.getType(SrcReg);
1977 if (SrcTy.isVector())
1978 return UnableToLegalize;
1979
1980 Register Dst0Reg = MI.getOperand(0).getReg();
1981 LLT DstTy = MRI.getType(Dst0Reg);
1982 if (!DstTy.isScalar())
1983 return UnableToLegalize;
1984
1985 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1986 if (SrcTy.isPointer()) {
1988 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1989 LLVM_DEBUG(
1990 dbgs() << "Not casting non-integral address space integer\n");
1991 return UnableToLegalize;
1992 }
1993
1994 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1995 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1996 }
1997
1998 // Widen SrcTy to WideTy. This does not affect the result, but since the
1999 // user requested this size, it is probably better handled than SrcTy and
2000 // should reduce the total number of legalization artifacts.
2001 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2002 SrcTy = WideTy;
2003 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2004 }
2005
2006 // There's no unmerge type to target. Directly extract the bits from the
2007 // source type.
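    // Illustrative example (not part of the original source): widening
    // %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16) with WideTy = s32 becomes:
    //   %w:_(s32) = G_ANYEXT %x
    //   %a:_(s8) = G_TRUNC %w
    //   %s:_(s32) = G_LSHR %w, 8
    //   %b:_(s8) = G_TRUNC %s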
2008 unsigned DstSize = DstTy.getSizeInBits();
2009
2010 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2011 for (int I = 1; I != NumDst; ++I) {
2012 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2013 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2014 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2015 }
2016
2017 MI.eraseFromParent();
2018 return Legalized;
2019 }
2020
2021 // Extend the source to a wider type.
2022 LLT LCMTy = getLCMType(SrcTy, WideTy);
2023
2024 Register WideSrc = SrcReg;
2025 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2026 // TODO: If this is an integral address space, cast to integer and anyext.
2027 if (SrcTy.isPointer()) {
2028 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2029 return UnableToLegalize;
2030 }
2031
2032 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2033 }
2034
2035 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2036
2037 // Create a sequence of unmerges and merges to the original results. Since we
2038 // may have widened the source, we will need to pad the results with dead defs
2039 // to cover the source register.
2040 // e.g. widen s48 to s64:
2041 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2042 //
2043 // =>
2044 // %4:_(s192) = G_ANYEXT %0:_(s96)
2045 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2046 // ; unpack to GCD type, with extra dead defs
2047 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2048 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2049 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2050 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2051 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2052 const LLT GCDTy = getGCDType(WideTy, DstTy);
2053 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2054 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2055
2056 // Directly unmerge to the destination without going through a GCD type
2057 // if possible
2058 if (PartsPerRemerge == 1) {
2059 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2060
2061 for (int I = 0; I != NumUnmerge; ++I) {
2062 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2063
2064 for (int J = 0; J != PartsPerUnmerge; ++J) {
2065 int Idx = I * PartsPerUnmerge + J;
2066 if (Idx < NumDst)
2067 MIB.addDef(MI.getOperand(Idx).getReg());
2068 else {
2069 // Create dead def for excess components.
2070 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2071 }
2072 }
2073
2074 MIB.addUse(Unmerge.getReg(I));
2075 }
2076 } else {
2077 SmallVector<Register, 16> Parts;
2078 for (int J = 0; J != NumUnmerge; ++J)
2079 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2080
2081 SmallVector<Register, 8> RemergeParts;
2082 for (int I = 0; I != NumDst; ++I) {
2083 for (int J = 0; J < PartsPerRemerge; ++J) {
2084 const int Idx = I * PartsPerRemerge + J;
2085 RemergeParts.emplace_back(Parts[Idx]);
2086 }
2087
2088 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2089 RemergeParts.clear();
2090 }
2091 }
2092
2093 MI.eraseFromParent();
2094 return Legalized;
2095}
2096
2097LegalizerHelper::LegalizeResult
2098LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2099 LLT WideTy) {
2100 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2101 unsigned Offset = MI.getOperand(2).getImm();
2102
2103 if (TypeIdx == 0) {
2104 if (SrcTy.isVector() || DstTy.isVector())
2105 return UnableToLegalize;
2106
2107 SrcOp Src(SrcReg);
2108 if (SrcTy.isPointer()) {
2109 // Extracts from pointers can be handled only if they are really just
2110 // simple integers.
2112 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2113 return UnableToLegalize;
2114
2115 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2116 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2117 SrcTy = SrcAsIntTy;
2118 }
2119
2120 if (DstTy.isPointer())
2121 return UnableToLegalize;
2122
2123 if (Offset == 0) {
2124 // Avoid a shift in the degenerate case.
2125 MIRBuilder.buildTrunc(DstReg,
2126 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2127 MI.eraseFromParent();
2128 return Legalized;
2129 }
2130
2131 // Do a shift in the source type.
2132 LLT ShiftTy = SrcTy;
2133 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2134 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2135 ShiftTy = WideTy;
2136 }
2137
2138 auto LShr = MIRBuilder.buildLShr(
2139 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2140 MIRBuilder.buildTrunc(DstReg, LShr);
2141 MI.eraseFromParent();
2142 return Legalized;
2143 }
2144
2145 if (SrcTy.isScalar()) {
2147 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2149 return Legalized;
2150 }
2151
2152 if (!SrcTy.isVector())
2153 return UnableToLegalize;
2154
2155 if (DstTy != SrcTy.getElementType())
2156 return UnableToLegalize;
2157
2158 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2159 return UnableToLegalize;
2160
2162 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2163
2164 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2165 Offset);
2166 widenScalarDst(MI, WideTy.getScalarType(), 0);
2168 return Legalized;
2169}
2170
2171LegalizerHelper::LegalizeResult
2172LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2173 LLT WideTy) {
2174 if (TypeIdx != 0 || WideTy.isVector())
2175 return UnableToLegalize;
2177 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2178 widenScalarDst(MI, WideTy);
2180 return Legalized;
2181}
2182
2183LegalizerHelper::LegalizeResult
2184LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2185 LLT WideTy) {
2186 unsigned Opcode;
2187 unsigned ExtOpcode;
2188 std::optional<Register> CarryIn;
2189 switch (MI.getOpcode()) {
2190 default:
2191 llvm_unreachable("Unexpected opcode!");
2192 case TargetOpcode::G_SADDO:
2193 Opcode = TargetOpcode::G_ADD;
2194 ExtOpcode = TargetOpcode::G_SEXT;
2195 break;
2196 case TargetOpcode::G_SSUBO:
2197 Opcode = TargetOpcode::G_SUB;
2198 ExtOpcode = TargetOpcode::G_SEXT;
2199 break;
2200 case TargetOpcode::G_UADDO:
2201 Opcode = TargetOpcode::G_ADD;
2202 ExtOpcode = TargetOpcode::G_ZEXT;
2203 break;
2204 case TargetOpcode::G_USUBO:
2205 Opcode = TargetOpcode::G_SUB;
2206 ExtOpcode = TargetOpcode::G_ZEXT;
2207 break;
2208 case TargetOpcode::G_SADDE:
2209 Opcode = TargetOpcode::G_UADDE;
2210 ExtOpcode = TargetOpcode::G_SEXT;
2211 CarryIn = MI.getOperand(4).getReg();
2212 break;
2213 case TargetOpcode::G_SSUBE:
2214 Opcode = TargetOpcode::G_USUBE;
2215 ExtOpcode = TargetOpcode::G_SEXT;
2216 CarryIn = MI.getOperand(4).getReg();
2217 break;
2218 case TargetOpcode::G_UADDE:
2219 Opcode = TargetOpcode::G_UADDE;
2220 ExtOpcode = TargetOpcode::G_ZEXT;
2221 CarryIn = MI.getOperand(4).getReg();
2222 break;
2223 case TargetOpcode::G_USUBE:
2224 Opcode = TargetOpcode::G_USUBE;
2225 ExtOpcode = TargetOpcode::G_ZEXT;
2226 CarryIn = MI.getOperand(4).getReg();
2227 break;
2228 }
2229
2230 if (TypeIdx == 1) {
2231 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2232
2234 if (CarryIn)
2235 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2236 widenScalarDst(MI, WideTy, 1);
2237
2239 return Legalized;
2240 }
2241
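  // Illustrative example (not part of the original source): widening an s8
  // G_UADDO to s32 redoes the extension on the truncated sum to detect
  // overflow:
  //   %l:_(s32) = G_ZEXT %lhs:_(s8)
  //   %r:_(s32) = G_ZEXT %rhs:_(s8)
  //   %sum:_(s32) = G_ADD %l, %r
  //   %t:_(s8) = G_TRUNC %sum
  //   %lo:_(s32) = G_ZEXT %t
  //   %ovf:_(s1) = G_ICMP intpred(ne), %sum, %lo
  //   %res:_(s8) = G_TRUNC %sum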
2242 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2243 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2244 // Do the arithmetic in the larger type.
2245 Register NewOp;
2246 if (CarryIn) {
2247 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2248 NewOp = MIRBuilder
2249 .buildInstr(Opcode, {WideTy, CarryOutTy},
2250 {LHSExt, RHSExt, *CarryIn})
2251 .getReg(0);
2252 } else {
2253 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2254 }
2255 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2256 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2257 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2258 // There is no overflow if the ExtOp is the same as NewOp.
2259 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2260 // Now trunc the NewOp to the original result.
2261 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2262 MI.eraseFromParent();
2263 return Legalized;
2264}
2265
2266LegalizerHelper::LegalizeResult
2267LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2268 LLT WideTy) {
2269 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2270 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2271 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2272 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2273 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2274 // We can convert this to:
2275 // 1. Any extend iN to iM
2276 // 2. SHL by M-N
2277 // 3. [US][ADD|SUB|SHL]SAT
2278 // 4. L/ASHR by M-N
2279 //
2280 // It may be more efficient to lower this to a min and a max operation in
2281 // the higher precision arithmetic if the promoted operation isn't legal,
2282 // but this decision is up to the target's lowering request.
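  // Illustrative example (not part of the original source): widening an s8
  // G_SADDSAT to s32 shifts the operands to the top bits so that the wide
  // saturation point matches the narrow one:
  //   %a:_(s32) = G_ANYEXT %lhs:_(s8)
  //   %b:_(s32) = G_ANYEXT %rhs:_(s8)
  //   %la:_(s32) = G_SHL %a, 24
  //   %lb:_(s32) = G_SHL %b, 24
  //   %sat:_(s32) = G_SADDSAT %la, %lb
  //   %sr:_(s32) = G_ASHR %sat, 24
  //   %dst:_(s8) = G_TRUNC %sr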
2283 Register DstReg = MI.getOperand(0).getReg();
2284
2285 unsigned NewBits = WideTy.getScalarSizeInBits();
2286 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2287
2288 // For shift operations, the RHS must be zero-extended so it keeps its value
2289 // as an unsigned shift amount, and it must not itself be shifted left.
2290 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2291 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2292 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2293 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2294 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2295 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2296
2297 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2298 {ShiftL, ShiftR}, MI.getFlags());
2299
2300 // Use a shift that will preserve the number of sign bits when the trunc is
2301 // folded away.
2302 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2303 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2304
2305 MIRBuilder.buildTrunc(DstReg, Result);
2306 MI.eraseFromParent();
2307 return Legalized;
2308}
2309
2310LegalizerHelper::LegalizeResult
2311LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2312 LLT WideTy) {
2313 if (TypeIdx == 1) {
2315 widenScalarDst(MI, WideTy, 1);
2317 return Legalized;
2318 }
2319
2320 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2321 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2322 LLT SrcTy = MRI.getType(LHS);
2323 LLT OverflowTy = MRI.getType(OriginalOverflow);
2324 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2325
2326 // To determine if the result overflowed in the larger type, we extend the
2327 // input to the larger type, do the multiply (checking if it overflows),
2328 // then also check the high bits of the result to see if overflow happened
2329 // there.
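  // Illustrative example (not part of the original source): for an s8 G_UMULO
  // widened to s16, the product of two zero-extended s8 values always fits in
  // s16, so a plain G_MUL is used and overflow is detected by re-extending the
  // low 8 bits:
  //   %l:_(s16) = G_ZEXT %lhs:_(s8)
  //   %r:_(s16) = G_ZEXT %rhs:_(s8)
  //   %m:_(s16) = G_MUL %l, %r
  //   %lo:_(s16) = G_AND %m, 255
  //   %ovf:_(s1) = G_ICMP intpred(ne), %m, %lo
  //   %res:_(s8) = G_TRUNC %m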
2330 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2331 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2332 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2333
2334 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2335 // so we don't need to check the overflow result of larger type Mulo.
2336 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2337
2338 unsigned MulOpc =
2339 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2340
2341 MachineInstrBuilder Mulo;
2342 if (WideMulCanOverflow)
2343 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2344 {LeftOperand, RightOperand});
2345 else
2346 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2347
2348 auto Mul = Mulo->getOperand(0);
2349 MIRBuilder.buildTrunc(Result, Mul);
2350
2351 MachineInstrBuilder ExtResult;
2352 // Overflow occurred if it occurred in the larger type, or if the high part
2353 // of the result does not zero/sign-extend the low part. Check this second
2354 // possibility first.
2355 if (IsSigned) {
2356 // For signed, overflow occurred when the high part does not sign-extend
2357 // the low part.
2358 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2359 } else {
2360 // Unsigned overflow occurred when the high part does not zero-extend the
2361 // low part.
2362 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2363 }
2364
2365 if (WideMulCanOverflow) {
2366 auto Overflow =
2367 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2368 // Finally check if the multiplication in the larger type itself overflowed.
2369 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2370 } else {
2371 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2372 }
2373 MI.eraseFromParent();
2374 return Legalized;
2375}
2376
2377LegalizerHelper::LegalizeResult
2378LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2379 switch (MI.getOpcode()) {
2380 default:
2381 return UnableToLegalize;
2382 case TargetOpcode::G_ATOMICRMW_XCHG:
2383 case TargetOpcode::G_ATOMICRMW_ADD:
2384 case TargetOpcode::G_ATOMICRMW_SUB:
2385 case TargetOpcode::G_ATOMICRMW_AND:
2386 case TargetOpcode::G_ATOMICRMW_OR:
2387 case TargetOpcode::G_ATOMICRMW_XOR:
2388 case TargetOpcode::G_ATOMICRMW_MIN:
2389 case TargetOpcode::G_ATOMICRMW_MAX:
2390 case TargetOpcode::G_ATOMICRMW_UMIN:
2391 case TargetOpcode::G_ATOMICRMW_UMAX:
2392 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2394 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2395 widenScalarDst(MI, WideTy, 0);
2397 return Legalized;
2398 case TargetOpcode::G_ATOMIC_CMPXCHG:
2399 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2401 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2402 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2403 widenScalarDst(MI, WideTy, 0);
2405 return Legalized;
2406 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2407 if (TypeIdx == 0) {
2409 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2410 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2411 widenScalarDst(MI, WideTy, 0);
2413 return Legalized;
2414 }
2415 assert(TypeIdx == 1 &&
2416 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2418 widenScalarDst(MI, WideTy, 1);
2420 return Legalized;
2421 case TargetOpcode::G_EXTRACT:
2422 return widenScalarExtract(MI, TypeIdx, WideTy);
2423 case TargetOpcode::G_INSERT:
2424 return widenScalarInsert(MI, TypeIdx, WideTy);
2425 case TargetOpcode::G_MERGE_VALUES:
2426 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2427 case TargetOpcode::G_UNMERGE_VALUES:
2428 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2429 case TargetOpcode::G_SADDO:
2430 case TargetOpcode::G_SSUBO:
2431 case TargetOpcode::G_UADDO:
2432 case TargetOpcode::G_USUBO:
2433 case TargetOpcode::G_SADDE:
2434 case TargetOpcode::G_SSUBE:
2435 case TargetOpcode::G_UADDE:
2436 case TargetOpcode::G_USUBE:
2437 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2438 case TargetOpcode::G_UMULO:
2439 case TargetOpcode::G_SMULO:
2440 return widenScalarMulo(MI, TypeIdx, WideTy);
2441 case TargetOpcode::G_SADDSAT:
2442 case TargetOpcode::G_SSUBSAT:
2443 case TargetOpcode::G_SSHLSAT:
2444 case TargetOpcode::G_UADDSAT:
2445 case TargetOpcode::G_USUBSAT:
2446 case TargetOpcode::G_USHLSAT:
2447 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2448 case TargetOpcode::G_CTTZ:
2449 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2450 case TargetOpcode::G_CTLZ:
2451 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2452 case TargetOpcode::G_CTPOP: {
2453 if (TypeIdx == 0) {
2455 widenScalarDst(MI, WideTy, 0);
2457 return Legalized;
2458 }
2459
2460 Register SrcReg = MI.getOperand(1).getReg();
2461
2462 // First extend the input.
2463 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2464 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2465 ? TargetOpcode::G_ANYEXT
2466 : TargetOpcode::G_ZEXT;
2467 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2468 LLT CurTy = MRI.getType(SrcReg);
2469 unsigned NewOpc = MI.getOpcode();
2470 if (NewOpc == TargetOpcode::G_CTTZ) {
2471 // The count is the same in the larger type except if the original
2472 // value was zero. This can be handled by setting the bit just off
2473 // the top of the original type.
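      // Illustrative example (not part of the original source): for an s8
      // G_CTTZ widened to s32, OR'ing the extended source with 0x100 makes a
      // zero input produce a count of 8, so G_CTTZ_ZERO_UNDEF can be used on
      // the wide value.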
2474 auto TopBit =
2475 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2476 MIBSrc = MIRBuilder.buildOr(
2477 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2478 // Now we know the operand is non-zero, use the more relaxed opcode.
2479 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2480 }
2481
2482 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2483
2484 if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2485 // An optimization where the result is the CTLZ after the left shift by
2486 // the width difference between WideTy and CurTy, that is,
2487 // MIBSrc = MIBSrc << (SizeInBits(WideTy) - SizeInBits(CurTy))
2488 // Result = ctlz MIBSrc
2489 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2490 MIRBuilder.buildConstant(WideTy, SizeDiff));
2491 }
2492
2493 // Perform the operation at the larger size.
2494 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2495 // This is already the correct result for CTPOP and CTTZs
2496 if (MI.getOpcode() == TargetOpcode::G_CTLZ) {
2497 // The correct result is NewOp - (SizeInBits(WideTy) - SizeInBits(CurTy)).
2498 MIBNewOp = MIRBuilder.buildSub(
2499 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2500 }
2501
2502 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2503 MI.eraseFromParent();
2504 return Legalized;
2505 }
2506 case TargetOpcode::G_BSWAP: {
2508 Register DstReg = MI.getOperand(0).getReg();
2509
2510 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2511 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2512 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2513 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2514
2515 MI.getOperand(0).setReg(DstExt);
2516
2518
2519 LLT Ty = MRI.getType(DstReg);
2520 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2521 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2522 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2523
2524 MIRBuilder.buildTrunc(DstReg, ShrReg);
2526 return Legalized;
2527 }
2528 case TargetOpcode::G_BITREVERSE: {
2530
2531 Register DstReg = MI.getOperand(0).getReg();
2532 LLT Ty = MRI.getType(DstReg);
2533 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2534
2535 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2536 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2537 MI.getOperand(0).setReg(DstExt);
2539
2540 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2541 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2542 MIRBuilder.buildTrunc(DstReg, Shift);
2544 return Legalized;
2545 }
2546 case TargetOpcode::G_FREEZE:
2547 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2549 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2550 widenScalarDst(MI, WideTy);
2552 return Legalized;
2553
2554 case TargetOpcode::G_ABS:
2556 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2557 widenScalarDst(MI, WideTy);
2559 return Legalized;
2560
2561 case TargetOpcode::G_ADD:
2562 case TargetOpcode::G_AND:
2563 case TargetOpcode::G_MUL:
2564 case TargetOpcode::G_OR:
2565 case TargetOpcode::G_XOR:
2566 case TargetOpcode::G_SUB:
2567 case TargetOpcode::G_SHUFFLE_VECTOR:
2568 // Perform operation at larger width (any extension is fine here, high bits
2569 // don't affect the result) and then truncate the result back to the
2570 // original type.
2572 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2573 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2574 widenScalarDst(MI, WideTy);
2576 return Legalized;
2577
2578 case TargetOpcode::G_SBFX:
2579 case TargetOpcode::G_UBFX:
2581
2582 if (TypeIdx == 0) {
2583 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2584 widenScalarDst(MI, WideTy);
2585 } else {
2586 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2587 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2588 }
2589
2591 return Legalized;
2592
2593 case TargetOpcode::G_SHL:
2595
2596 if (TypeIdx == 0) {
2597 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2598 widenScalarDst(MI, WideTy);
2599 } else {
2600 assert(TypeIdx == 1);
2601 // The "number of bits to shift" operand must preserve its value as an
2602 // unsigned integer:
2603 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2604 }
2605
2607 return Legalized;
2608
2609 case TargetOpcode::G_ROTR:
2610 case TargetOpcode::G_ROTL:
2611 if (TypeIdx != 1)
2612 return UnableToLegalize;
2613
2615 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2617 return Legalized;
2618
2619 case TargetOpcode::G_SDIV:
2620 case TargetOpcode::G_SREM:
2621 case TargetOpcode::G_SMIN:
2622 case TargetOpcode::G_SMAX:
2624 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2625 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2626 widenScalarDst(MI, WideTy);
2628 return Legalized;
2629
2630 case TargetOpcode::G_SDIVREM:
2632 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2633 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2634 widenScalarDst(MI, WideTy);
2635 widenScalarDst(MI, WideTy, 1);
2637 return Legalized;
2638
2639 case TargetOpcode::G_ASHR:
2640 case TargetOpcode::G_LSHR:
2642
2643 if (TypeIdx == 0) {
2644 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2645 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2646
2647 widenScalarSrc(MI, WideTy, 1, CvtOp);
2648 widenScalarDst(MI, WideTy);
2649 } else {
2650 assert(TypeIdx == 1);
2651 // The "number of bits to shift" operand must preserve its value as an
2652 // unsigned integer:
2653 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2654 }
2655
2657 return Legalized;
2658 case TargetOpcode::G_UDIV:
2659 case TargetOpcode::G_UREM:
2660 case TargetOpcode::G_UMIN:
2661 case TargetOpcode::G_UMAX:
2663 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2664 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2665 widenScalarDst(MI, WideTy);
2667 return Legalized;
2668
2669 case TargetOpcode::G_UDIVREM:
2671 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2672 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2673 widenScalarDst(MI, WideTy);
2674 widenScalarDst(MI, WideTy, 1);
2676 return Legalized;
2677
2678 case TargetOpcode::G_SELECT:
2680 if (TypeIdx == 0) {
2681 // Perform operation at larger width (any extension is fine here, high
2682 // bits don't affect the result) and then truncate the result back to the
2683 // original type.
2684 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2685 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2686 widenScalarDst(MI, WideTy);
2687 } else {
2688 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2689 // Explicit extension is required here since high bits affect the result.
2690 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2691 }
2693 return Legalized;
2694
2695 case TargetOpcode::G_FPTOSI:
2696 case TargetOpcode::G_FPTOUI:
2697 case TargetOpcode::G_INTRINSIC_LRINT:
2698 case TargetOpcode::G_INTRINSIC_LLRINT:
2699 case TargetOpcode::G_IS_FPCLASS:
2701
2702 if (TypeIdx == 0)
2703 widenScalarDst(MI, WideTy);
2704 else
2705 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2706
2708 return Legalized;
2709 case TargetOpcode::G_SITOFP:
2711
2712 if (TypeIdx == 0)
2713 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2714 else
2715 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2716
2718 return Legalized;
2719 case TargetOpcode::G_UITOFP:
2721
2722 if (TypeIdx == 0)
2723 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2724 else
2725 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2726
2728 return Legalized;
2729 case TargetOpcode::G_LOAD:
2730 case TargetOpcode::G_SEXTLOAD:
2731 case TargetOpcode::G_ZEXTLOAD:
2733 widenScalarDst(MI, WideTy);
2735 return Legalized;
2736
2737 case TargetOpcode::G_STORE: {
2738 if (TypeIdx != 0)
2739 return UnableToLegalize;
2740
2741 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2742 if (!Ty.isScalar())
2743 return UnableToLegalize;
2744
2746
2747 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2748 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2749 widenScalarSrc(MI, WideTy, 0, ExtType);
2750
2752 return Legalized;
2753 }
2754 case TargetOpcode::G_CONSTANT: {
2755 MachineOperand &SrcMO = MI.getOperand(1);
2756 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2757 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2758 MRI.getType(MI.getOperand(0).getReg()));
2759 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2760 ExtOpc == TargetOpcode::G_ANYEXT) &&
2761 "Illegal Extend");
2762 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2763 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2764 ? SrcVal.sext(WideTy.getSizeInBits())
2765 : SrcVal.zext(WideTy.getSizeInBits());
2767 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2768
2769 widenScalarDst(MI, WideTy);
2771 return Legalized;
2772 }
2773 case TargetOpcode::G_FCONSTANT: {
2774 // To avoid changing the bits of the constant due to extension to a larger
2775 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
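    // Illustrative example (not part of the original source): an s16
    // G_FCONSTANT half 1.0 widened to s32 becomes
    //   %c:_(s32) = G_CONSTANT i32 0x3C00
    //   %dst:_(s16) = G_TRUNC %c
    // which preserves the exact bit pattern of the half constant.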
2776 MachineOperand &SrcMO = MI.getOperand(1);
2777 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2779 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2780 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2781 MI.eraseFromParent();
2782 return Legalized;
2783 }
2784 case TargetOpcode::G_IMPLICIT_DEF: {
2786 widenScalarDst(MI, WideTy);
2788 return Legalized;
2789 }
2790 case TargetOpcode::G_BRCOND:
2792 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2794 return Legalized;
2795
2796 case TargetOpcode::G_FCMP:
2798 if (TypeIdx == 0)
2799 widenScalarDst(MI, WideTy);
2800 else {
2801 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2802 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2803 }
2805 return Legalized;
2806
2807 case TargetOpcode::G_ICMP:
2809 if (TypeIdx == 0)
2810 widenScalarDst(MI, WideTy);
2811 else {
2812 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2813 MI.getOperand(1).getPredicate()))
2814 ? TargetOpcode::G_SEXT
2815 : TargetOpcode::G_ZEXT;
2816 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2817 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2818 }
2820 return Legalized;
2821
2822 case TargetOpcode::G_PTR_ADD:
2823 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2825 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2827 return Legalized;
2828
2829 case TargetOpcode::G_PHI: {
2830 assert(TypeIdx == 0 && "Expecting only Idx 0");
2831
2833 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2834 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2836 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2837 }
2838
2839 MachineBasicBlock &MBB = *MI.getParent();
2841 widenScalarDst(MI, WideTy);
2843 return Legalized;
2844 }
2845 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2846 if (TypeIdx == 0) {
2847 Register VecReg = MI.getOperand(1).getReg();
2848 LLT VecTy = MRI.getType(VecReg);
2850
2851 widenScalarSrc(
2852 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2853 TargetOpcode::G_ANYEXT);
2854
2855 widenScalarDst(MI, WideTy, 0);
2857 return Legalized;
2858 }
2859
2860 if (TypeIdx != 2)
2861 return UnableToLegalize;
2863 // TODO: Probably should be zext
2864 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2866 return Legalized;
2867 }
2868 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2869 if (TypeIdx == 0) {
2871 const LLT WideEltTy = WideTy.getElementType();
2872
2873 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2874 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2875 widenScalarDst(MI, WideTy, 0);
2877 return Legalized;
2878 }
2879
2880 if (TypeIdx == 1) {
2882
2883 Register VecReg = MI.getOperand(1).getReg();
2884 LLT VecTy = MRI.getType(VecReg);
2885 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2886
2887 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2888 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2889 widenScalarDst(MI, WideVecTy, 0);
2891 return Legalized;
2892 }
2893
2894 if (TypeIdx == 2) {
2896 // TODO: Probably should be zext
2897 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2899 return Legalized;
2900 }
2901
2902 return UnableToLegalize;
2903 }
2904 case TargetOpcode::G_FADD:
2905 case TargetOpcode::G_FMUL:
2906 case TargetOpcode::G_FSUB:
2907 case TargetOpcode::G_FMA:
2908 case TargetOpcode::G_FMAD:
2909 case TargetOpcode::G_FNEG:
2910 case TargetOpcode::G_FABS:
2911 case TargetOpcode::G_FCANONICALIZE:
2912 case TargetOpcode::G_FMINNUM:
2913 case TargetOpcode::G_FMAXNUM:
2914 case TargetOpcode::G_FMINNUM_IEEE:
2915 case TargetOpcode::G_FMAXNUM_IEEE:
2916 case TargetOpcode::G_FMINIMUM:
2917 case TargetOpcode::G_FMAXIMUM:
2918 case TargetOpcode::G_FDIV:
2919 case TargetOpcode::G_FREM:
2920 case TargetOpcode::G_FCEIL:
2921 case TargetOpcode::G_FFLOOR:
2922 case TargetOpcode::G_FCOS:
2923 case TargetOpcode::G_FSIN:
2924 case TargetOpcode::G_FTAN:
2925 case TargetOpcode::G_FACOS:
2926 case TargetOpcode::G_FASIN:
2927 case TargetOpcode::G_FATAN:
2928 case TargetOpcode::G_FCOSH:
2929 case TargetOpcode::G_FSINH:
2930 case TargetOpcode::G_FTANH:
2931 case TargetOpcode::G_FLOG10:
2932 case TargetOpcode::G_FLOG:
2933 case TargetOpcode::G_FLOG2:
2934 case TargetOpcode::G_FRINT:
2935 case TargetOpcode::G_FNEARBYINT:
2936 case TargetOpcode::G_FSQRT:
2937 case TargetOpcode::G_FEXP:
2938 case TargetOpcode::G_FEXP2:
2939 case TargetOpcode::G_FEXP10:
2940 case TargetOpcode::G_FPOW:
2941 case TargetOpcode::G_INTRINSIC_TRUNC:
2942 case TargetOpcode::G_INTRINSIC_ROUND:
2943 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2944 assert(TypeIdx == 0);
2946
2947 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2948 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2949
2950 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2952 return Legalized;
2953 case TargetOpcode::G_FPOWI:
2954 case TargetOpcode::G_FLDEXP:
2955 case TargetOpcode::G_STRICT_FLDEXP: {
2956 if (TypeIdx == 0) {
2957 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2958 return UnableToLegalize;
2959
2961 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2962 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2964 return Legalized;
2965 }
2966
2967 if (TypeIdx == 1) {
2968 // For some reason SelectionDAG tries to promote to a libcall without
2969 // actually changing the integer type for promotion.
2971 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2973 return Legalized;
2974 }
2975
2976 return UnableToLegalize;
2977 }
2978 case TargetOpcode::G_FFREXP: {
2980
2981 if (TypeIdx == 0) {
2982 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2983 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2984 } else {
2985 widenScalarDst(MI, WideTy, 1);
2986 }
2987
2989 return Legalized;
2990 }
2991 case TargetOpcode::G_INTTOPTR:
2992 if (TypeIdx != 1)
2993 return UnableToLegalize;
2994
2996 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2998 return Legalized;
2999 case TargetOpcode::G_PTRTOINT:
3000 if (TypeIdx != 0)
3001 return UnableToLegalize;
3002
3004 widenScalarDst(MI, WideTy, 0);
3006 return Legalized;
3007 case TargetOpcode::G_BUILD_VECTOR: {
3009
3010 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3011 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3012 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3013
3014 // Avoid changing the result vector type if the source element type was
3015 // requested.
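    // Illustrative example (not part of the original source): for TypeIdx == 1,
    //   %v:_(<2 x s8>) = G_BUILD_VECTOR %a:_(s8), %b:_(s8)
    // keeps its result type and becomes
    //   %v:_(<2 x s8>) = G_BUILD_VECTOR_TRUNC %wa:_(s32), %wb:_(s32)
    // where %wa and %wb are any-extensions of the original sources.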
3016 if (TypeIdx == 1) {
3017 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3018 } else {
3019 widenScalarDst(MI, WideTy, 0);
3020 }
3021
3023 return Legalized;
3024 }
3025 case TargetOpcode::G_SEXT_INREG:
3026 if (TypeIdx != 0)
3027 return UnableToLegalize;
3028
3030 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3031 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3033 return Legalized;
3034 case TargetOpcode::G_PTRMASK: {
3035 if (TypeIdx != 1)
3036 return UnableToLegalize;
3038 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3040 return Legalized;
3041 }
3042 case TargetOpcode::G_VECREDUCE_FADD:
3043 case TargetOpcode::G_VECREDUCE_FMUL:
3044 case TargetOpcode::G_VECREDUCE_FMIN:
3045 case TargetOpcode::G_VECREDUCE_FMAX:
3046 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3047 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3048 if (TypeIdx != 0)
3049 return UnableToLegalize;
3051 Register VecReg = MI.getOperand(1).getReg();
3052 LLT VecTy = MRI.getType(VecReg);
3053 LLT WideVecTy = VecTy.isVector()
3054 ? LLT::vector(VecTy.getElementCount(), WideTy)
3055 : WideTy;
3056 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3057 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3059 return Legalized;
3060 }
3061 case TargetOpcode::G_VSCALE: {
3062 MachineOperand &SrcMO = MI.getOperand(1);
3063 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3064 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3065 // The CImm is always a signed value
3066 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3068 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3069 widenScalarDst(MI, WideTy);
3071 return Legalized;
3072 }
3073 case TargetOpcode::G_SPLAT_VECTOR: {
3074 if (TypeIdx != 1)
3075 return UnableToLegalize;
3076
3078 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3080 return Legalized;
3081 }
3082 }
3083}
3084
3085static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3086 MachineIRBuilder &B, Register Src, LLT Ty) {
3087 auto Unmerge = B.buildUnmerge(Ty, Src);
3088 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3089 Pieces.push_back(Unmerge.getReg(I));
3090}
3091
3092static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3093 MachineIRBuilder &MIRBuilder) {
3094 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3095 MachineFunction &MF = MIRBuilder.getMF();
3096 const DataLayout &DL = MIRBuilder.getDataLayout();
3097 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3098 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3099 LLT DstLLT = MRI.getType(DstReg);
3100
3101 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3102
3103 auto Addr = MIRBuilder.buildConstantPool(
3104 AddrPtrTy,
3105 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3106
3107 MachineMemOperand *MMO =
3108 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3109 MachineMemOperand::MOLoad, DstLLT, Alignment);
3110
3111 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3112}
3113
3114LegalizerHelper::LegalizeResult
3115LegalizerHelper::lowerConstant(MachineInstr &MI) {
3116 const MachineOperand &ConstOperand = MI.getOperand(1);
3117 const Constant *ConstantVal = ConstOperand.getCImm();
3118
3119 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3120 MI.eraseFromParent();
3121
3122 return Legalized;
3123}
3124
3125LegalizerHelper::LegalizeResult
3126LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3127 const MachineOperand &ConstOperand = MI.getOperand(1);
3128 const Constant *ConstantVal = ConstOperand.getFPImm();
3129
3130 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3131 MI.eraseFromParent();
3132
3133 return Legalized;
3134}
3135
3136LegalizerHelper::LegalizeResult
3137LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3138 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3139 if (SrcTy.isVector()) {
3140 LLT SrcEltTy = SrcTy.getElementType();
3141 SmallVector<Register, 8> SrcRegs;
3142
3143 if (DstTy.isVector()) {
3144 int NumDstElt = DstTy.getNumElements();
3145 int NumSrcElt = SrcTy.getNumElements();
3146
3147 LLT DstEltTy = DstTy.getElementType();
3148 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3149 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3150
3151 // If there's an element size mismatch, insert intermediate casts to match
3152 // the result element type.
3153 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3154 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3155 //
3156 // =>
3157 //
3158 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3159 // %4:_(<2 x s8>) = G_BITCAST %2
3160 // %5:_(<2 x s8>) = G_BITCAST %3
3161 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3162 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3163 SrcPartTy = SrcEltTy;
3164 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3165 //
3166 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3167 //
3168 // =>
3169 //
3170 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3171 // %4:_(s16) = G_BITCAST %2
3172 // %5:_(s16) = G_BITCAST %3
3173 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3174 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3175 DstCastTy = DstEltTy;
3176 }
3177
3178 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3179 for (Register &SrcReg : SrcRegs)
3180 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3181 } else
3182 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3183
3184 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3185 MI.eraseFromParent();
3186 return Legalized;
3187 }
3188
3189 if (DstTy.isVector()) {
3190 SmallVector<Register, 8> SrcRegs;
3191 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3192 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3193 MI.eraseFromParent();
3194 return Legalized;
3195 }
3196
3197 return UnableToLegalize;
3198}
3199
3200/// Figure out the bit offset into a register when coercing a vector index for
3201/// the wide element type. This is only for the case when promoting vector to
3202/// one with larger elements.
3203///
3204///
3205/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3206/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3208 Register Idx,
3209 unsigned NewEltSize,
3210 unsigned OldEltSize) {
3211 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3212 LLT IdxTy = B.getMRI()->getType(Idx);
3213
3214 // Now figure out the amount we need to shift to get the target bits.
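  // Illustrative example (not part of the original source): viewing a <8 x s8>
  // vector as <2 x s32> gives Log2EltRatio = 2, so the bit offset of element
  // %idx within its containing s32 lane is (%idx & 3) << 3.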
3215 auto OffsetMask = B.buildConstant(
3216 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3217 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3218 return B.buildShl(IdxTy, OffsetIdx,
3219 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3220}
3221
3222/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3223/// is casting to a vector with a smaller element size, perform multiple element
3224/// extracts and merge the results. If this is coercing to a vector with larger
3225/// elements, index the bitcasted vector and extract the target element with bit
3226/// operations. This is intended to force the indexing in the native register
3227/// size for architectures that can dynamically index the register file.
3228LegalizerHelper::LegalizeResult
3229LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3230 LLT CastTy) {
3231 if (TypeIdx != 1)
3232 return UnableToLegalize;
3233
3234 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3235
3236 LLT SrcEltTy = SrcVecTy.getElementType();
3237 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3238 unsigned OldNumElts = SrcVecTy.getNumElements();
3239
3240 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3241 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3242
3243 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3244 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3245 if (NewNumElts > OldNumElts) {
3246 // Decreasing the vector element size
3247 //
3248 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3249 // =>
3250 // v4i32:castx = bitcast x:v2i64
3251 //
3252 // i64 = bitcast
3253 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3254 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3255 //
3256 if (NewNumElts % OldNumElts != 0)
3257 return UnableToLegalize;
3258
3259 // Type of the intermediate result vector.
3260 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3261 LLT MidTy =
3262 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3263
3264 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3265
3266 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3267 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3268
3269 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3270 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3271 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3272 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3273 NewOps[I] = Elt.getReg(0);
3274 }
3275
3276 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3277 MIRBuilder.buildBitcast(Dst, NewVec);
3278 MI.eraseFromParent();
3279 return Legalized;
3280 }
3281
3282 if (NewNumElts < OldNumElts) {
3283 if (NewEltSize % OldEltSize != 0)
3284 return UnableToLegalize;
3285
3286 // This only depends on powers of 2 because we use bit tricks to figure out
3287 // the bit offset we need to shift to get the target element. A general
3288 // expansion could emit division/multiply.
3289 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3290 return UnableToLegalize;
3291
3292 // Increasing the vector element size.
3293 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3294 //
3295 // =>
3296 //
3297 // %cast = G_BITCAST %vec
3298 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3299 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3300 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3301 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3302 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3303 // %elt = G_TRUNC %elt_bits
3304
3305 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3306 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3307
3308 // Divide to get the index in the wider element type.
3309 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3310
3311 Register WideElt = CastVec;
3312 if (CastTy.isVector()) {
3313 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3314 ScaledIdx).getReg(0);
3315 }
3316
3317 // Compute the bit offset into the register of the target element.
3319 MIRBuilder, Idx, NewEltSize, OldEltSize);
3320
3321 // Shift the wide element to get the target element.
3322 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3323 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3324 MI.eraseFromParent();
3325 return Legalized;
3326 }
3327
3328 return UnableToLegalize;
3329}
3330
3331/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3332/// TargetReg, while preserving other bits in \p TargetReg.
3333///
3334/// (ZExt(InsertReg) << Offset) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << Offset))
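/// Illustrative example (not part of the original source): inserting an s8
/// value at bit offset 16 of an s32 target computes
///   (ZExt(InsertReg) << 16) | (TargetReg & ~(0xff << 16))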
3335static Register buildBitFieldInsert(MachineIRBuilder &B,
3336 Register TargetReg, Register InsertReg,
3337 Register OffsetBits) {
3338 LLT TargetTy = B.getMRI()->getType(TargetReg);
3339 LLT InsertTy = B.getMRI()->getType(InsertReg);
3340 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3341 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3342
3343 // Produce a bitmask of the value to insert
3344 auto EltMask = B.buildConstant(
3345 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3346 InsertTy.getSizeInBits()));
3347 // Shift it into position
3348 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3349 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3350
3351 // Clear out the bits in the wide element
3352 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3353
3354 // The zero-extended value to insert already has zeros outside its field, so
3355 // OR it into the masked wide element.
3356 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3357}
3358
3359/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3360/// is increasing the element size, perform the indexing in the target element
3361/// type, and use bit operations to insert at the element position. This is
3362/// intended for architectures that can dynamically index the register file and
3363/// want to force indexing in the native register size.
3364LegalizerHelper::LegalizeResult
3365LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3366 LLT CastTy) {
3367 if (TypeIdx != 0)
3368 return UnableToLegalize;
3369
3370 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3371 MI.getFirst4RegLLTs();
3372 LLT VecTy = DstTy;
3373
3374 LLT VecEltTy = VecTy.getElementType();
3375 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3376 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3377 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3378
3379 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3380 unsigned OldNumElts = VecTy.getNumElements();
3381
3382 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3383 if (NewNumElts < OldNumElts) {
3384 if (NewEltSize % OldEltSize != 0)
3385 return UnableToLegalize;
3386
3387 // This only depends on powers of 2 because we use bit tricks to figure out
3388 // the bit offset we need to shift to get the target element. A general
3389 // expansion could emit division/multiply.
3390 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3391 return UnableToLegalize;
3392
3393 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3394 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3395
3396 // Divide to get the index in the wider element type.
3397 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3398
3399 Register ExtractedElt = CastVec;
3400 if (CastTy.isVector()) {
3401 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3402 ScaledIdx).getReg(0);
3403 }
3404
3405 // Compute the bit offset into the register of the target element.
3407 MIRBuilder, Idx, NewEltSize, OldEltSize);
3408
3409 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3410 Val, OffsetBits);
3411 if (CastTy.isVector()) {
3412 InsertedElt = MIRBuilder.buildInsertVectorElement(
3413 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3414 }
3415
3416 MIRBuilder.buildBitcast(Dst, InsertedElt);
3417 MI.eraseFromParent();
3418 return Legalized;
3419 }
3420
3421 return UnableToLegalize;
3422}
3423
3424LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3425 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3426 Register DstReg = LoadMI.getDstReg();
3427 Register PtrReg = LoadMI.getPointerReg();
3428 LLT DstTy = MRI.getType(DstReg);
3429 MachineMemOperand &MMO = LoadMI.getMMO();
3430 LLT MemTy = MMO.getMemoryType();
3431 MachineFunction &MF = MIRBuilder.getMF();
3432
3433 unsigned MemSizeInBits = MemTy.getSizeInBits();
3434 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3435
3436 if (MemSizeInBits != MemStoreSizeInBits) {
3437 if (MemTy.isVector())
3438 return UnableToLegalize;
3439
3440 // Promote to a byte-sized load if not loading an integral number of
3441 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3442 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3443 MachineMemOperand *NewMMO =
3444 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3445
3446 Register LoadReg = DstReg;
3447 LLT LoadTy = DstTy;
3448
3449 // If this wasn't already an extending load, we need to widen the result
3450 // register to avoid creating a load with a narrower result than the source.
3451 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3452 LoadTy = WideMemTy;
3453 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3454 }
3455
3456 if (isa<GSExtLoad>(LoadMI)) {
3457 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3458 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3459 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3460 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3461 // The extra bits are guaranteed to be zero, since we stored them that
3462 // way. A zext load from Wide thus automatically gives zext from MemVT.
3463 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3464 } else {
3465 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3466 }
3467
3468 if (DstTy != LoadTy)
3469 MIRBuilder.buildTrunc(DstReg, LoadReg);
3470
3471 LoadMI.eraseFromParent();
3472 return Legalized;
3473 }
3474
3475 // Big endian lowering not implemented.
3476 if (MIRBuilder.getDataLayout().isBigEndian())
3477 return UnableToLegalize;
3478
3479 // This load needs splitting into power of 2 sized loads.
3480 //
3481 // Our strategy here is to generate anyextending loads for the smaller
3482 // types up to next power-2 result type, and then combine the two larger
3483 // result values together, before truncating back down to the non-pow-2
3484 // type.
3485 // E.g. v1 = i24 load =>
3486 // v2 = i32 zextload (2 byte)
3487 // v3 = i32 load (1 byte)
3488 // v4 = i32 shl v3, 16
3489 // v5 = i32 or v4, v2
3490 // v1 = i24 trunc v5
3491 // By doing this we generate the correct truncate which should get
3492 // combined away as an artifact with a matching extend.
3493
3494 uint64_t LargeSplitSize, SmallSplitSize;
3495
3496 if (!isPowerOf2_32(MemSizeInBits)) {
3497 // This load needs splitting into power of 2 sized loads.
3498 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3499 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3500 } else {
3501 // This is already a power of 2, but we still need to split this in half.
3502 //
3503 // Assume we're being asked to decompose an unaligned load.
3504 // TODO: If this requires multiple splits, handle them all at once.
3505 auto &Ctx = MF.getFunction().getContext();
3506 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3507 return UnableToLegalize;
3508
3509 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3510 }
3511
3512 if (MemTy.isVector()) {
3513 // TODO: Handle vector extloads
3514 if (MemTy != DstTy)
3515 return UnableToLegalize;
3516
3517 // TODO: We can do better than scalarizing the vector and at least split it
3518 // in half.
3519 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3520 }
3521
3522 MachineMemOperand *LargeMMO =
3523 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3524 MachineMemOperand *SmallMMO =
3525 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3526
3527 LLT PtrTy = MRI.getType(PtrReg);
3528 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3529 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3530 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3531 PtrReg, *LargeMMO);
3532
3533 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3534 LargeSplitSize / 8);
3535 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3536 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3537 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3538 SmallPtr, *SmallMMO);
3539
3540 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3541 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3542
3543 if (AnyExtTy == DstTy)
3544 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3545 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3546 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3547 MIRBuilder.buildTrunc(DstReg, {Or});
3548 } else {
3549 assert(DstTy.isPointer() && "expected pointer");
3550 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3551
3552 // FIXME: We currently consider this to be illegal for non-integral address
3553 // spaces, but we still need a way to reinterpret the bits.
3554 MIRBuilder.buildIntToPtr(DstReg, Or);
3555 }
3556
3557 LoadMI.eraseFromParent();
3558 return Legalized;
3559}
3560
3561 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3562 // Lower a non-power of 2 store into multiple pow-2 stores.
3563 // E.g. split an i24 store into an i16 store + i8 store.
3564 // We do this by first extending the stored value to the next largest power
3565 // of 2 type, and then using truncating stores to store the components.
3566 // As with G_LOAD, this generates an extend that can be artifact-combined
3567 // away instead of leaving behind extracts.
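// Illustrative expansion (little-endian, sizes assumed), mirroring the G_LOAD
// example above: i24 store of v1 =>
// v2 = i32 anyext v1
// v3 = i32 lshr v2, 16
// i16 truncstore v2 (2 bytes at offset 0)
// i8 truncstore v3 (1 byte at offset 2)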
3568 Register SrcReg = StoreMI.getValueReg();
3569 Register PtrReg = StoreMI.getPointerReg();
3570 LLT SrcTy = MRI.getType(SrcReg);
3571 MachineFunction &MF = MIRBuilder.getMF();
3572 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3573 LLT MemTy = MMO.getMemoryType();
3574
3575 unsigned StoreWidth = MemTy.getSizeInBits();
3576 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3577
3578 if (StoreWidth != StoreSizeInBits) {
3579 if (SrcTy.isVector())
3580 return UnableToLegalize;
3581
3582 // Promote to a byte-sized store with upper bits zero if not
3583 // storing an integral number of bytes. For example, promote
3584 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3585 LLT WideTy = LLT::scalar(StoreSizeInBits);
3586
3587 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3588 // Avoid creating a store with a narrower source than result.
3589 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3590 SrcTy = WideTy;
3591 }
3592
3593 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3594
3595 MachineMemOperand *NewMMO =
3596 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3597 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3598 StoreMI.eraseFromParent();
3599 return Legalized;
3600 }
3601
3602 if (MemTy.isVector()) {
3603 // TODO: Handle vector trunc stores
3604 if (MemTy != SrcTy)
3605 return UnableToLegalize;
3606
3607 // TODO: We can do better than scalarizing the vector and at least split it
3608 // in half.
3609 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3610 }
3611
3612 unsigned MemSizeInBits = MemTy.getSizeInBits();
3613 uint64_t LargeSplitSize, SmallSplitSize;
3614
3615 if (!isPowerOf2_32(MemSizeInBits)) {
3616 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3617 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3618 } else {
3619 auto &Ctx = MF.getFunction().getContext();
3620 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3621 return UnableToLegalize; // Don't know what we're being asked to do.
3622
3623 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3624 }
3625
3626 // Extend to the next pow-2. If this store was itself the result of lowering,
3627 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3628 // that's wider than the stored size.
3629 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3630 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3631
3632 if (SrcTy.isPointer()) {
3633 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3634 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3635 }
3636
3637 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3638
3639 // Obtain the smaller value by shifting away the larger value.
3640 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3641 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3642
3643 // Generate the PtrAdd and truncating stores.
3644 LLT PtrTy = MRI.getType(PtrReg);
3645 auto OffsetCst = MIRBuilder.buildConstant(
3646 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3647 auto SmallPtr =
3648 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3649
3650 MachineMemOperand *LargeMMO =
3651 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3652 MachineMemOperand *SmallMMO =
3653 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3654 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3655 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3656 StoreMI.eraseFromParent();
3657 return Legalized;
3658}
3659
3660LegalizerHelper::LegalizeResult
3661LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3662 switch (MI.getOpcode()) {
3663 case TargetOpcode::G_LOAD: {
3664 if (TypeIdx != 0)
3665 return UnableToLegalize;
3666 MachineMemOperand &MMO = **MI.memoperands_begin();
3667
3668 // Not sure how to interpret a bitcast of an extending load.
3669 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3670 return UnableToLegalize;
3671
3672 Observer.changingInstr(MI);
3673 bitcastDst(MI, CastTy, 0);
3674 MMO.setType(CastTy);
3675 // The range metadata is no longer valid when reinterpreted as a different
3676 // type.
3677 MMO.clearRanges();
3678 Observer.changedInstr(MI);
3679 return Legalized;
3680 }
3681 case TargetOpcode::G_STORE: {
3682 if (TypeIdx != 0)
3683 return UnableToLegalize;
3684
3685 MachineMemOperand &MMO = **MI.memoperands_begin();
3686
3687 // Not sure how to interpret a bitcast of a truncating store.
3688 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3689 return UnableToLegalize;
3690
3691 Observer.changingInstr(MI);
3692 bitcastSrc(MI, CastTy, 0);
3693 MMO.setType(CastTy);
3694 Observer.changedInstr(MI);
3695 return Legalized;
3696 }
3697 case TargetOpcode::G_SELECT: {
3698 if (TypeIdx != 0)
3699 return UnableToLegalize;
3700
3701 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3702 LLVM_DEBUG(
3703 dbgs() << "bitcast action not implemented for vector select\n");
3704 return UnableToLegalize;
3705 }
3706
3707 Observer.changingInstr(MI);
3708 bitcastSrc(MI, CastTy, 2);
3709 bitcastSrc(MI, CastTy, 3);
3710 bitcastDst(MI, CastTy, 0);
3711 Observer.changedInstr(MI);
3712 return Legalized;
3713 }
3714 case TargetOpcode::G_AND:
3715 case TargetOpcode::G_OR:
3716 case TargetOpcode::G_XOR: {
3717 Observer.changingInstr(MI);
3718 bitcastSrc(MI, CastTy, 1);
3719 bitcastSrc(MI, CastTy, 2);
3720 bitcastDst(MI, CastTy, 0);
3721 Observer.changedInstr(MI);
3722 return Legalized;
3723 }
3724 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3725 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3726 case TargetOpcode::G_INSERT_VECTOR_ELT:
3727 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3728 default:
3729 return UnableToLegalize;
3730 }
3731}
3732
3733// Legalize an instruction by changing the opcode in place.
3734void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3735 Observer.changingInstr(MI);
3736 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3737 Observer.changedInstr(MI);
3738}
3739
3740LegalizerHelper::LegalizeResult
3741LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3742 using namespace TargetOpcode;
3743
3744 switch(MI.getOpcode()) {
3745 default:
3746 return UnableToLegalize;
3747 case TargetOpcode::G_FCONSTANT:
3748 return lowerFConstant(MI);
3749 case TargetOpcode::G_BITCAST:
3750 return lowerBitcast(MI);
3751 case TargetOpcode::G_SREM:
3752 case TargetOpcode::G_UREM: {
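// The remainder is rebuilt from the quotient as src1 - (src1 / src2) * src2;
// e.g. (illustrative) G_UREM expands to G_UDIV + G_MUL + G_SUB below.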
3753 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3754 auto Quot =
3755 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3756 {MI.getOperand(1), MI.getOperand(2)});
3757
3758 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3759 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3760 MI.eraseFromParent();
3761 return Legalized;
3762 }
3763 case TargetOpcode::G_SADDO:
3764 case TargetOpcode::G_SSUBO:
3765 return lowerSADDO_SSUBO(MI);
3766 case TargetOpcode::G_UMULH:
3767 case TargetOpcode::G_SMULH:
3768 return lowerSMULH_UMULH(MI);
3769 case TargetOpcode::G_SMULO:
3770 case TargetOpcode::G_UMULO: {
3771 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3772 // result.
3773 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3774 LLT Ty = MRI.getType(Res);
3775
3776 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3777 ? TargetOpcode::G_SMULH
3778 : TargetOpcode::G_UMULH;
3779
3780 Observer.changingInstr(MI);
3781 const auto &TII = MIRBuilder.getTII();
3782 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3783 MI.removeOperand(1);
3784 Observer.changedInstr(MI);
3785
3786 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3787 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3788
3789 // Move insert point forward so we can use the Res register if needed.
3790 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3791
3792 // For *signed* multiply, overflow is detected by checking:
3793 // (hi != (lo >> bitwidth-1))
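// Illustrative s8 example: 65 * 3 = 195 gives lo = 0xC3 and hi = 0x00;
// ashr(lo, 7) = 0xFF != hi, so overflow is reported.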
3794 if (Opcode == TargetOpcode::G_SMULH) {
3795 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3796 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3797 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3798 } else {
3799 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3800 }
3801 return Legalized;
3802 }
3803 case TargetOpcode::G_FNEG: {
3804 auto [Res, SubByReg] = MI.getFirst2Regs();
3805 LLT Ty = MRI.getType(Res);
3806
3807 // TODO: Handle vector types once we are able to
3808 // represent them.
3809 if (Ty.isVector())
3810 return UnableToLegalize;
3811 auto SignMask =
3812 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3813 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3814 MI.eraseFromParent();
3815 return Legalized;
3816 }
3817 case TargetOpcode::G_FSUB:
3818 case TargetOpcode::G_STRICT_FSUB: {
3819 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3820 LLT Ty = MRI.getType(Res);
3821
3822 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3823 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3824
3825 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3826 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3827 else
3828 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3829
3830 MI.eraseFromParent();
3831 return Legalized;
3832 }
3833 case TargetOpcode::G_FMAD:
3834 return lowerFMad(MI);
3835 case TargetOpcode::G_FFLOOR:
3836 return lowerFFloor(MI);
3837 case TargetOpcode::G_INTRINSIC_ROUND:
3838 return lowerIntrinsicRound(MI);
3839 case TargetOpcode::G_FRINT: {
3840 // Since round even is the assumed rounding mode for unconstrained FP
3841 // operations, rint and roundeven are the same operation.
3842 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3843 return Legalized;
3844 }
3845 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3846 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3847 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3848 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
3849 **MI.memoperands_begin());
3850 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
3851 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3852 MI.eraseFromParent();
3853 return Legalized;
3854 }
3855 case TargetOpcode::G_LOAD:
3856 case TargetOpcode::G_SEXTLOAD:
3857 case TargetOpcode::G_ZEXTLOAD:
3858 return lowerLoad(cast<GAnyLoad>(MI));
3859 case TargetOpcode::G_STORE:
3860 return lowerStore(cast<GStore>(MI));
3861 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3862 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3863 case TargetOpcode::G_CTLZ:
3864 case TargetOpcode::G_CTTZ:
3865 case TargetOpcode::G_CTPOP:
3866 return lowerBitCount(MI);
3867 case G_UADDO: {
3868 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3869
3870 Register NewRes = MRI.cloneVirtualRegister(Res);
3871
3872 MIRBuilder.buildAdd(NewRes, LHS, RHS);
3873 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
3874
3875 MIRBuilder.buildCopy(Res, NewRes);
3876
3877 MI.eraseFromParent();
3878 return Legalized;
3879 }
3880 case G_UADDE: {
3881 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3882 const LLT CondTy = MRI.getType(CarryOut);
3883 const LLT Ty = MRI.getType(Res);
3884
3885 Register NewRes = MRI.cloneVirtualRegister(Res);
3886
3887 // Initial add of the two operands.
3888 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3889
3890 // Initial check for carry.
3891 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3892
3893 // Add the sum and the carry.
3894 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3895 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
3896
3897 // Second check for carry. We can only carry if the initial sum is all 1s
3898 // and the carry is set, resulting in a new sum of 0.
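// Illustrative s8 example: LHS = 0xFF, RHS = 0x00, CarryIn = 1 produces
// TmpRes = 0xFF with no carry from the first add, NewRes = 0x00, and only
// this second check reports the carry.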
3899 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3900 auto ResEqZero =
3901 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
3902 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3903 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3904
3905 MIRBuilder.buildCopy(Res, NewRes);
3906
3907 MI.eraseFromParent();
3908 return Legalized;
3909 }
3910 case G_USUBO: {
3911 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3912
3913 MIRBuilder.buildSub(Res, LHS, RHS);
3914 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3915
3916 MI.eraseFromParent();
3917 return Legalized;
3918 }
3919 case G_USUBE: {
3920 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3921 const LLT CondTy = MRI.getType(BorrowOut);
3922 const LLT Ty = MRI.getType(Res);
3923
3924 // Initial subtract of the two operands.
3925 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3926
3927 // Initial check for borrow.
3928 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3929
3930 // Subtract the borrow from the first subtract.
3931 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3932 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3933
3934 // Second check for borrow. We can only borrow if the initial difference is
3935 // 0 and the borrow is set, resulting in a new difference of all 1s.
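// Illustrative s8 example: LHS = 0x00, RHS = 0x00, BorrowIn = 1 produces
// TmpRes = 0x00 with no borrow from the first sub, Res = 0xFF, and only
// this second check reports the borrow.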
3936 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3937 auto TmpResEqZero =
3938 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3939 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3940 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3941
3942 MI.eraseFromParent();
3943 return Legalized;
3944 }
3945 case G_UITOFP:
3946 return lowerUITOFP(MI);
3947 case G_SITOFP:
3948 return lowerSITOFP(MI);
3949 case G_FPTOUI:
3950 return lowerFPTOUI(MI);
3951 case G_FPTOSI:
3952 return lowerFPTOSI(MI);
3953 case G_FPTRUNC:
3954 return lowerFPTRUNC(MI);
3955 case G_FPOWI:
3956 return lowerFPOWI(MI);
3957 case G_SMIN:
3958 case G_SMAX:
3959 case G_UMIN:
3960 case G_UMAX:
3961 return lowerMinMax(MI);
3962 case G_FCOPYSIGN:
3963 return lowerFCopySign(MI);
3964 case G_FMINNUM:
3965 case G_FMAXNUM:
3966 return lowerFMinNumMaxNum(MI);
3967 case G_MERGE_VALUES:
3968 return lowerMergeValues(MI);
3969 case G_UNMERGE_VALUES:
3970 return lowerUnmergeValues(MI);
3971 case TargetOpcode::G_SEXT_INREG: {
3972 assert(MI.getOperand(2).isImm() && "Expected immediate");
3973 int64_t SizeInBits = MI.getOperand(2).getImm();
3974
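// The field is shifted to the top and arithmetic-shifted back down;
// e.g. (illustrative) G_SEXT_INREG %x(s32), 8 becomes
// %t:_(s32) = G_SHL %x, 24
// %dst:_(s32) = G_ASHR %t, 24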
3975 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3976 LLT DstTy = MRI.getType(DstReg);
3977 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3978
3979 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3980 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3981 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3982 MI.eraseFromParent();
3983 return Legalized;
3984 }
3985 case G_EXTRACT_VECTOR_ELT:
3986 case G_INSERT_VECTOR_ELT:
3987 return lowerExtractInsertVectorElt(MI);
3988 case G_SHUFFLE_VECTOR:
3989 return lowerShuffleVector(MI);
3990 case G_DYN_STACKALLOC:
3991 return lowerDynStackAlloc(MI);
3992 case G_STACKSAVE:
3993 return lowerStackSave(MI);
3994 case G_STACKRESTORE:
3995 return lowerStackRestore(MI);
3996 case G_EXTRACT:
3997 return lowerExtract(MI);
3998 case G_INSERT:
3999 return lowerInsert(MI);
4000 case G_BSWAP:
4001 return lowerBswap(MI);
4002 case G_BITREVERSE:
4003 return lowerBitreverse(MI);
4004 case G_READ_REGISTER:
4005 case G_WRITE_REGISTER:
4006 return lowerReadWriteRegister(MI);
4007 case G_UADDSAT:
4008 case G_USUBSAT: {
4009 // Try to make a reasonable guess about which lowering strategy to use. The
4010 // target can override this with custom lowering and calling the
4011 // implementation functions.
4012 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4013 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4014 return lowerAddSubSatToMinMax(MI);
4015 return lowerAddSubSatToAddoSubo(MI);
4016 }
4017 case G_SADDSAT:
4018 case G_SSUBSAT: {
4019 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4020
4021 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4022 // since it's a shorter expansion. However, we would need to figure out the
4023 // preferred boolean type for the carry out for the query.
4024 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4025 return lowerAddSubSatToMinMax(MI);
4026 return lowerAddSubSatToAddoSubo(MI);
4027 }
4028 case G_SSHLSAT:
4029 case G_USHLSAT:
4030 return lowerShlSat(MI);
4031 case G_ABS:
4032 return lowerAbsToAddXor(MI);
4033 case G_SELECT:
4034 return lowerSelect(MI);
4035 case G_IS_FPCLASS:
4036 return lowerISFPCLASS(MI);
4037 case G_SDIVREM:
4038 case G_UDIVREM:
4039 return lowerDIVREM(MI);
4040 case G_FSHL:
4041 case G_FSHR:
4042 return lowerFunnelShift(MI);
4043 case G_ROTL:
4044 case G_ROTR:
4045 return lowerRotate(MI);
4046 case G_MEMSET:
4047 case G_MEMCPY:
4048 case G_MEMMOVE:
4049 return lowerMemCpyFamily(MI);
4050 case G_MEMCPY_INLINE:
4051 return lowerMemcpyInline(MI);
4052 case G_ZEXT:
4053 case G_SEXT:
4054 case G_ANYEXT:
4055 return lowerEXT(MI);
4056 case G_TRUNC:
4057 return lowerTRUNC(MI);
4058 GISEL_VECREDUCE_CASES_NONSEQ
4059 return lowerVectorReduction(MI);
4060 case G_VAARG:
4061 return lowerVAArg(MI);
4062 }
4063}
4064
4065 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4066 Align MinAlign) const {
4067 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4068 // datalayout for the preferred alignment. Also there should be a target hook
4069 // for this to allow targets to reduce the alignment and ignore the
4070 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4071 // the type.
4072 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4073}
4074
4075 MachineInstrBuilder
4076 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4077 MachinePointerInfo &PtrInfo) {
4078 MachineFunction &MF = MIRBuilder.getMF();
4079 const DataLayout &DL = MIRBuilder.getDataLayout();
4080 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4081
4082 unsigned AddrSpace = DL.getAllocaAddrSpace();
4083 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4084
4085 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4086 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4087}
4088
4089 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4090 LLT VecTy) {
4091 LLT IdxTy = B.getMRI()->getType(IdxReg);
4092 unsigned NElts = VecTy.getNumElements();
4093
4094 int64_t IdxVal;
4095 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4096 if (IdxVal < VecTy.getNumElements())
4097 return IdxReg;
4098 // If a constant index would be out of bounds, clamp it as well.
4099 }
4100
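// e.g. (illustrative) a 4-element vector clamps the index with (Idx & 3);
// a non-power-of-2 element count falls back to umin(Idx, NElts - 1) below.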
4101 if (isPowerOf2_32(NElts)) {
4102 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4103 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4104 }
4105
4106 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4107 .getReg(0);
4108}
4109
4110 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4111 Register Index) {
4112 LLT EltTy = VecTy.getElementType();
4113
4114 // Calculate the element offset and add it to the pointer.
4115 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4116 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4117 "Converting bits to bytes lost precision");
4118
4119 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4120
4121 // Convert index to the correct size for the address space.
4122 const DataLayout &DL = MIRBuilder.getDataLayout();
4123 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4124 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4125 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4126 if (IdxTy != MRI.getType(Index))
4127 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4128
4129 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4130 MIRBuilder.buildConstant(IdxTy, EltSize));
4131
4132 LLT PtrTy = MRI.getType(VecPtr);
4133 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4134}
4135
4136#ifndef NDEBUG
4137/// Check that all vector operands have the same number of elements. Other
4138/// operands should be listed in NonVecOpIndices.
4139 static bool hasSameNumEltsOnAllVectorOperands(
4140 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4141 std::initializer_list<unsigned> NonVecOpIndices) {
4142 if (MI.getNumMemOperands() != 0)
4143 return false;
4144
4145 LLT VecTy = MRI.getType(MI.getReg(0));
4146 if (!VecTy.isVector())
4147 return false;
4148 unsigned NumElts = VecTy.getNumElements();
4149
4150 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4151 MachineOperand &Op = MI.getOperand(OpIdx);
4152 if (!Op.isReg()) {
4153 if (!is_contained(NonVecOpIndices, OpIdx))
4154 return false;
4155 continue;
4156 }
4157
4158 LLT Ty = MRI.getType(Op.getReg());
4159 if (!Ty.isVector()) {
4160 if (!is_contained(NonVecOpIndices, OpIdx))
4161 return false;
4162 continue;
4163 }
4164
4165 if (Ty.getNumElements() != NumElts)
4166 return false;
4167 }
4168
4169 return true;
4170}
4171#endif
4172
4173/// Fill \p DstOps with DstOps that combined cover the same number of elements
4174/// as \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
4175/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4176/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
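/// e.g. (illustrative) Ty = <7 x s16> with \p NumElts = 4 produces DstOps
/// { <4 x s16>, <3 x s16> }.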
4177static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4178 unsigned NumElts) {
4179 LLT LeftoverTy;
4180 assert(Ty.isVector() && "Expected vector type");
4181 LLT EltTy = Ty.getElementType();
4182 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4183 int NumParts, NumLeftover;
4184 std::tie(NumParts, NumLeftover) =
4185 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4186
4187 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4188 for (int i = 0; i < NumParts; ++i) {
4189 DstOps.push_back(NarrowTy);
4190 }
4191
4192 if (LeftoverTy.isValid()) {
4193 assert(NumLeftover == 1 && "expected exactly one leftover");
4194 DstOps.push_back(LeftoverTy);
4195 }
4196}
4197
4198/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4199/// made from \p Op depending on operand type.
4200static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4201 MachineOperand &Op) {
4202 for (unsigned i = 0; i < N; ++i) {
4203 if (Op.isReg())
4204 Ops.push_back(Op.getReg());
4205 else if (Op.isImm())
4206 Ops.push_back(Op.getImm());
4207 else if (Op.isPredicate())
4208 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4209 else
4210 llvm_unreachable("Unsupported type");
4211 }
4212}
4213
4214// Handle splitting vector operations which need to have the same number of
4215// elements in each type index, but each type index may have a different element
4216// type.
4217//
4218// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4219// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4220// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4221//
4222// Also handles some irregular breakdown cases, e.g.
4223// <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4224// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4225// s64 = G_SHL s64, s32
4226 LegalizerHelper::LegalizeResult
4227 LegalizerHelper::fewerElementsVectorMultiEltType(
4228 GenericMachineInstr &MI, unsigned NumElts,
4229 std::initializer_list<unsigned> NonVecOpIndices) {
4230 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4231 "Non-compatible opcode or not specified non-vector operands");
4232 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4233
4234 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4235 unsigned NumDefs = MI.getNumDefs();
4236
4237 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4238 // Build the instructions with DstOps so that an instruction found by CSE can
4239 // be used directly; CSE copies it into the given vreg when built with a vreg dest.
4240 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4241 // Output registers will be taken from created instructions.
4242 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4243 for (unsigned i = 0; i < NumDefs; ++i) {
4244 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4245 }
4246
4247 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4248 // Operands listed in NonVecOpIndices will be used as is without splitting;
4249 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4250 // scalar condition (op 1), immediate in sext_inreg (op 2).
4251 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4252 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4253 ++UseIdx, ++UseNo) {
4254 if (is_contained(NonVecOpIndices, UseIdx)) {
4255 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4256 MI.getOperand(UseIdx));
4257 } else {
4258 SmallVector<Register, 8> SplitPieces;
4259 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4260 MRI);
4261 for (auto Reg : SplitPieces)
4262 InputOpsPieces[UseNo].push_back(Reg);
4263 }
4264 }
4265
4266 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4267
4268 // Take i-th piece of each input operand split and build sub-vector/scalar
4269 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4270 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4271 SmallVector<DstOp, 2> Defs;
4272 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4273 Defs.push_back(OutputOpsPieces[DstNo][i]);
4274
4275 SmallVector<SrcOp, 3> Uses;
4276 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4277 Uses.push_back(InputOpsPieces[InputNo][i]);
4278
4279 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4280 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4281 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4282 }
4283
4284 // Merge small outputs into MI's output for each def operand.
4285 if (NumLeftovers) {
4286 for (unsigned i = 0; i < NumDefs; ++i)
4287 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4288 } else {
4289 for (unsigned i = 0; i < NumDefs; ++i)
4290 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4291 }
4292
4293 MI.eraseFromParent();
4294 return Legalized;
4295}
4296
4297 LegalizerHelper::LegalizeResult
4298 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4299 unsigned NumElts) {
4300 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4301
4302 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4303 unsigned NumDefs = MI.getNumDefs();
4304
4305 SmallVector<DstOp, 8> OutputOpsPieces;
4306 SmallVector<Register, 8> OutputRegs;
4307 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4308
4309 // Instructions that perform the register split will be inserted in the basic
4310 // block where the register is defined (the basic block is in the next operand).
4311 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4312 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4313 UseIdx += 2, ++UseNo) {
4314 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4315 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
4316 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4317 MIRBuilder, MRI);
4318 }
4319
4320 // Build PHIs with fewer elements.
4321 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4322 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4323 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4324 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4325 Phi.addDef(
4326 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4327 OutputRegs.push_back(Phi.getReg(0));
4328
4329 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4330 Phi.addUse(InputOpsPieces[j][i]);
4331 Phi.add(MI.getOperand(1 + j * 2 + 1));
4332 }
4333 }
4334
4335 // Set the insert point after the existing PHIs
4336 MachineBasicBlock &MBB = *MI.getParent();
4337 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4338
4339 // Merge small outputs into MI's def.
4340 if (NumLeftovers) {
4341 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4342 } else {
4343 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4344 }
4345
4346 MI.eraseFromParent();
4347 return Legalized;
4348}
4349
4350 LegalizerHelper::LegalizeResult
4351 LegalizerHelper::fewerElementsVectorUnmergeValues(GenericMachineInstr &MI,
4352 unsigned TypeIdx,
4353 LLT NarrowTy) {
4354 const int NumDst = MI.getNumOperands() - 1;
4355 const Register SrcReg = MI.getOperand(NumDst).getReg();
4356 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4357 LLT SrcTy = MRI.getType(SrcReg);
4358
4359 if (TypeIdx != 1 || NarrowTy == DstTy)
4360 return UnableToLegalize;
4361
4362 // Requires compatible types. Otherwise SrcReg should have been defined by a
4363 // merge-like instruction that would get artifact combined. Most likely the
4364 // instruction that defines SrcReg has to perform more/fewer elements
4365 // legalization compatible with NarrowTy.
4366 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4367 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4368
4369 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4370 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4371 return UnableToLegalize;
4372
4373 // This is most likely DstTy (smaller than register size) packed in SrcTy
4374 // (larger than register size) and since unmerge was not combined it will be
4375 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
4376 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
4377
4378 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4379 //
4380 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4381 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4382 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4383 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4384 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4385 const int PartsPerUnmerge = NumDst / NumUnmerge;
4386
4387 for (int I = 0; I != NumUnmerge; ++I) {
4388 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4389
4390 for (int J = 0; J != PartsPerUnmerge; ++J)
4391 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4392 MIB.addUse(Unmerge.getReg(I));
4393 }
4394
4395 MI.eraseFromParent();
4396 return Legalized;
4397}
4398
4401 LLT NarrowTy) {
4402 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();