LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
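///
/// For example (illustrative): breaking an s64 OrigTy into s32 pieces yields
/// {2, 0} with no leftover, while breaking an s100 into s32 yields {3, 1} with
/// \p LeftoverTy set to s4.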
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
84static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different number of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
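/// For example (illustrative): two <4 x s16> parts plus a <2 x s16> leftover
/// are flattened into ten s16 elements and rebuilt as a single <10 x s16>
/// value in \p DstReg.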
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
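/// Merge the \p GCDTy-sized pieces in \p VRegs into \p NarrowTy-sized values
/// covering the least common multiple of \p DstTy and \p NarrowTy, padding with
/// \p PadStrategy when the sources do not cover it evenly. On return \p VRegs
/// holds the merged parts and the LCM type is returned. For example
/// (illustrative): DstTy = s96, NarrowTy = s64, GCDTy = s32 gives an s192 LCM
/// built from three s64 merges of two s32 pieces each.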
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FMODF:
475 RTLIBCASE(MODF_F);
476 case TargetOpcode::G_FLOG10:
477 RTLIBCASE(LOG10_F);
478 case TargetOpcode::G_FLOG:
479 RTLIBCASE(LOG_F);
480 case TargetOpcode::G_FLOG2:
481 RTLIBCASE(LOG2_F);
482 case TargetOpcode::G_FLDEXP:
483 RTLIBCASE(LDEXP_F);
484 case TargetOpcode::G_FCEIL:
485 RTLIBCASE(CEIL_F);
486 case TargetOpcode::G_FFLOOR:
487 RTLIBCASE(FLOOR_F);
488 case TargetOpcode::G_FMINNUM:
489 RTLIBCASE(FMIN_F);
490 case TargetOpcode::G_FMAXNUM:
491 RTLIBCASE(FMAX_F);
492 case TargetOpcode::G_FMINIMUMNUM:
493 RTLIBCASE(FMINIMUM_NUM_F);
494 case TargetOpcode::G_FMAXIMUMNUM:
495 RTLIBCASE(FMAXIMUM_NUM_F);
496 case TargetOpcode::G_FSQRT:
497 RTLIBCASE(SQRT_F);
498 case TargetOpcode::G_FRINT:
499 RTLIBCASE(RINT_F);
500 case TargetOpcode::G_FNEARBYINT:
501 RTLIBCASE(NEARBYINT_F);
502 case TargetOpcode::G_INTRINSIC_TRUNC:
503 RTLIBCASE(TRUNC_F);
504 case TargetOpcode::G_INTRINSIC_ROUND:
505 RTLIBCASE(ROUND_F);
506 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
507 RTLIBCASE(ROUNDEVEN_F);
508 case TargetOpcode::G_INTRINSIC_LRINT:
509 RTLIBCASE(LRINT_F);
510 case TargetOpcode::G_INTRINSIC_LLRINT:
511 RTLIBCASE(LLRINT_F);
512 }
513 llvm_unreachable("Unknown libcall function");
514#undef RTLIBCASE_INT
515#undef RTLIBCASE
516}
517
518/// True if an instruction is in tail position in its caller. Intended for
519/// legalizing libcalls as tail calls when possible.
520static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
521 MachineInstr &MI,
522 const TargetInstrInfo &TII,
523 MachineRegisterInfo &MRI) {
524 MachineBasicBlock &MBB = *MI.getParent();
525 const Function &F = MBB.getParent()->getFunction();
526
527 // Conservatively require the attributes of the call to match those of
528 // the return. Ignore NoAlias and NonNull because they don't affect the
529 // call sequence.
530 AttributeList CallerAttrs = F.getAttributes();
531 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
532 .removeAttribute(Attribute::NoAlias)
533 .removeAttribute(Attribute::NonNull)
534 .hasAttributes())
535 return false;
536
537 // It's not safe to eliminate the sign / zero extension of the return value.
538 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
539 CallerAttrs.hasRetAttr(Attribute::SExt))
540 return false;
541
542 // Only tail call if the following instruction is a standard return or if we
543 // have a `thisreturn` callee, and a sequence like:
544 //
545 // G_MEMCPY %0, %1, %2
546 // $x0 = COPY %0
547 // RET_ReallyLR implicit $x0
548 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
549 if (Next != MBB.instr_end() && Next->isCopy()) {
550 if (MI.getOpcode() == TargetOpcode::G_BZERO)
551 return false;
552
553 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
554 // memcpy/etc routines return the same parameter. For others it will be the
555 // returned value.
556 Register VReg = MI.getOperand(0).getReg();
557 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
558 return false;
559
560 Register PReg = Next->getOperand(0).getReg();
561 if (!PReg.isPhysical())
562 return false;
563
564 auto Ret = next_nodbg(Next, MBB.instr_end());
565 if (Ret == MBB.instr_end() || !Ret->isReturn())
566 return false;
567
568 if (Ret->getNumImplicitOperands() != 1)
569 return false;
570
571 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
572 return false;
573
574 // Skip over the COPY that we just validated.
575 Next = Ret;
576 }
577
578 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
579 return false;
580
581 return true;
582}
583
584LegalizerHelper::LegalizeResult
585llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
586 const CallLowering::ArgInfo &Result,
587 ArrayRef<CallLowering::ArgInfo> Args,
588 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
589 MachineInstr *MI) {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
592 CallLowering::CallLoweringInfo Info;
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605 return LegalizerHelper::UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628 return LegalizerHelper::Legalized;
629}
630
631LegalizerHelper::LegalizeResult
632llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
633 const CallLowering::ArgInfo &Result,
634 ArrayRef<CallLowering::ArgInfo> Args,
635 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
636 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
637 const char *Name = TLI.getLibcallName(Libcall);
638 if (!Name)
639 return LegalizerHelper::UnableToLegalize;
640 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
641 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
642}
643
644// Useful for libcalls where all operands have the same type.
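// For example (illustrative): a G_FREM on s64 values maps to RTLIB::REM_F64
// (the fmod routine on most targets), with both inputs and the result passed
// as double.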
645static LegalizerHelper::LegalizeResult
646simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
647 Type *OpType, LostDebugLocObserver &LocObserver) {
648 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
649
650 // FIXME: What does the original arg index mean here?
651 SmallVector<CallLowering::ArgInfo, 3> Args;
652 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
653 Args.push_back({MO.getReg(), OpType, 0});
654 return createLibcall(MIRBuilder, Libcall,
655 {MI.getOperand(0).getReg(), OpType, 0}, Args,
656 LocObserver, &MI);
657}
658
659LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
660 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
661 LostDebugLocObserver &LocObserver) {
662 MachineFunction &MF = *MI.getMF();
663 MachineRegisterInfo &MRI = MF.getRegInfo();
664
665 Register DstSin = MI.getOperand(0).getReg();
666 Register DstCos = MI.getOperand(1).getReg();
667 Register Src = MI.getOperand(2).getReg();
668 LLT DstTy = MRI.getType(DstSin);
669
670 int MemSize = DstTy.getSizeInBytes();
671 Align Alignment = getStackTemporaryAlignment(DstTy);
672 const DataLayout &DL = MIRBuilder.getDataLayout();
673 unsigned AddrSpace = DL.getAllocaAddrSpace();
674 MachinePointerInfo PtrInfo;
675
676 Register StackPtrSin =
677 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
678 .getReg(0);
679 Register StackPtrCos =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682
683 auto &Ctx = MF.getFunction().getContext();
684 auto LibcallResult =
685 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
686 {{0}, Type::getVoidTy(Ctx), 0},
687 {{Src, OpType, 0},
688 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
689 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
690 LocObserver, &MI);
691
692 if (LibcallResult != LegalizeResult::Legalized)
693 return LegalizeResult::UnableToLegalize;
694
695 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
699
700 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
701 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
702 MI.eraseFromParent();
703
704 return LegalizeResult::Legalized;
705}
706
707LegalizerHelper::LegalizeResult
708LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
709 unsigned Size, Type *OpType,
710 LostDebugLocObserver &LocObserver) {
711 MachineFunction &MF = MIRBuilder.getMF();
712 MachineRegisterInfo &MRI = MF.getRegInfo();
713
714 Register DstFrac = MI.getOperand(0).getReg();
715 Register DstInt = MI.getOperand(1).getReg();
716 Register Src = MI.getOperand(2).getReg();
717 LLT DstTy = MRI.getType(DstFrac);
718
719 int MemSize = DstTy.getSizeInBytes();
720 Align Alignment = getStackTemporaryAlignment(DstTy);
721 const DataLayout &DL = MIRBuilder.getDataLayout();
722 unsigned AddrSpace = DL.getAllocaAddrSpace();
723 MachinePointerInfo PtrInfo;
724
725 Register StackPtrInt =
726 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
727 .getReg(0);
728
729 auto &Ctx = MF.getFunction().getContext();
730 auto LibcallResult = createLibcall(
731 MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
732 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
733 LocObserver, &MI);
734
735 if (LibcallResult != LegalizeResult::Legalized)
736 return LegalizeResult::UnableToLegalize;
737
738 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
739 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
740
741 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
742 MI.eraseFromParent();
743
744 return LegalizeResult::Legalized;
745}
746
747LegalizerHelper::LegalizeResult
748llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
749 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
750 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
751
752 SmallVector<CallLowering::ArgInfo, 3> Args;
753 // Add all the args, except for the last which is an imm denoting 'tail'.
754 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
755 Register Reg = MI.getOperand(i).getReg();
756
757 // Need to derive an IR type for call lowering.
758 LLT OpLLT = MRI.getType(Reg);
759 Type *OpTy = nullptr;
760 if (OpLLT.isPointer())
761 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
762 else
763 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
764 Args.push_back({Reg, OpTy, 0});
765 }
766
767 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
768 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
769 RTLIB::Libcall RTLibcall;
770 unsigned Opc = MI.getOpcode();
771 const char *Name;
772 switch (Opc) {
773 case TargetOpcode::G_BZERO:
774 RTLibcall = RTLIB::BZERO;
775 Name = TLI.getLibcallName(RTLibcall);
776 break;
777 case TargetOpcode::G_MEMCPY:
778 RTLibcall = RTLIB::MEMCPY;
779 Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data();
780 Args[0].Flags[0].setReturned();
781 break;
782 case TargetOpcode::G_MEMMOVE:
783 RTLibcall = RTLIB::MEMMOVE;
784 Name = TLI.getLibcallName(RTLibcall);
785 Args[0].Flags[0].setReturned();
786 break;
787 case TargetOpcode::G_MEMSET:
788 RTLibcall = RTLIB::MEMSET;
789 Name = TLI.getLibcallName(RTLibcall);
790 Args[0].Flags[0].setReturned();
791 break;
792 default:
793 llvm_unreachable("unsupported opcode");
794 }
795
796 // Unsupported libcall on the target.
797 if (!Name) {
798 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
799 << MIRBuilder.getTII().getName(Opc) << "\n");
800 return LegalizerHelper::UnableToLegalize;
801 }
802
803 CallLowering::CallLoweringInfo Info;
804 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
805 Info.Callee = MachineOperand::CreateES(Name);
806 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
807 Info.IsTailCall =
808 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
809 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
810
811 llvm::append_range(Info.OrigArgs, Args);
812 if (!CLI.lowerCall(MIRBuilder, Info))
813 return LegalizerHelper::UnableToLegalize;
814
815 if (Info.LoweredTailCall) {
816 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
817
818 // Check debug locations before removing the return.
819 LocObserver.checkpoint(true);
820
821 // We must have a return following the call (or debug insts) to get past
822 // isLibCallInTailPosition.
823 do {
824 MachineInstr *Next = MI.getNextNode();
825 assert(Next &&
826 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
827 "Expected instr following MI to be return or debug inst?");
828 // We lowered a tail call, so the call is now the return from the block.
829 // Delete the old return.
830 Next->eraseFromParent();
831 } while (MI.getNextNode());
832
833 // We expect to lose the debug location from the return.
834 LocObserver.checkpoint(false);
835 }
836
837 return LegalizerHelper::Legalized;
838}
839
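// Map an atomic G_* instruction to the matching outline-atomics helper.
// For example (illustrative): a 4-byte G_ATOMICRMW_ADD with acquire ordering
// resolves to RTLIB::OUTLINE_ATOMIC_LDADD4_ACQ (__aarch64_ldadd4_acq on
// AArch64).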
840static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
841 unsigned Opc = MI.getOpcode();
842 auto &AtomicMI = cast<GMemOperation>(MI);
843 auto &MMO = AtomicMI.getMMO();
844 auto Ordering = MMO.getMergedOrdering();
845 LLT MemType = MMO.getMemoryType();
846 uint64_t MemSize = MemType.getSizeInBytes();
847 if (MemType.isVector())
848 return RTLIB::UNKNOWN_LIBCALL;
849
850#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
851#define LCALL5(A) \
852 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
857 return getOutlineAtomicHelper(LC, Ordering, MemSize);
858 }
859 case TargetOpcode::G_ATOMICRMW_XCHG: {
860 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
861 return getOutlineAtomicHelper(LC, Ordering, MemSize);
862 }
863 case TargetOpcode::G_ATOMICRMW_ADD:
864 case TargetOpcode::G_ATOMICRMW_SUB: {
865 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
866 return getOutlineAtomicHelper(LC, Ordering, MemSize);
867 }
868 case TargetOpcode::G_ATOMICRMW_AND: {
869 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
870 return getOutlineAtomicHelper(LC, Ordering, MemSize);
871 }
872 case TargetOpcode::G_ATOMICRMW_OR: {
873 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
874 return getOutlineAtomicHelper(LC, Ordering, MemSize);
875 }
876 case TargetOpcode::G_ATOMICRMW_XOR: {
877 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
878 return getOutlineAtomicHelper(LC, Ordering, MemSize);
879 }
880 default:
881 return RTLIB::UNKNOWN_LIBCALL;
882 }
883#undef LCALLS
884#undef LCALL5
885}
886
887static LegalizerHelper::LegalizeResult
888createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
889 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
890
891 Type *RetTy;
892 SmallVector<Register> RetRegs;
893 SmallVector<CallLowering::ArgInfo, 3> Args;
894 unsigned Opc = MI.getOpcode();
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 Register Success;
899 LLT SuccessLLT;
900 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
901 MI.getFirst4RegLLTs();
902 RetRegs.push_back(Ret);
903 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
904 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
905 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
906 NewLLT) = MI.getFirst5RegLLTs();
907 RetRegs.push_back(Success);
908 RetTy = StructType::get(
909 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
910 }
911 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
912 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
913 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
914 break;
915 }
916 case TargetOpcode::G_ATOMICRMW_XCHG:
917 case TargetOpcode::G_ATOMICRMW_ADD:
918 case TargetOpcode::G_ATOMICRMW_SUB:
919 case TargetOpcode::G_ATOMICRMW_AND:
920 case TargetOpcode::G_ATOMICRMW_OR:
921 case TargetOpcode::G_ATOMICRMW_XOR: {
922 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
923 RetRegs.push_back(Ret);
924 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
925 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
926 Val =
927 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
928 .getReg(0);
929 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
930 Val =
931 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
932 .getReg(0);
933 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
934 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
935 break;
936 }
937 default:
938 llvm_unreachable("unsupported opcode");
939 }
940
941 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
942 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
943 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
944 const char *Name = TLI.getLibcallName(RTLibcall);
945
946 // Unsupported libcall on the target.
947 if (!Name) {
948 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
949 << MIRBuilder.getTII().getName(Opc) << "\n");
950 return LegalizerHelper::UnableToLegalize;
951 }
952
953 CallLowering::CallLoweringInfo Info;
954 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
955 Info.Callee = MachineOperand::CreateES(Name);
956 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
957
958 llvm::append_range(Info.OrigArgs, Args);
959 if (!CLI.lowerCall(MIRBuilder, Info))
960 return LegalizerHelper::UnableToLegalize;
961
962 return LegalizerHelper::Legalized;
963}
964
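// For example (illustrative): G_FPEXT from f32 to f64 resolves to
// RTLIB::getFPEXT(MVT::f32, MVT::f64), i.e. the __extendsfdf2 soft-float
// routine on most targets.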
965static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
966 Type *FromType) {
967 auto ToMVT = MVT::getVT(ToType);
968 auto FromMVT = MVT::getVT(FromType);
969
970 switch (Opcode) {
971 case TargetOpcode::G_FPEXT:
972 return RTLIB::getFPEXT(FromMVT, ToMVT);
973 case TargetOpcode::G_FPTRUNC:
974 return RTLIB::getFPROUND(FromMVT, ToMVT);
975 case TargetOpcode::G_FPTOSI:
976 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
977 case TargetOpcode::G_FPTOUI:
978 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
979 case TargetOpcode::G_SITOFP:
980 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
981 case TargetOpcode::G_UITOFP:
982 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
983 }
984 llvm_unreachable("Unsupported libcall function");
985}
986
987static LegalizerHelper::LegalizeResult
988conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
989 Type *FromType, LostDebugLocObserver &LocObserver,
990 const TargetLowering &TLI, bool IsSigned = false) {
991 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
992 if (FromType->isIntegerTy()) {
993 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
994 Arg.Flags[0].setSExt();
995 else
996 Arg.Flags[0].setZExt();
997 }
998
999 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
1000 return createLibcall(MIRBuilder, Libcall,
1001 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
1002 &MI);
1003}
1004
1005static RTLIB::Libcall
1006getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1007 RTLIB::Libcall RTLibcall;
1008 switch (MI.getOpcode()) {
1009 case TargetOpcode::G_GET_FPENV:
1010 RTLibcall = RTLIB::FEGETENV;
1011 break;
1012 case TargetOpcode::G_SET_FPENV:
1013 case TargetOpcode::G_RESET_FPENV:
1014 RTLibcall = RTLIB::FESETENV;
1015 break;
1016 case TargetOpcode::G_GET_FPMODE:
1017 RTLibcall = RTLIB::FEGETMODE;
1018 break;
1019 case TargetOpcode::G_SET_FPMODE:
1020 case TargetOpcode::G_RESET_FPMODE:
1021 RTLibcall = RTLIB::FESETMODE;
1022 break;
1023 default:
1024 llvm_unreachable("Unexpected opcode");
1025 }
1026 return RTLibcall;
1027}
1028
1029// Some library functions that read FP state (fegetmode, fegetenv) write the
1030// state into a region in memory. IR intrinsics that do the same operations
1031// (get_fpmode, get_fpenv) return the state as an integer value. To implement these
1032// intrinsics via the library functions, we need to use a temporary variable,
1033// for example:
1034//
1035// %0:_(s32) = G_GET_FPMODE
1036//
1037// is transformed to:
1038//
1039// %1:_(p0) = G_FRAME_INDEX %stack.0
1040// BL &fegetmode
1041// %0:_(s32) = G_LOAD % 1
1042//
1044LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1046 LostDebugLocObserver &LocObserver) {
1047 const DataLayout &DL = MIRBuilder.getDataLayout();
1048 auto &MF = MIRBuilder.getMF();
1049 auto &MRI = *MIRBuilder.getMRI();
1050 auto &Ctx = MF.getFunction().getContext();
1051
1052 // Create temporary, where library function will put the read state.
1053 Register Dst = MI.getOperand(0).getReg();
1054 LLT StateTy = MRI.getType(Dst);
1055 TypeSize StateSize = StateTy.getSizeInBytes();
1056 Align TempAlign = getStackTemporaryAlignment(StateTy);
1057 MachinePointerInfo TempPtrInfo;
1058 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1059
1060 // Create a call to library function, with the temporary as an argument.
1061 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1062 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1063 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1064 auto Res =
1065 createLibcall(MIRBuilder, RTLibcall,
1066 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1067 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1068 LocObserver, nullptr);
1069 if (Res != LegalizerHelper::Legalized)
1070 return Res;
1071
1072 // Create a load from the temporary.
1073 MachineMemOperand *MMO = MF.getMachineMemOperand(
1074 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1075 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1076
1077 return LegalizerHelper::Legalized;
1078}
1079
1080// Similar to `createGetStateLibcall`, this function calls a library function
1081// using transient space on the stack. In this case the library function reads
1082// the content of the memory region.
1083LegalizerHelper::LegalizeResult
1084LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1085 MachineInstr &MI,
1086 LostDebugLocObserver &LocObserver) {
1087 const DataLayout &DL = MIRBuilder.getDataLayout();
1088 auto &MF = MIRBuilder.getMF();
1089 auto &MRI = *MIRBuilder.getMRI();
1090 auto &Ctx = MF.getFunction().getContext();
1091
1092 // Create temporary, where library function will get the new state.
1093 Register Src = MI.getOperand(0).getReg();
1094 LLT StateTy = MRI.getType(Src);
1095 TypeSize StateSize = StateTy.getSizeInBytes();
1096 Align TempAlign = getStackTemporaryAlignment(StateTy);
1097 MachinePointerInfo TempPtrInfo;
1098 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1099
1100 // Put the new state into the temporary.
1101 MachineMemOperand *MMO = MF.getMachineMemOperand(
1102 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1103 MIRBuilder.buildStore(Src, Temp, *MMO);
1104
1105 // Create a call to library function, with the temporary as an argument.
1106 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1107 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1108 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1109 return createLibcall(MIRBuilder, RTLibcall,
1110 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1111 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1112 LocObserver, nullptr);
1113}
1114
1115/// Returns the corresponding libcall for the given Pred and
1116/// the ICMP predicate that should be generated to compare with #0
1117/// after the libcall.
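///
/// For example (illustrative): FCMP_OEQ on 32-bit operands maps to the OEQ_F32
/// libcall (__eqsf2 in the GCC soft-float ABI), whose i32 result is then
/// compared against 0 with ICMP_EQ.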
1118static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1119getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1120#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1121 do { \
1122 switch (Size) { \
1123 case 32: \
1124 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1125 case 64: \
1126 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1127 case 128: \
1128 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1129 default: \
1130 llvm_unreachable("unexpected size"); \
1131 } \
1132 } while (0)
1133
1134 switch (Pred) {
1135 case CmpInst::FCMP_OEQ:
1136 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1137 case CmpInst::FCMP_UNE:
1138 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1139 case CmpInst::FCMP_OGE:
1140 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1141 case CmpInst::FCMP_OLT:
1142 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1143 case CmpInst::FCMP_OLE:
1144 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1145 case CmpInst::FCMP_OGT:
1146 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1147 case CmpInst::FCMP_UNO:
1148 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1149 default:
1150 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1151 }
1152}
1153
1154LegalizerHelper::LegalizeResult
1155LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1156 MachineInstr &MI,
1157 LostDebugLocObserver &LocObserver) {
1158 auto &MF = MIRBuilder.getMF();
1159 auto &Ctx = MF.getFunction().getContext();
1160 const GFCmp *Cmp = cast<GFCmp>(&MI);
1161
1162 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1163 unsigned Size = OpLLT.getSizeInBits();
1164 if ((Size != 32 && Size != 64 && Size != 128) ||
1165 OpLLT != MRI.getType(Cmp->getRHSReg()))
1166 return UnableToLegalize;
1167
1168 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1169
1170 // DstReg type is s32
1171 const Register DstReg = Cmp->getReg(0);
1172 LLT DstTy = MRI.getType(DstReg);
1173 const auto Cond = Cmp->getCond();
1174
1175 // Reference:
1176 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1177 // Generates a libcall followed by ICMP.
1178 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1179 const CmpInst::Predicate ICmpPred,
1180 const DstOp &Res) -> Register {
1181 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1182 constexpr LLT TempLLT = LLT::scalar(32);
1183 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1184 // Generate libcall, holding result in Temp
1185 const auto Status = createLibcall(
1186 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1187 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1188 LocObserver, &MI);
1189 if (!Status)
1190 return {};
1191
1192 // Compare temp with #0 to get the final result.
1193 return MIRBuilder
1194 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1195 .getReg(0);
1196 };
1197
1198 // Simple case if we have a direct mapping from predicate to libcall
1199 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1200 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1201 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1202 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1203 return Legalized;
1204 }
1205 return UnableToLegalize;
1206 }
1207
1208 // No direct mapping found, should be generated as combination of libcalls.
1209
1210 switch (Cond) {
1211 case CmpInst::FCMP_UEQ: {
1212 // FCMP_UEQ: unordered or equal
1213 // Convert into (FCMP_OEQ || FCMP_UNO).
1214
1215 const auto [OeqLibcall, OeqPred] =
1216 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1217 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1218
1219 const auto [UnoLibcall, UnoPred] =
1220 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1221 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1222 if (Oeq && Uno)
1223 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1224 else
1225 return UnableToLegalize;
1226
1227 break;
1228 }
1229 case CmpInst::FCMP_ONE: {
1230 // FCMP_ONE: ordered and operands are unequal
1231 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1232
1233 // We invert the predicate instead of generating a NOT
1234 // to save one instruction.
1235 // On AArch64, isel can even select the two compares into a single ccmp.
1236 const auto [OeqLibcall, OeqPred] =
1237 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1238 const auto NotOeq =
1239 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1240
1241 const auto [UnoLibcall, UnoPred] =
1242 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1243 const auto NotUno =
1244 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1245
1246 if (NotOeq && NotUno)
1247 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1248 else
1249 return UnableToLegalize;
1250
1251 break;
1252 }
1253 case CmpInst::FCMP_ULT:
1254 case CmpInst::FCMP_UGE:
1255 case CmpInst::FCMP_UGT:
1256 case CmpInst::FCMP_ULE:
1257 case CmpInst::FCMP_ORD: {
1258 // Convert into: !(inverse(Pred))
1259 // E.g. FCMP_ULT becomes !FCMP_OGE
1260 // This is equivalent to the following, but saves some instructions.
1261 // MIRBuilder.buildNot(
1262 // PredTy,
1263 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1264 // Op1, Op2));
1265 const auto [InversedLibcall, InversedPred] =
1266 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1267 if (!BuildLibcall(InversedLibcall,
1268 CmpInst::getInversePredicate(InversedPred), DstReg))
1269 return UnableToLegalize;
1270 break;
1271 }
1272 default:
1273 return UnableToLegalize;
1274 }
1275
1276 return Legalized;
1277}
1278
1279// The function is used to legalize operations that set default environment
1280// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1281// On most targets supported in glibc FE_DFL_MODE is defined as
1282// `((const femode_t *) -1)`. Such assumption is used here. If for some target
1283// it is not true, the target must provide custom lowering.
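//
// Illustratively, on a 64-bit target G_RESET_FPMODE then lowers to roughly:
//
//   %0:_(s64) = G_CONSTANT i64 -1
//   %1:_(p0) = G_INTTOPTR %0
//   BL &fesetmode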
1284LegalizerHelper::LegalizeResult
1285LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1286 MachineInstr &MI,
1287 LostDebugLocObserver &LocObserver) {
1288 const DataLayout &DL = MIRBuilder.getDataLayout();
1289 auto &MF = MIRBuilder.getMF();
1290 auto &Ctx = MF.getFunction().getContext();
1291
1292 // Create an argument for the library function.
1293 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1294 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1295 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1296 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1297 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1298 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1299 MIRBuilder.buildIntToPtr(Dest, DefValue);
1300
1301 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1302 return createLibcall(MIRBuilder, RTLibcall,
1303 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1304 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1305 LocObserver, &MI);
1306}
1307
1308LegalizerHelper::LegalizeResult
1309LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1310 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1311
1312 switch (MI.getOpcode()) {
1313 default:
1314 return UnableToLegalize;
1315 case TargetOpcode::G_MUL:
1316 case TargetOpcode::G_SDIV:
1317 case TargetOpcode::G_UDIV:
1318 case TargetOpcode::G_SREM:
1319 case TargetOpcode::G_UREM:
1320 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1321 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1322 unsigned Size = LLTy.getSizeInBits();
1323 Type *HLTy = IntegerType::get(Ctx, Size);
1324 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1325 if (Status != Legalized)
1326 return Status;
1327 break;
1328 }
1329 case TargetOpcode::G_FADD:
1330 case TargetOpcode::G_FSUB:
1331 case TargetOpcode::G_FMUL:
1332 case TargetOpcode::G_FDIV:
1333 case TargetOpcode::G_FMA:
1334 case TargetOpcode::G_FPOW:
1335 case TargetOpcode::G_FREM:
1336 case TargetOpcode::G_FCOS:
1337 case TargetOpcode::G_FSIN:
1338 case TargetOpcode::G_FTAN:
1339 case TargetOpcode::G_FACOS:
1340 case TargetOpcode::G_FASIN:
1341 case TargetOpcode::G_FATAN:
1342 case TargetOpcode::G_FATAN2:
1343 case TargetOpcode::G_FCOSH:
1344 case TargetOpcode::G_FSINH:
1345 case TargetOpcode::G_FTANH:
1346 case TargetOpcode::G_FLOG10:
1347 case TargetOpcode::G_FLOG:
1348 case TargetOpcode::G_FLOG2:
1349 case TargetOpcode::G_FEXP:
1350 case TargetOpcode::G_FEXP2:
1351 case TargetOpcode::G_FEXP10:
1352 case TargetOpcode::G_FCEIL:
1353 case TargetOpcode::G_FFLOOR:
1354 case TargetOpcode::G_FMINNUM:
1355 case TargetOpcode::G_FMAXNUM:
1356 case TargetOpcode::G_FMINIMUMNUM:
1357 case TargetOpcode::G_FMAXIMUMNUM:
1358 case TargetOpcode::G_FSQRT:
1359 case TargetOpcode::G_FRINT:
1360 case TargetOpcode::G_FNEARBYINT:
1361 case TargetOpcode::G_INTRINSIC_TRUNC:
1362 case TargetOpcode::G_INTRINSIC_ROUND:
1363 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1364 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1365 unsigned Size = LLTy.getSizeInBits();
1366 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1372 if (Status != Legalized)
1373 return Status;
1374 break;
1375 }
1376 case TargetOpcode::G_FSINCOS: {
1377 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1378 unsigned Size = LLTy.getSizeInBits();
1379 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1380 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1381 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1382 return UnableToLegalize;
1383 }
1384 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1385 }
1386 case TargetOpcode::G_FMODF: {
1387 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1388 unsigned Size = LLTy.getSizeInBits();
1389 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1390 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1391 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1392 return UnableToLegalize;
1393 }
1394 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1395 }
1396 case TargetOpcode::G_LROUND:
1397 case TargetOpcode::G_LLROUND:
1398 case TargetOpcode::G_INTRINSIC_LRINT:
1399 case TargetOpcode::G_INTRINSIC_LLRINT: {
1400 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1401 unsigned Size = LLTy.getSizeInBits();
1402 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1403 Type *ITy = IntegerType::get(
1404 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1405 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1406 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1407 return UnableToLegalize;
1408 }
1409 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1410 LegalizeResult Status =
1411 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1412 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1413 if (Status != Legalized)
1414 return Status;
1415 MI.eraseFromParent();
1416 return Legalized;
1417 }
1418 case TargetOpcode::G_FPOWI:
1419 case TargetOpcode::G_FLDEXP: {
1420 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1421 unsigned Size = LLTy.getSizeInBits();
1422 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1423 Type *ITy = IntegerType::get(
1424 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1425 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1426 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1427 return UnableToLegalize;
1428 }
1429 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1430 SmallVector<CallLowering::ArgInfo, 2> Args = {
1431 {MI.getOperand(1).getReg(), HLTy, 0},
1432 {MI.getOperand(2).getReg(), ITy, 1}};
1433 Args[1].Flags[0].setSExt();
1434 LegalizeResult Status =
1435 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1436 Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status =
1448 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1465 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1468 LegalizeResult Status = conversionLibcall(
1469 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 LegalizeResult Status =
1483 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1484 LocObserver, TLI, IsSigned);
1485 if (Status != Legalized)
1486 return Status;
1487 break;
1488 }
1489 case TargetOpcode::G_ATOMICRMW_XCHG:
1490 case TargetOpcode::G_ATOMICRMW_ADD:
1491 case TargetOpcode::G_ATOMICRMW_SUB:
1492 case TargetOpcode::G_ATOMICRMW_AND:
1493 case TargetOpcode::G_ATOMICRMW_OR:
1494 case TargetOpcode::G_ATOMICRMW_XOR:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG:
1496 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1497 auto Status = createAtomicLibcall(MIRBuilder, MI);
1498 if (Status != Legalized)
1499 return Status;
1500 break;
1501 }
1502 case TargetOpcode::G_BZERO:
1503 case TargetOpcode::G_MEMCPY:
1504 case TargetOpcode::G_MEMMOVE:
1505 case TargetOpcode::G_MEMSET: {
1506 LegalizeResult Result =
1507 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1508 if (Result != Legalized)
1509 return Result;
1510 MI.eraseFromParent();
1511 return Result;
1512 }
1513 case TargetOpcode::G_GET_FPENV:
1514 case TargetOpcode::G_GET_FPMODE: {
1515 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1516 if (Result != Legalized)
1517 return Result;
1518 break;
1519 }
1520 case TargetOpcode::G_SET_FPENV:
1521 case TargetOpcode::G_SET_FPMODE: {
1522 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1523 if (Result != Legalized)
1524 return Result;
1525 break;
1526 }
1527 case TargetOpcode::G_RESET_FPENV:
1528 case TargetOpcode::G_RESET_FPMODE: {
1529 LegalizeResult Result =
1530 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1531 if (Result != Legalized)
1532 return Result;
1533 break;
1534 }
1535 }
1536
1537 MI.eraseFromParent();
1538 return Legalized;
1539}
1540
1541LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1542 unsigned TypeIdx,
1543 LLT NarrowTy) {
1544 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1545 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1546
1547 switch (MI.getOpcode()) {
1548 default:
1549 return UnableToLegalize;
1550 case TargetOpcode::G_IMPLICIT_DEF: {
1551 Register DstReg = MI.getOperand(0).getReg();
1552 LLT DstTy = MRI.getType(DstReg);
1553
1554 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1555 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1556 // FIXME: Although this would also be legal for the general case, it causes
1557 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1558 // combines not being hit). This seems to be a problem related to the
1559 // artifact combiner.
1560 if (SizeOp0 % NarrowSize != 0) {
1561 LLT ImplicitTy = NarrowTy;
1562 if (DstTy.isVector())
1563 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1564
1565 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1566 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1567
1568 MI.eraseFromParent();
1569 return Legalized;
1570 }
1571
1572 int NumParts = SizeOp0 / NarrowSize;
1573
1574 SmallVector<Register, 2> DstRegs;
1575 for (int i = 0; i < NumParts; ++i)
1576 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1577
1578 if (DstTy.isVector())
1579 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1580 else
1581 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_CONSTANT: {
1586 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1587 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1588 unsigned TotalSize = Ty.getSizeInBits();
1589 unsigned NarrowSize = NarrowTy.getSizeInBits();
1590 int NumParts = TotalSize / NarrowSize;
1591
1592 SmallVector<Register, 4> PartRegs;
1593 for (int I = 0; I != NumParts; ++I) {
1594 unsigned Offset = I * NarrowSize;
1595 auto K = MIRBuilder.buildConstant(NarrowTy,
1596 Val.lshr(Offset).trunc(NarrowSize));
1597 PartRegs.push_back(K.getReg(0));
1598 }
1599
1600 LLT LeftoverTy;
1601 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1602 SmallVector<Register, 1> LeftoverRegs;
1603 if (LeftoverBits != 0) {
1604 LeftoverTy = LLT::scalar(LeftoverBits);
1605 auto K = MIRBuilder.buildConstant(
1606 LeftoverTy,
1607 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1608 LeftoverRegs.push_back(K.getReg(0));
1609 }
1610
1611 insertParts(MI.getOperand(0).getReg(),
1612 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1613
1614 MI.eraseFromParent();
1615 return Legalized;
1616 }
1617 case TargetOpcode::G_SEXT:
1618 case TargetOpcode::G_ZEXT:
1619 case TargetOpcode::G_ANYEXT:
1620 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1621 case TargetOpcode::G_TRUNC: {
1622 if (TypeIdx != 1)
1623 return UnableToLegalize;
1624
1625 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1626 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1627 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1628 return UnableToLegalize;
1629 }
1630
1631 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1632 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1633 MI.eraseFromParent();
1634 return Legalized;
1635 }
1636 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1637 case TargetOpcode::G_FREEZE: {
1638 if (TypeIdx != 0)
1639 return UnableToLegalize;
1640
1641 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1642 // Should widen scalar first
1643 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1644 return UnableToLegalize;
1645
1646 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1647 SmallVector<Register, 8> Parts;
1648 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1649 Parts.push_back(
1650 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1651 .getReg(0));
1652 }
1653
1654 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1655 MI.eraseFromParent();
1656 return Legalized;
1657 }
1658 case TargetOpcode::G_ADD:
1659 case TargetOpcode::G_SUB:
1660 case TargetOpcode::G_SADDO:
1661 case TargetOpcode::G_SSUBO:
1662 case TargetOpcode::G_SADDE:
1663 case TargetOpcode::G_SSUBE:
1664 case TargetOpcode::G_UADDO:
1665 case TargetOpcode::G_USUBO:
1666 case TargetOpcode::G_UADDE:
1667 case TargetOpcode::G_USUBE:
1668 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1669 case TargetOpcode::G_MUL:
1670 case TargetOpcode::G_UMULH:
1671 return narrowScalarMul(MI, NarrowTy);
1672 case TargetOpcode::G_EXTRACT:
1673 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1674 case TargetOpcode::G_INSERT:
1675 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1676 case TargetOpcode::G_LOAD: {
1677 auto &LoadMI = cast<GLoad>(MI);
1678 Register DstReg = LoadMI.getDstReg();
1679 LLT DstTy = MRI.getType(DstReg);
1680 if (DstTy.isVector())
1681 return UnableToLegalize;
1682
1683 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1684 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1685 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1686 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1687 LoadMI.eraseFromParent();
1688 return Legalized;
1689 }
1690
1691 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1692 }
1693 case TargetOpcode::G_ZEXTLOAD:
1694 case TargetOpcode::G_SEXTLOAD: {
1695 auto &LoadMI = cast<GExtLoad>(MI);
1696 Register DstReg = LoadMI.getDstReg();
1697 Register PtrReg = LoadMI.getPointerReg();
1698
1699 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1700 auto &MMO = LoadMI.getMMO();
1701 unsigned MemSize = MMO.getSizeInBits().getValue();
1702
1703 if (MemSize == NarrowSize) {
1704 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1705 } else if (MemSize < NarrowSize) {
1706 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1707 } else if (MemSize > NarrowSize) {
1708 // FIXME: Need to split the load.
1709 return UnableToLegalize;
1710 }
1711
1712 if (isa<GZExtLoad>(LoadMI))
1713 MIRBuilder.buildZExt(DstReg, TmpReg);
1714 else
1715 MIRBuilder.buildSExt(DstReg, TmpReg);
1716
1717 LoadMI.eraseFromParent();
1718 return Legalized;
1719 }
1720 case TargetOpcode::G_STORE: {
1721 auto &StoreMI = cast<GStore>(MI);
1722
1723 Register SrcReg = StoreMI.getValueReg();
1724 LLT SrcTy = MRI.getType(SrcReg);
1725 if (SrcTy.isVector())
1726 return UnableToLegalize;
1727
1728 int NumParts = SizeOp0 / NarrowSize;
1729 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1730 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1731 if (SrcTy.isVector() && LeftoverBits != 0)
1732 return UnableToLegalize;
1733
1734 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1735 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1736 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1737 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1738 StoreMI.eraseFromParent();
1739 return Legalized;
1740 }
1741
1742 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1743 }
1744 case TargetOpcode::G_SELECT:
1745 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1746 case TargetOpcode::G_AND:
1747 case TargetOpcode::G_OR:
1748 case TargetOpcode::G_XOR: {
1749 // Legalize bitwise operation:
1750 // A = BinOp<Ty> B, C
1751 // into:
1752 // B1, ..., BN = G_UNMERGE_VALUES B
1753 // C1, ..., CN = G_UNMERGE_VALUES C
1754 // A1 = BinOp<Ty/N> B1, C1
1755 // ...
1756 // AN = BinOp<Ty/N> BN, CN
1757 // A = G_MERGE_VALUES A1, ..., AN
1758 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1759 }
1760 case TargetOpcode::G_SHL:
1761 case TargetOpcode::G_LSHR:
1762 case TargetOpcode::G_ASHR:
1763 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1764 case TargetOpcode::G_CTLZ:
1765 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1766 case TargetOpcode::G_CTTZ:
1767 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1768 case TargetOpcode::G_CTPOP:
1769 if (TypeIdx == 1)
1770 switch (MI.getOpcode()) {
1771 case TargetOpcode::G_CTLZ:
1772 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1773 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTTZ:
1775 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1776 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1777 case TargetOpcode::G_CTPOP:
1778 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1779 default:
1780 return UnableToLegalize;
1781 }
1782
1783 Observer.changingInstr(MI);
1784 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_INTTOPTR:
1788 if (TypeIdx != 1)
1789 return UnableToLegalize;
1790
1791 Observer.changingInstr(MI);
1792 narrowScalarSrc(MI, NarrowTy, 1);
1793 Observer.changedInstr(MI);
1794 return Legalized;
1795 case TargetOpcode::G_PTRTOINT:
1796 if (TypeIdx != 0)
1797 return UnableToLegalize;
1798
1799 Observer.changingInstr(MI);
1800 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1801 Observer.changedInstr(MI);
1802 return Legalized;
1803 case TargetOpcode::G_PHI: {
1804 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1805 // NarrowSize.
1806 if (SizeOp0 % NarrowSize != 0)
1807 return UnableToLegalize;
1808
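// Unmerge each incoming value into NarrowTy parts in its predecessor block,
// build one narrow G_PHI per part, and merge the part PHIs back into the
// original destination after the PHI group.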
1809 unsigned NumParts = SizeOp0 / NarrowSize;
1810 SmallVector<Register, 2> DstRegs(NumParts);
1811 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1812 Observer.changingInstr(MI);
1813 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1814 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1815 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1816 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1817 SrcRegs[i / 2], MIRBuilder, MRI);
1818 }
1819 MachineBasicBlock &MBB = *MI.getParent();
1820 MIRBuilder.setInsertPt(MBB, MI);
1821 for (unsigned i = 0; i < NumParts; ++i) {
1822 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1823 MachineInstrBuilder MIB =
1824 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1825 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1826 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1827 }
1828 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1829 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1830 Observer.changedInstr(MI);
1831 MI.eraseFromParent();
1832 return Legalized;
1833 }
1834 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1835 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 if (TypeIdx != 2)
1837 return UnableToLegalize;
1838
1839 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1840 Observer.changingInstr(MI);
1841 narrowScalarSrc(MI, NarrowTy, OpIdx);
1842 Observer.changedInstr(MI);
1843 return Legalized;
1844 }
1845 case TargetOpcode::G_ICMP: {
1846 Register LHS = MI.getOperand(2).getReg();
1847 LLT SrcTy = MRI.getType(LHS);
1848 CmpInst::Predicate Pred =
1849 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1850
1851 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1852 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1853 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1854 LHSLeftoverRegs, MIRBuilder, MRI))
1855 return UnableToLegalize;
1856
1857 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1858 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1859 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1860 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1861 return UnableToLegalize;
1862
1863 // We now have the LHS and RHS of the compare split into narrow-type
1864 // registers, plus potentially some leftover type.
1865 Register Dst = MI.getOperand(0).getReg();
1866 LLT ResTy = MRI.getType(Dst);
1867 if (ICmpInst::isEquality(Pred)) {
1868 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1869 // them together. For each equal part, the result should be all 0s. For
1870 // each non-equal part, we'll get at least one 1.
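// e.g. for an s64 G_ICMP eq with NarrowTy = s32:
//   %xlo:s32 = G_XOR %lhs_lo, %rhs_lo
//   %xhi:s32 = G_XOR %lhs_hi, %rhs_hi
//   %or:s32 = G_OR %xlo, %xhi
//   %dst:s1 = G_ICMP eq, %or, 0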
1871 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1872 SmallVector<Register, 4> Xors;
1873 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1874 auto LHS = std::get<0>(LHSAndRHS);
1875 auto RHS = std::get<1>(LHSAndRHS);
1876 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1877 Xors.push_back(Xor);
1878 }
1879
1880 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1881 // to the desired narrow type so that we can OR them together later.
1882 SmallVector<Register, 4> WidenedXors;
1883 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1884 auto LHS = std::get<0>(LHSAndRHS);
1885 auto RHS = std::get<1>(LHSAndRHS);
1886 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1887 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1888 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1889 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1890 llvm::append_range(Xors, WidenedXors);
1891 }
1892
1893 // Now, for each part we broke up, we know if they are equal/not equal
1894 // based on the G_XOR. We can OR these all together and compare against
1895 // 0 to get the result.
1896 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1897 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1898 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1899 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1900 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1901 } else {
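// Non-equality predicates: compare the parts from least to most significant.
// Each more significant part is compared with the unsigned form of the
// predicate and, if it is equal, the result of the lower parts is selected;
// the final (most significant) part uses the original predicate.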
1902 Register CmpIn;
1903 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1904 Register CmpOut;
1905 CmpInst::Predicate PartPred;
1906
1907 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1908 PartPred = Pred;
1909 CmpOut = Dst;
1910 } else {
1911 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1912 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 }
1914
1915 if (!CmpIn) {
1916 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 } else {
1919 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1920 RHSPartRegs[I]);
1921 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1922 LHSPartRegs[I], RHSPartRegs[I]);
1923 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1924 }
1925
1926 CmpIn = CmpOut;
1927 }
1928
1929 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1930 Register CmpOut;
1931 CmpInst::Predicate PartPred;
1932
1933 if (I == E - 1) {
1934 PartPred = Pred;
1935 CmpOut = Dst;
1936 } else {
1937 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1938 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 }
1940
1941 if (!CmpIn) {
1942 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 } else {
1945 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1946 RHSLeftoverRegs[I]);
1947 auto CmpEq =
1948 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1949 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1950 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1951 }
1952
1953 CmpIn = CmpOut;
1954 }
1955 }
1956 MI.eraseFromParent();
1957 return Legalized;
1958 }
1959 case TargetOpcode::G_FCMP:
1960 if (TypeIdx != 0)
1961 return UnableToLegalize;
1962
1963 Observer.changingInstr(MI);
1964 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1965 Observer.changedInstr(MI);
1966 return Legalized;
1967
1968 case TargetOpcode::G_SEXT_INREG: {
1969 if (TypeIdx != 0)
1970 return UnableToLegalize;
1971
1972 int64_t SizeInBits = MI.getOperand(2).getImm();
1973
1974 // So long as the new type has more bits than the bits we're extending, we
1975 // don't need to break it apart.
1976 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1977 Observer.changingInstr(MI);
1978 // We don't lose any non-extension bits by truncating the src and
1979 // sign-extending the dst.
1980 MachineOperand &MO1 = MI.getOperand(1);
1981 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1982 MO1.setReg(TruncMIB.getReg(0));
1983
1984 MachineOperand &MO2 = MI.getOperand(0);
1985 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1986 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1987 MIRBuilder.buildSExt(MO2, DstExt);
1988 MO2.setReg(DstExt);
1989 Observer.changedInstr(MI);
1990 return Legalized;
1991 }
1992
1993 // Break it apart. Components below the extension point are unmodified. The
1994 // component containing the extension point becomes a narrower SEXT_INREG.
1995 // Components above it are ashr'd from the component containing the
1996 // extension point.
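// For example, with NarrowTy = s32:
//   %d:s128 = G_SEXT_INREG %x:s128, 33
// becomes
//   %x0:s32, %x1:s32, %x2:s32, %x3:s32 = G_UNMERGE_VALUES %x
//   %y1:s32 = G_SEXT_INREG %x1, 1      ; part containing the extension point
//   %hi:s32 = G_ASHR %y1, 31           ; parts above are pure sign bits
//   %d:s128 = G_MERGE_VALUES %x0, %y1, %hi, %hi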
1997 if (SizeOp0 % NarrowSize != 0)
1998 return UnableToLegalize;
1999 int NumParts = SizeOp0 / NarrowSize;
2000
2001 // List the registers where the destination will be scattered.
2002 SmallVector<Register, 2> DstRegs;
2003 // List the registers where the source will be split.
2004 SmallVector<Register, 2> SrcRegs;
2005
2006 // Create all the temporary registers.
2007 for (int i = 0; i < NumParts; ++i) {
2008 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2009
2010 SrcRegs.push_back(SrcReg);
2011 }
2012
2013 // Explode the big arguments into smaller chunks.
2014 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2015
2016 Register AshrCstReg =
2017 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2018 .getReg(0);
2019 Register FullExtensionReg;
2020 Register PartialExtensionReg;
2021
2022 // Do the operation on each small part.
2023 for (int i = 0; i < NumParts; ++i) {
2024 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2025 DstRegs.push_back(SrcRegs[i]);
2026 PartialExtensionReg = DstRegs.back();
2027 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2028 assert(PartialExtensionReg &&
2029 "Expected to visit partial extension before full");
2030 if (FullExtensionReg) {
2031 DstRegs.push_back(FullExtensionReg);
2032 continue;
2033 }
2034 DstRegs.push_back(
2035 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2036 .getReg(0));
2037 FullExtensionReg = DstRegs.back();
2038 } else {
2039 DstRegs.push_back(
2040 MIRBuilder
2041 .buildInstr(
2042 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2043 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2044 .getReg(0));
2045 PartialExtensionReg = DstRegs.back();
2046 }
2047 }
2048
2049 // Gather the destination registers into the final destination.
2050 Register DstReg = MI.getOperand(0).getReg();
2051 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2052 MI.eraseFromParent();
2053 return Legalized;
2054 }
2055 case TargetOpcode::G_BSWAP:
2056 case TargetOpcode::G_BITREVERSE: {
2057 if (SizeOp0 % NarrowSize != 0)
2058 return UnableToLegalize;
2059
2060 Observer.changingInstr(MI);
2061 SmallVector<Register, 2> SrcRegs, DstRegs;
2062 unsigned NumParts = SizeOp0 / NarrowSize;
2063 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2064 MIRBuilder, MRI);
2065
2066 for (unsigned i = 0; i < NumParts; ++i) {
2067 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2068 {SrcRegs[NumParts - 1 - i]});
2069 DstRegs.push_back(DstPart.getReg(0));
2070 }
2071
2072 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2073
2074 Observer.changedInstr(MI);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 case TargetOpcode::G_PTRMASK: {
2080 if (TypeIdx != 1)
2081 return UnableToLegalize;
2082 Observer.changingInstr(MI);
2083 narrowScalarSrc(MI, NarrowTy, 2);
2084 Observer.changedInstr(MI);
2085 return Legalized;
2086 }
2087 case TargetOpcode::G_FPTOUI:
2088 case TargetOpcode::G_FPTOSI:
2089 case TargetOpcode::G_FPTOUI_SAT:
2090 case TargetOpcode::G_FPTOSI_SAT:
2091 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2092 case TargetOpcode::G_FPEXT:
2093 if (TypeIdx != 0)
2094 return UnableToLegalize;
2095 Observer.changingInstr(MI);
2096 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2097 Observer.changedInstr(MI);
2098 return Legalized;
2099 case TargetOpcode::G_FLDEXP:
2100 case TargetOpcode::G_STRICT_FLDEXP:
2101 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2102 case TargetOpcode::G_VSCALE: {
2103 Register Dst = MI.getOperand(0).getReg();
2104 LLT Ty = MRI.getType(Dst);
2105
2106 // Assume VSCALE(1) fits into a legal integer
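// Compute vscale once in NarrowTy, zero-extend it to the original type, and
// multiply by the requested constant factor.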
2107 const APInt One(NarrowTy.getSizeInBits(), 1);
2108 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2109 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2110 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2111 MIRBuilder.buildMul(Dst, ZExt, C);
2112
2113 MI.eraseFromParent();
2114 return Legalized;
2115 }
2116 }
2117}
2118
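// Coerce a value to an equivalently sized scalar: pointers are converted with
// G_PTRTOINT (failing for non-integral address spaces), and vectors are
// bitcast after converting any pointer elements to integers.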
2119 Register LegalizerHelper::coerceToScalar(Register Val) {
2120 LLT Ty = MRI.getType(Val);
2121 if (Ty.isScalar())
2122 return Val;
2123
2124 const DataLayout &DL = MIRBuilder.getDataLayout();
2125 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2126 if (Ty.isPointer()) {
2127 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2128 return Register();
2129 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2130 }
2131
2132 Register NewVal = Val;
2133
2134 assert(Ty.isVector());
2135 if (Ty.isPointerVector())
2136 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2137 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138}
2139
2140 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2141 unsigned OpIdx, unsigned ExtOpcode) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2144 MO.setReg(ExtB.getReg(0));
2145}
2146
2147 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2148 unsigned OpIdx) {
2149 MachineOperand &MO = MI.getOperand(OpIdx);
2150 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2151 MO.setReg(ExtB.getReg(0));
2152}
2153
2154 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2155 unsigned OpIdx, unsigned TruncOpcode) {
2156 MachineOperand &MO = MI.getOperand(OpIdx);
2157 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2158 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2159 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2160 MO.setReg(DstExt);
2161}
2162
2163 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2164 unsigned OpIdx, unsigned ExtOpcode) {
2165 MachineOperand &MO = MI.getOperand(OpIdx);
2166 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2167 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2168 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2169 MO.setReg(DstTrunc);
2170}
2171
2172 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2173 unsigned OpIdx) {
2174 MachineOperand &MO = MI.getOperand(OpIdx);
2175 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2176 Register Dst = MO.getReg();
2177 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2178 MO.setReg(DstExt);
2179 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2180}
2181
2182 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2183 unsigned OpIdx) {
2184 MachineOperand &MO = MI.getOperand(OpIdx);
2185 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2186}
2187
2188 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2189 MachineOperand &Op = MI.getOperand(OpIdx);
2190 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2191}
2192
2193 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2194 MachineOperand &MO = MI.getOperand(OpIdx);
2195 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2196 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2197 MIRBuilder.buildBitcast(MO, CastDst);
2198 MO.setReg(CastDst);
2199}
2200
2201 LegalizerHelper::LegalizeResult
2202 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2203 LLT WideTy) {
2204 if (TypeIdx != 1)
2205 return UnableToLegalize;
2206
2207 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2208 if (DstTy.isVector())
2209 return UnableToLegalize;
2210
2211 LLT SrcTy = MRI.getType(Src1Reg);
2212 const int DstSize = DstTy.getSizeInBits();
2213 const int SrcSize = SrcTy.getSizeInBits();
2214 const int WideSize = WideTy.getSizeInBits();
2215 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2216
2217 unsigned NumOps = MI.getNumOperands();
2218 unsigned NumSrc = MI.getNumOperands() - 1;
2219 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2220
2221 if (WideSize >= DstSize) {
2222 // Directly pack the bits in the target type.
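// e.g. %d:s24 = G_MERGE_VALUES %a:s8, %b:s8, %c:s8 with WideTy = s32:
//   %r0:s32 = G_ZEXT %a
//   %r1:s32 = G_OR %r0, (G_ZEXT %b) << 8
//   %r2:s32 = G_OR %r1, (G_ZEXT %c) << 16
//   %d:s24 = G_TRUNC %r2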
2223 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2224
2225 for (unsigned I = 2; I != NumOps; ++I) {
2226 const unsigned Offset = (I - 1) * PartSize;
2227
2228 Register SrcReg = MI.getOperand(I).getReg();
2229 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2230
2231 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2232
2233 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2234 MRI.createGenericVirtualRegister(WideTy);
2235
2236 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2237 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2238 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2239 ResultReg = NextResult;
2240 }
2241
2242 if (WideSize > DstSize)
2243 MIRBuilder.buildTrunc(DstReg, ResultReg);
2244 else if (DstTy.isPointer())
2245 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2246
2247 MI.eraseFromParent();
2248 return Legalized;
2249 }
2250
2251 // Unmerge the original values to the GCD type, and recombine to the next
2252 // multiple greater than the original type.
2253 //
2254 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2255 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2256 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2257 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2258 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2259 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2260 // %12:_(s12) = G_MERGE_VALUES %10, %11
2261 //
2262 // Padding with undef if necessary:
2263 //
2264 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2265 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2266 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2267 // %7:_(s2) = G_IMPLICIT_DEF
2268 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2269 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2270 // %10:_(s12) = G_MERGE_VALUES %8, %9
2271
2272 const int GCD = std::gcd(SrcSize, WideSize);
2273 LLT GCDTy = LLT::scalar(GCD);
2274
2275 SmallVector<Register, 8> NewMergeRegs;
2276 SmallVector<Register, 8> Unmerges;
2277 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2278
2279 // Decompose the original operands if they don't evenly divide.
2280 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2281 Register SrcReg = MO.getReg();
2282 if (GCD == SrcSize) {
2283 Unmerges.push_back(SrcReg);
2284 } else {
2285 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2286 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2287 Unmerges.push_back(Unmerge.getReg(J));
2288 }
2289 }
2290
2291 // Pad with undef to the next size that is a multiple of the requested size.
2292 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2293 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2294 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2295 Unmerges.push_back(UndefReg);
2296 }
2297
2298 const int PartsPerGCD = WideSize / GCD;
2299
2300 // Build merges of each piece.
2301 ArrayRef<Register> Slicer(Unmerges);
2302 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2303 auto Merge =
2304 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2305 NewMergeRegs.push_back(Merge.getReg(0));
2306 }
2307
2308 // A truncate may be necessary if the requested type doesn't evenly divide the
2309 // original result type.
2310 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2311 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2312 } else {
2313 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2314 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2315 }
2316
2317 MI.eraseFromParent();
2318 return Legalized;
2319}
2320
2321 LegalizerHelper::LegalizeResult
2322 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2323 LLT WideTy) {
2324 if (TypeIdx != 0)
2325 return UnableToLegalize;
2326
2327 int NumDst = MI.getNumOperands() - 1;
2328 Register SrcReg = MI.getOperand(NumDst).getReg();
2329 LLT SrcTy = MRI.getType(SrcReg);
2330 if (SrcTy.isVector())
2331 return UnableToLegalize;
2332
2333 Register Dst0Reg = MI.getOperand(0).getReg();
2334 LLT DstTy = MRI.getType(Dst0Reg);
2335 if (!DstTy.isScalar())
2336 return UnableToLegalize;
2337
2338 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2339 if (SrcTy.isPointer()) {
2340 const DataLayout &DL = MIRBuilder.getDataLayout();
2341 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2342 LLVM_DEBUG(
2343 dbgs() << "Not casting non-integral address space integer\n");
2344 return UnableToLegalize;
2345 }
2346
2347 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2348 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2349 }
2350
2351 // Widen SrcTy to WideTy. This does not affect the result, but since the
2352 // user requested this size, it is probably better handled than SrcTy and
2353 // should reduce the total number of legalization artifacts.
2354 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2355 SrcTy = WideTy;
2356 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2357 }
2358
2359 // There's no unmerge type to target. Directly extract the bits from the
2360 // source type.
2361 unsigned DstSize = DstTy.getSizeInBits();
2362
2363 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2364 for (int I = 1; I != NumDst; ++I) {
2365 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2366 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2368 }
2369
2370 MI.eraseFromParent();
2371 return Legalized;
2372 }
2373
2374 // Extend the source to a wider type.
2375 LLT LCMTy = getLCMType(SrcTy, WideTy);
2376
2377 Register WideSrc = SrcReg;
2378 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2379 // TODO: If this is an integral address space, cast to integer and anyext.
2380 if (SrcTy.isPointer()) {
2381 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2382 return UnableToLegalize;
2383 }
2384
2385 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2386 }
2387
2388 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2389
2390 // Create a sequence of unmerges and merges to the original results. Since we
2391 // may have widened the source, we will need to pad the results with dead defs
2392 // to cover the source register.
2393 // e.g. widen s48 to s64:
2394 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2395 //
2396 // =>
2397 // %4:_(s192) = G_ANYEXT %0:_(s96)
2398 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2399 // ; unpack to GCD type, with extra dead defs
2400 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2401 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2402 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2403 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2404 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2405 const LLT GCDTy = getGCDType(WideTy, DstTy);
2406 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2407 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2408
2409 // Directly unmerge to the destination without going through a GCD type
2410 // if possible
2411 if (PartsPerRemerge == 1) {
2412 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2413
2414 for (int I = 0; I != NumUnmerge; ++I) {
2415 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2416
2417 for (int J = 0; J != PartsPerUnmerge; ++J) {
2418 int Idx = I * PartsPerUnmerge + J;
2419 if (Idx < NumDst)
2420 MIB.addDef(MI.getOperand(Idx).getReg());
2421 else {
2422 // Create dead def for excess components.
2423 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 }
2425 }
2426
2427 MIB.addUse(Unmerge.getReg(I));
2428 }
2429 } else {
2430 SmallVector<Register, 16> Parts;
2431 for (int J = 0; J != NumUnmerge; ++J)
2432 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2433
2434 SmallVector<Register, 8> RemergeParts;
2435 for (int I = 0; I != NumDst; ++I) {
2436 for (int J = 0; J < PartsPerRemerge; ++J) {
2437 const int Idx = I * PartsPerRemerge + J;
2438 RemergeParts.emplace_back(Parts[Idx]);
2439 }
2440
2441 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2442 RemergeParts.clear();
2443 }
2444 }
2445
2446 MI.eraseFromParent();
2447 return Legalized;
2448}
2449
2450 LegalizerHelper::LegalizeResult
2451 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2452 LLT WideTy) {
2453 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2454 unsigned Offset = MI.getOperand(2).getImm();
2455
2456 if (TypeIdx == 0) {
2457 if (SrcTy.isVector() || DstTy.isVector())
2458 return UnableToLegalize;
2459
2460 SrcOp Src(SrcReg);
2461 if (SrcTy.isPointer()) {
2462 // Extracts from pointers can be handled only if they are really just
2463 // simple integers.
2464 const DataLayout &DL = MIRBuilder.getDataLayout();
2465 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2466 return UnableToLegalize;
2467
2468 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2469 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 SrcTy = SrcAsIntTy;
2471 }
2472
2473 if (DstTy.isPointer())
2474 return UnableToLegalize;
2475
2476 if (Offset == 0) {
2477 // Avoid a shift in the degenerate case.
2478 MIRBuilder.buildTrunc(DstReg,
2479 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2480 MI.eraseFromParent();
2481 return Legalized;
2482 }
2483
2484 // Do a shift in the source type.
2485 LLT ShiftTy = SrcTy;
2486 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2487 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2488 ShiftTy = WideTy;
2489 }
2490
2491 auto LShr = MIRBuilder.buildLShr(
2492 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2493 MIRBuilder.buildTrunc(DstReg, LShr);
2494 MI.eraseFromParent();
2495 return Legalized;
2496 }
2497
2498 if (SrcTy.isScalar()) {
2499 Observer.changingInstr(MI);
2500 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2501 Observer.changedInstr(MI);
2502 return Legalized;
2503 }
2504
2505 if (!SrcTy.isVector())
2506 return UnableToLegalize;
2507
2508 if (DstTy != SrcTy.getElementType())
2509 return UnableToLegalize;
2510
2511 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2512 return UnableToLegalize;
2513
2514 Observer.changingInstr(MI);
2515 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2516
2517 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2518 Offset);
2519 widenScalarDst(MI, WideTy.getScalarType(), 0);
2520 Observer.changedInstr(MI);
2521 return Legalized;
2522}
2523
2524 LegalizerHelper::LegalizeResult
2525 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2526 LLT WideTy) {
2527 if (TypeIdx != 0 || WideTy.isVector())
2528 return UnableToLegalize;
2529 Observer.changingInstr(MI);
2530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2531 widenScalarDst(MI, WideTy);
2532 Observer.changedInstr(MI);
2533 return Legalized;
2534}
2535
2536 LegalizerHelper::LegalizeResult
2537 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2538 LLT WideTy) {
2539 unsigned Opcode;
2540 unsigned ExtOpcode;
2541 std::optional<Register> CarryIn;
2542 switch (MI.getOpcode()) {
2543 default:
2544 llvm_unreachable("Unexpected opcode!");
2545 case TargetOpcode::G_SADDO:
2546 Opcode = TargetOpcode::G_ADD;
2547 ExtOpcode = TargetOpcode::G_SEXT;
2548 break;
2549 case TargetOpcode::G_SSUBO:
2550 Opcode = TargetOpcode::G_SUB;
2551 ExtOpcode = TargetOpcode::G_SEXT;
2552 break;
2553 case TargetOpcode::G_UADDO:
2554 Opcode = TargetOpcode::G_ADD;
2555 ExtOpcode = TargetOpcode::G_ZEXT;
2556 break;
2557 case TargetOpcode::G_USUBO:
2558 Opcode = TargetOpcode::G_SUB;
2559 ExtOpcode = TargetOpcode::G_ZEXT;
2560 break;
2561 case TargetOpcode::G_SADDE:
2562 Opcode = TargetOpcode::G_UADDE;
2563 ExtOpcode = TargetOpcode::G_SEXT;
2564 CarryIn = MI.getOperand(4).getReg();
2565 break;
2566 case TargetOpcode::G_SSUBE:
2567 Opcode = TargetOpcode::G_USUBE;
2568 ExtOpcode = TargetOpcode::G_SEXT;
2569 CarryIn = MI.getOperand(4).getReg();
2570 break;
2571 case TargetOpcode::G_UADDE:
2572 Opcode = TargetOpcode::G_UADDE;
2573 ExtOpcode = TargetOpcode::G_ZEXT;
2574 CarryIn = MI.getOperand(4).getReg();
2575 break;
2576 case TargetOpcode::G_USUBE:
2577 Opcode = TargetOpcode::G_USUBE;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 CarryIn = MI.getOperand(4).getReg();
2580 break;
2581 }
2582
2583 if (TypeIdx == 1) {
2584 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2585
2586 Observer.changingInstr(MI);
2587 if (CarryIn)
2588 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2589 widenScalarDst(MI, WideTy, 1);
2590
2591 Observer.changedInstr(MI);
2592 return Legalized;
2593 }
2594
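// e.g. s8 G_SADDO widened to s32: sign-extend both operands, add in s32, and
// report overflow if the s32 sum differs from sext(trunc(sum)).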
2595 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2596 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2597 // Do the arithmetic in the larger type.
2598 Register NewOp;
2599 if (CarryIn) {
2600 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2601 NewOp = MIRBuilder
2602 .buildInstr(Opcode, {WideTy, CarryOutTy},
2603 {LHSExt, RHSExt, *CarryIn})
2604 .getReg(0);
2605 } else {
2606 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2607 }
2608 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2609 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2610 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2611 // There is no overflow if the ExtOp is the same as NewOp.
2612 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2613 // Now trunc the NewOp to the original result.
2614 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2615 MI.eraseFromParent();
2616 return Legalized;
2617}
2618
2619 LegalizerHelper::LegalizeResult
2620 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2621 LLT WideTy) {
2622 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2623 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2624 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2625 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2626 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2627 // We can convert this to:
2628 // 1. Any extend iN to iM
2629 // 2. SHL by M-N
2630 // 3. [US][ADD|SUB|SHL]SAT
2631 // 4. L/ASHR by M-N
2632 //
2633 // It may be more efficient to lower this to a min and a max operation in
2634 // the higher precision arithmetic if the promoted operation isn't legal,
2635 // but this decision is up to the target's lowering request.
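// e.g. s8 G_SADDSAT widened to s32: shift both operands left by 24 so the s8
// value occupies the top bits, do a saturating s32 add, then arithmetic shift
// right by 24 and truncate back to s8.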
2636 Register DstReg = MI.getOperand(0).getReg();
2637
2638 unsigned NewBits = WideTy.getScalarSizeInBits();
2639 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2640
2641 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2642 // must not left shift the RHS to preserve the shift amount.
2643 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2644 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2645 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2646 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2648 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2649
2650 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2651 {ShiftL, ShiftR}, MI.getFlags());
2652
2653 // Use a shift that will preserve the number of sign bits when the trunc is
2654 // folded away.
2655 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2656 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2657
2658 MIRBuilder.buildTrunc(DstReg, Result);
2659 MI.eraseFromParent();
2660 return Legalized;
2661}
2662
2663 LegalizerHelper::LegalizeResult
2664 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2665 LLT WideTy) {
2666 if (TypeIdx == 1) {
2667 Observer.changingInstr(MI);
2668 widenScalarDst(MI, WideTy, 1);
2669 Observer.changedInstr(MI);
2670 return Legalized;
2671 }
2672
2673 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2674 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2675 LLT SrcTy = MRI.getType(LHS);
2676 LLT OverflowTy = MRI.getType(OriginalOverflow);
2677 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2678
2679 // To determine if the result overflowed in the larger type, we extend the
2680 // input to the larger type, do the multiply (checking if it overflows),
2681 // then also check the high bits of the result to see if overflow happened
2682 // there.
2683 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2684 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2685 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2686
2687 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2688 // so we don't need to check the overflow result of larger type Mulo.
2689 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2690
2691 unsigned MulOpc =
2692 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2693
2694 MachineInstrBuilder Mulo;
2695 if (WideMulCanOverflow)
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2697 {LeftOperand, RightOperand});
2698 else
2699 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2700
2701 auto Mul = Mulo->getOperand(0);
2702 MIRBuilder.buildTrunc(Result, Mul);
2703
2704 MachineInstrBuilder ExtResult;
2705 // Overflow occurred if it occurred in the larger type, or if the high part
2706 // of the result does not zero/sign-extend the low part. Check this second
2707 // possibility first.
2708 if (IsSigned) {
2709 // For signed, overflow occurred when the high part does not sign-extend
2710 // the low part.
2711 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2712 } else {
2713 // Unsigned overflow occurred when the high part does not zero-extend the
2714 // low part.
2715 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2716 }
2717
2718 if (WideMulCanOverflow) {
2719 auto Overflow =
2720 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2721 // Finally check if the multiplication in the larger type itself overflowed.
2722 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2723 } else {
2724 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2725 }
2726 MI.eraseFromParent();
2727 return Legalized;
2728}
2729
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2732 unsigned Opcode = MI.getOpcode();
2733 switch (Opcode) {
2734 default:
2735 return UnableToLegalize;
2736 case TargetOpcode::G_ATOMICRMW_XCHG:
2737 case TargetOpcode::G_ATOMICRMW_ADD:
2738 case TargetOpcode::G_ATOMICRMW_SUB:
2739 case TargetOpcode::G_ATOMICRMW_AND:
2740 case TargetOpcode::G_ATOMICRMW_OR:
2741 case TargetOpcode::G_ATOMICRMW_XOR:
2742 case TargetOpcode::G_ATOMICRMW_MIN:
2743 case TargetOpcode::G_ATOMICRMW_MAX:
2744 case TargetOpcode::G_ATOMICRMW_UMIN:
2745 case TargetOpcode::G_ATOMICRMW_UMAX:
2746 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2747 Observer.changingInstr(MI);
2748 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2749 widenScalarDst(MI, WideTy, 0);
2750 Observer.changedInstr(MI);
2751 return Legalized;
2752 case TargetOpcode::G_ATOMIC_CMPXCHG:
2753 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2754 Observer.changingInstr(MI);
2755 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2756 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2761 if (TypeIdx == 0) {
2762 Observer.changingInstr(MI);
2763 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2764 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2765 widenScalarDst(MI, WideTy, 0);
2766 Observer.changedInstr(MI);
2767 return Legalized;
2768 }
2769 assert(TypeIdx == 1 &&
2770 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2771 Observer.changingInstr(MI);
2772 widenScalarDst(MI, WideTy, 1);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_EXTRACT:
2776 return widenScalarExtract(MI, TypeIdx, WideTy);
2777 case TargetOpcode::G_INSERT:
2778 return widenScalarInsert(MI, TypeIdx, WideTy);
2779 case TargetOpcode::G_MERGE_VALUES:
2780 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2781 case TargetOpcode::G_UNMERGE_VALUES:
2782 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2783 case TargetOpcode::G_SADDO:
2784 case TargetOpcode::G_SSUBO:
2785 case TargetOpcode::G_UADDO:
2786 case TargetOpcode::G_USUBO:
2787 case TargetOpcode::G_SADDE:
2788 case TargetOpcode::G_SSUBE:
2789 case TargetOpcode::G_UADDE:
2790 case TargetOpcode::G_USUBE:
2791 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_UMULO:
2793 case TargetOpcode::G_SMULO:
2794 return widenScalarMulo(MI, TypeIdx, WideTy);
2795 case TargetOpcode::G_SADDSAT:
2796 case TargetOpcode::G_SSUBSAT:
2797 case TargetOpcode::G_SSHLSAT:
2798 case TargetOpcode::G_UADDSAT:
2799 case TargetOpcode::G_USUBSAT:
2800 case TargetOpcode::G_USHLSAT:
2801 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_CTTZ:
2803 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2804 case TargetOpcode::G_CTLZ:
2805 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2806 case TargetOpcode::G_CTPOP: {
2807 if (TypeIdx == 0) {
2808 Observer.changingInstr(MI);
2809 widenScalarDst(MI, WideTy, 0);
2810 Observer.changedInstr(MI);
2811 return Legalized;
2812 }
2813
2814 Register SrcReg = MI.getOperand(1).getReg();
2815
2816 // First extend the input.
2817 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2818 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2819 ? TargetOpcode::G_ANYEXT
2820 : TargetOpcode::G_ZEXT;
2821 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2822 LLT CurTy = MRI.getType(SrcReg);
2823 unsigned NewOpc = Opcode;
2824 if (NewOpc == TargetOpcode::G_CTTZ) {
2825 // The count is the same in the larger type except if the original
2826 // value was zero. This can be handled by setting the bit just off
2827 // the top of the original type.
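// e.g. widening G_CTTZ from s8 to s32: OR in bit 8 so a zero input yields a
// count of 8 (the original width) instead of depending on the undefined high
// bits of the any-extended value.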
2828 auto TopBit =
2829 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2830 MIBSrc = MIRBuilder.buildOr(
2831 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2832 // Now we know the operand is non-zero, so use the more relaxed opcode.
2833 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2834 }
2835
2836 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2837
2838 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2839 // An optimization where the result is the CTLZ after the left shift by
2840 // (width of WideTy - width of CurTy), that is,
2841 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2842 // Result = ctlz MIBSrc
2843 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2844 MIRBuilder.buildConstant(WideTy, SizeDiff));
2845 }
2846
2847 // Perform the operation at the larger size.
2848 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2849 // This is already the correct result for CTPOP and CTTZ.
2850 if (Opcode == TargetOpcode::G_CTLZ) {
2852 // The correct result is NewOp - (width of WideTy - width of CurTy).
2852 MIBNewOp = MIRBuilder.buildSub(
2853 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2854 }
2855
2856 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2857 MI.eraseFromParent();
2858 return Legalized;
2859 }
2860 case TargetOpcode::G_BSWAP: {
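// The swap is done in WideTy, which leaves the swapped bytes in the high end
// of the wide value, so shift right by the width difference before
// truncating, e.g. for s16 -> s32: bswap the any-extended value, then
// G_LSHR by 16.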
2861 Observer.changingInstr(MI);
2862 Register DstReg = MI.getOperand(0).getReg();
2863
2864 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2865 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2866 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2867 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2868
2869 MI.getOperand(0).setReg(DstExt);
2870
2871 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2872
2873 LLT Ty = MRI.getType(DstReg);
2874 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2875 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2876 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2877
2878 MIRBuilder.buildTrunc(DstReg, ShrReg);
2879 Observer.changedInstr(MI);
2880 return Legalized;
2881 }
2882 case TargetOpcode::G_BITREVERSE: {
2883 Observer.changingInstr(MI);
2884
2885 Register DstReg = MI.getOperand(0).getReg();
2886 LLT Ty = MRI.getType(DstReg);
2887 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2888
2889 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2890 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2891 MI.getOperand(0).setReg(DstExt);
2892 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2893
2894 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2895 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2896 MIRBuilder.buildTrunc(DstReg, Shift);
2897 Observer.changedInstr(MI);
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_FREEZE:
2901 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2902 Observer.changingInstr(MI);
2903 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2904 widenScalarDst(MI, WideTy);
2905 Observer.changedInstr(MI);
2906 return Legalized;
2907
2908 case TargetOpcode::G_ABS:
2909 Observer.changingInstr(MI);
2910 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2911 widenScalarDst(MI, WideTy);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_ADD:
2916 case TargetOpcode::G_AND:
2917 case TargetOpcode::G_MUL:
2918 case TargetOpcode::G_OR:
2919 case TargetOpcode::G_XOR:
2920 case TargetOpcode::G_SUB:
2921 case TargetOpcode::G_SHUFFLE_VECTOR:
2922 // Perform operation at larger width (any extension is fine here, high bits
2923 // don't affect the result) and then truncate the result back to the
2924 // original type.
2925 Observer.changingInstr(MI);
2926 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarDst(MI, WideTy);
2929 Observer.changedInstr(MI);
2930 return Legalized;
2931
2932 case TargetOpcode::G_SBFX:
2933 case TargetOpcode::G_UBFX:
2934 Observer.changingInstr(MI);
2935
2936 if (TypeIdx == 0) {
2937 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2938 widenScalarDst(MI, WideTy);
2939 } else {
2940 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2941 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2942 }
2943
2944 Observer.changedInstr(MI);
2945 return Legalized;
2946
2947 case TargetOpcode::G_SHL:
2948 Observer.changingInstr(MI);
2949
2950 if (TypeIdx == 0) {
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 assert(TypeIdx == 1);
2955 // The "number of bits to shift" operand must preserve its value as an
2956 // unsigned integer:
2957 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2958 }
2959
2960 Observer.changedInstr(MI);
2961 return Legalized;
2962
2963 case TargetOpcode::G_ROTR:
2964 case TargetOpcode::G_ROTL:
2965 if (TypeIdx != 1)
2966 return UnableToLegalize;
2967
2968 Observer.changingInstr(MI);
2969 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2970 Observer.changedInstr(MI);
2971 return Legalized;
2972
2973 case TargetOpcode::G_SDIV:
2974 case TargetOpcode::G_SREM:
2975 case TargetOpcode::G_SMIN:
2976 case TargetOpcode::G_SMAX:
2977 case TargetOpcode::G_ABDS:
2978 Observer.changingInstr(MI);
2979 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2980 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2981 widenScalarDst(MI, WideTy);
2982 Observer.changedInstr(MI);
2983 return Legalized;
2984
2985 case TargetOpcode::G_SDIVREM:
2986 Observer.changingInstr(MI);
2987 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2988 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2989 widenScalarDst(MI, WideTy);
2990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2991 widenScalarDst(MI, WideTy, 1);
2992 Observer.changedInstr(MI);
2993 return Legalized;
2994
2995 case TargetOpcode::G_ASHR:
2996 case TargetOpcode::G_LSHR:
2997 Observer.changingInstr(MI);
2998
2999 if (TypeIdx == 0) {
3000 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3001 : TargetOpcode::G_ZEXT;
3002
3003 widenScalarSrc(MI, WideTy, 1, CvtOp);
3004 widenScalarDst(MI, WideTy);
3005 } else {
3006 assert(TypeIdx == 1);
3007 // The "number of bits to shift" operand must preserve its value as an
3008 // unsigned integer:
3009 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3010 }
3011
3012 Observer.changedInstr(MI);
3013 return Legalized;
3014 case TargetOpcode::G_UDIV:
3015 case TargetOpcode::G_UREM:
3016 case TargetOpcode::G_ABDU:
3017 Observer.changingInstr(MI);
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3019 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3020 widenScalarDst(MI, WideTy);
3021 Observer.changedInstr(MI);
3022 return Legalized;
3023 case TargetOpcode::G_UDIVREM:
3024 Observer.changingInstr(MI);
3025 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3026 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3027 widenScalarDst(MI, WideTy);
3028 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3029 widenScalarDst(MI, WideTy, 1);
3030 Observer.changedInstr(MI);
3031 return Legalized;
3032 case TargetOpcode::G_UMIN:
3033 case TargetOpcode::G_UMAX: {
3034 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3035
3036 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3037 unsigned ExtOpc =
3038 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3039 getApproximateEVTForLLT(WideTy, Ctx))
3040 ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 Observer.changingInstr(MI);
3044 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3045 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3046 widenScalarDst(MI, WideTy);
3047 Observer.changedInstr(MI);
3048 return Legalized;
3049 }
3050
3051 case TargetOpcode::G_SELECT:
3052 Observer.changingInstr(MI);
3053 if (TypeIdx == 0) {
3054 // Perform operation at larger width (any extension is fine here, high
3055 // bits don't affect the result) and then truncate the result back to the
3056 // original type.
3057 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3058 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3059 widenScalarDst(MI, WideTy);
3060 } else {
3061 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3062 // Explicit extension is required here since high bits affect the result.
3063 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3064 }
3065 Observer.changedInstr(MI);
3066 return Legalized;
3067
3068 case TargetOpcode::G_FPEXT:
3069 if (TypeIdx != 1)
3070 return UnableToLegalize;
3071
3072 Observer.changingInstr(MI);
3073 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3074 Observer.changedInstr(MI);
3075 return Legalized;
3076 case TargetOpcode::G_FPTOSI:
3077 case TargetOpcode::G_FPTOUI:
3078 case TargetOpcode::G_INTRINSIC_LRINT:
3079 case TargetOpcode::G_INTRINSIC_LLRINT:
3080 case TargetOpcode::G_IS_FPCLASS:
3081 Observer.changingInstr(MI);
3082
3083 if (TypeIdx == 0)
3084 widenScalarDst(MI, WideTy);
3085 else
3086 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3087
3088 Observer.changedInstr(MI);
3089 return Legalized;
3090 case TargetOpcode::G_SITOFP:
3091 Observer.changingInstr(MI);
3092
3093 if (TypeIdx == 0)
3094 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3095 else
3096 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3097
3098 Observer.changedInstr(MI);
3099 return Legalized;
3100 case TargetOpcode::G_UITOFP:
3101 Observer.changingInstr(MI);
3102
3103 if (TypeIdx == 0)
3104 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3105 else
3106 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3107
3108 Observer.changedInstr(MI);
3109 return Legalized;
3110 case TargetOpcode::G_FPTOSI_SAT:
3111 case TargetOpcode::G_FPTOUI_SAT:
3112 Observer.changingInstr(MI);
3113
3114 if (TypeIdx == 0) {
3115 Register OldDst = MI.getOperand(0).getReg();
3116 LLT Ty = MRI.getType(OldDst);
3117 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3118 Register NewDst;
3119 MI.getOperand(0).setReg(ExtReg);
3120 uint64_t ShortBits = Ty.getScalarSizeInBits();
3121 uint64_t WideBits = WideTy.getScalarSizeInBits();
3122 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3123 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3124 // z = i16 fptosi_sat(a)
3125 // ->
3126 // x = i32 fptosi_sat(a)
3127 // y = smin(x, 32767)
3128 // z = smax(y, -32768)
3129 auto MaxVal = MIRBuilder.buildConstant(
3130 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3131 auto MinVal = MIRBuilder.buildConstant(
3132 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3133 Register MidReg =
3134 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3135 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3136 } else {
3137 // z = i16 fptoui_sat(a)
3138 // ->
3139 // x = i32 fptoui_sat(a)
3140 // y = smin(x, 65535)
3141 auto MaxVal = MIRBuilder.buildConstant(
3142 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3143 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3144 }
3145 MIRBuilder.buildTrunc(OldDst, NewDst);
3146 } else
3147 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3148
3149 Observer.changedInstr(MI);
3150 return Legalized;
3151 case TargetOpcode::G_LOAD:
3152 case TargetOpcode::G_SEXTLOAD:
3153 case TargetOpcode::G_ZEXTLOAD:
3154 Observer.changingInstr(MI);
3155 widenScalarDst(MI, WideTy);
3156 Observer.changedInstr(MI);
3157 return Legalized;
3158
3159 case TargetOpcode::G_STORE: {
3160 if (TypeIdx != 0)
3161 return UnableToLegalize;
3162
3163 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3164 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3165 if (!Ty.isScalar()) {
3166 // We need to widen the vector element type.
3167 Observer.changingInstr(MI);
3168 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3169 // We also need to adjust the MMO to turn this into a truncating store.
3170 MachineMemOperand &MMO = **MI.memoperands_begin();
3171 MachineFunction &MF = MIRBuilder.getMF();
3172 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3173 MI.setMemRefs(MF, {NewMMO});
3174 Observer.changedInstr(MI);
3175 return Legalized;
3176 }
3177
3178 Observer.changingInstr(MI);
3179
3180 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3181 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3182 widenScalarSrc(MI, WideTy, 0, ExtType);
3183
3184 Observer.changedInstr(MI);
3185 return Legalized;
3186 }
3187 case TargetOpcode::G_CONSTANT: {
3188 MachineOperand &SrcMO = MI.getOperand(1);
3189 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3190 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3191 MRI.getType(MI.getOperand(0).getReg()));
3192 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3193 ExtOpc == TargetOpcode::G_ANYEXT) &&
3194 "Illegal Extend");
3195 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3196 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3197 ? SrcVal.sext(WideTy.getSizeInBits())
3198 : SrcVal.zext(WideTy.getSizeInBits());
3199 Observer.changingInstr(MI);
3200 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3201
3202 widenScalarDst(MI, WideTy);
3203 Observer.changedInstr(MI);
3204 return Legalized;
3205 }
3206 case TargetOpcode::G_FCONSTANT: {
3207 // To avoid changing the bits of the constant due to extension to a larger
3208 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3209 MachineOperand &SrcMO = MI.getOperand(1);
3210 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3211 MIRBuilder.setInstrAndDebugLoc(MI);
3212 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3213 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3214 MI.eraseFromParent();
3215 return Legalized;
3216 }
3217 case TargetOpcode::G_IMPLICIT_DEF: {
3218 Observer.changingInstr(MI);
3219 widenScalarDst(MI, WideTy);
3220 Observer.changedInstr(MI);
3221 return Legalized;
3222 }
3223 case TargetOpcode::G_BRCOND:
3224 Observer.changingInstr(MI);
3225 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3226 Observer.changedInstr(MI);
3227 return Legalized;
3228
3229 case TargetOpcode::G_FCMP:
3230 Observer.changingInstr(MI);
3231 if (TypeIdx == 0)
3232 widenScalarDst(MI, WideTy);
3233 else {
3234 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3235 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3236 }
3237 Observer.changedInstr(MI);
3238 return Legalized;
3239
3240 case TargetOpcode::G_ICMP:
3241 Observer.changingInstr(MI);
3242 if (TypeIdx == 0)
3243 widenScalarDst(MI, WideTy);
3244 else {
3245 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3246 CmpInst::Predicate Pred =
3247 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3248
3249 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3250 unsigned ExtOpcode =
3251 (CmpInst::isSigned(Pred) ||
3252 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3253 getApproximateEVTForLLT(WideTy, Ctx)))
3254 ? TargetOpcode::G_SEXT
3255 : TargetOpcode::G_ZEXT;
3256 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3257 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3258 }
3259 Observer.changedInstr(MI);
3260 return Legalized;
3261
3262 case TargetOpcode::G_PTR_ADD:
3263 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3264 Observer.changingInstr(MI);
3265 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3266 Observer.changedInstr(MI);
3267 return Legalized;
3268
3269 case TargetOpcode::G_PHI: {
3270 assert(TypeIdx == 0 && "Expecting only Idx 0");
3271
3272 Observer.changingInstr(MI);
3273 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3274 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3275 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3276 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3277 }
3278
3279 MachineBasicBlock &MBB = *MI.getParent();
3280 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3281 widenScalarDst(MI, WideTy);
3282 Observer.changedInstr(MI);
3283 return Legalized;
3284 }
3285 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3286 if (TypeIdx == 0) {
3287 Register VecReg = MI.getOperand(1).getReg();
3288 LLT VecTy = MRI.getType(VecReg);
3289 Observer.changingInstr(MI);
3290
3291 widenScalarSrc(
3292 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3293 TargetOpcode::G_ANYEXT);
3294
3295 widenScalarDst(MI, WideTy, 0);
3296 Observer.changedInstr(MI);
3297 return Legalized;
3298 }
3299
3300 if (TypeIdx != 2)
3301 return UnableToLegalize;
3302 Observer.changingInstr(MI);
3303 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3304 Observer.changedInstr(MI);
3305 return Legalized;
3306 }
3307 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3308 if (TypeIdx == 0) {
3309 Observer.changingInstr(MI);
3310 const LLT WideEltTy = WideTy.getElementType();
3311
3312 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3313 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3314 widenScalarDst(MI, WideTy, 0);
3315 Observer.changedInstr(MI);
3316 return Legalized;
3317 }
3318
3319 if (TypeIdx == 1) {
3320 Observer.changingInstr(MI);
3321
3322 Register VecReg = MI.getOperand(1).getReg();
3323 LLT VecTy = MRI.getType(VecReg);
3324 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3325
3326 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3327 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3328 widenScalarDst(MI, WideVecTy, 0);
3329 Observer.changedInstr(MI);
3330 return Legalized;
3331 }
3332
3333 if (TypeIdx == 2) {
3334 Observer.changingInstr(MI);
3335 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3336 Observer.changedInstr(MI);
3337 return Legalized;
3338 }
3339
3340 return UnableToLegalize;
3341 }
3342 case TargetOpcode::G_FADD:
3343 case TargetOpcode::G_FMUL:
3344 case TargetOpcode::G_FSUB:
3345 case TargetOpcode::G_FMA:
3346 case TargetOpcode::G_FMAD:
3347 case TargetOpcode::G_FNEG:
3348 case TargetOpcode::G_FABS:
3349 case TargetOpcode::G_FCANONICALIZE:
3350 case TargetOpcode::G_FMINNUM:
3351 case TargetOpcode::G_FMAXNUM:
3352 case TargetOpcode::G_FMINNUM_IEEE:
3353 case TargetOpcode::G_FMAXNUM_IEEE:
3354 case TargetOpcode::G_FMINIMUM:
3355 case TargetOpcode::G_FMAXIMUM:
3356 case TargetOpcode::G_FMINIMUMNUM:
3357 case TargetOpcode::G_FMAXIMUMNUM:
3358 case TargetOpcode::G_FDIV:
3359 case TargetOpcode::G_FREM:
3360 case TargetOpcode::G_FCEIL:
3361 case TargetOpcode::G_FFLOOR:
3362 case TargetOpcode::G_FCOS:
3363 case TargetOpcode::G_FSIN:
3364 case TargetOpcode::G_FTAN:
3365 case TargetOpcode::G_FACOS:
3366 case TargetOpcode::G_FASIN:
3367 case TargetOpcode::G_FATAN:
3368 case TargetOpcode::G_FATAN2:
3369 case TargetOpcode::G_FCOSH:
3370 case TargetOpcode::G_FSINH:
3371 case TargetOpcode::G_FTANH:
3372 case TargetOpcode::G_FLOG10:
3373 case TargetOpcode::G_FLOG:
3374 case TargetOpcode::G_FLOG2:
3375 case TargetOpcode::G_FRINT:
3376 case TargetOpcode::G_FNEARBYINT:
3377 case TargetOpcode::G_FSQRT:
3378 case TargetOpcode::G_FEXP:
3379 case TargetOpcode::G_FEXP2:
3380 case TargetOpcode::G_FEXP10:
3381 case TargetOpcode::G_FPOW:
3382 case TargetOpcode::G_INTRINSIC_TRUNC:
3383 case TargetOpcode::G_INTRINSIC_ROUND:
3384 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3385 assert(TypeIdx == 0);
3386 Observer.changingInstr(MI);
3387
3388 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3389 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3390
3391 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3392 Observer.changedInstr(MI);
3393 return Legalized;
3394 case TargetOpcode::G_FMODF: {
3395 Observer.changingInstr(MI);
3396 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3397
3398 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3399 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3400 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3401 Observer.changedInstr(MI);
3402 return Legalized;
3403 }
3404 case TargetOpcode::G_FPOWI:
3405 case TargetOpcode::G_FLDEXP:
3406 case TargetOpcode::G_STRICT_FLDEXP: {
3407 if (TypeIdx == 0) {
3408 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3409 return UnableToLegalize;
3410
3411 Observer.changingInstr(MI);
3412 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3413 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3414 Observer.changedInstr(MI);
3415 return Legalized;
3416 }
3417
3418 if (TypeIdx == 1) {
3419 // For some reason SelectionDAG tries to promote to a libcall without
3420 // actually changing the integer type for promotion.
3421 Observer.changingInstr(MI);
3422 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3423 Observer.changedInstr(MI);
3424 return Legalized;
3425 }
3426
3427 return UnableToLegalize;
3428 }
3429 case TargetOpcode::G_FFREXP: {
3430 Observer.changingInstr(MI);
3431
3432 if (TypeIdx == 0) {
3433 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3434 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3435 } else {
3436 widenScalarDst(MI, WideTy, 1);
3437 }
3438
3439 Observer.changedInstr(MI);
3440 return Legalized;
3441 }
3442 case TargetOpcode::G_LROUND:
3443 case TargetOpcode::G_LLROUND:
3444 Observer.changingInstr(MI);
3445
3446 if (TypeIdx == 0)
3447 widenScalarDst(MI, WideTy);
3448 else
3449 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3450
3451 Observer.changedInstr(MI);
3452 return Legalized;
3453
3454 case TargetOpcode::G_INTTOPTR:
3455 if (TypeIdx != 1)
3456 return UnableToLegalize;
3457
3458 Observer.changingInstr(MI);
3459 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3460 Observer.changedInstr(MI);
3461 return Legalized;
3462 case TargetOpcode::G_PTRTOINT:
3463 if (TypeIdx != 0)
3464 return UnableToLegalize;
3465
3466 Observer.changingInstr(MI);
3467 widenScalarDst(MI, WideTy, 0);
3468 Observer.changedInstr(MI);
3469 return Legalized;
3470 case TargetOpcode::G_BUILD_VECTOR: {
3471 Observer.changingInstr(MI);
3472
3473 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3474 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3475 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3476
3477 // Avoid changing the result vector type if the source element type was
3478 // requested.
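 // E.g. (illustrative) widening the s8 sources of
 //   %v:_(<4 x s8>) = G_BUILD_VECTOR %a:_(s8), %b:_(s8), %c:_(s8), %d:_(s8)
 // to s32 yields
 //   %v:_(<4 x s8>) = G_BUILD_VECTOR_TRUNC %a32:_(s32), ..., %d32:_(s32)
 // leaving the <4 x s8> result type untouched.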
3479 if (TypeIdx == 1) {
3480 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3481 } else {
3482 widenScalarDst(MI, WideTy, 0);
3483 }
3484
3485 Observer.changedInstr(MI);
3486 return Legalized;
3487 }
3488 case TargetOpcode::G_SEXT_INREG:
3489 if (TypeIdx != 0)
3490 return UnableToLegalize;
3491
3492 Observer.changingInstr(MI);
3493 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3494 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3495 Observer.changedInstr(MI);
3496 return Legalized;
3497 case TargetOpcode::G_PTRMASK: {
3498 if (TypeIdx != 1)
3499 return UnableToLegalize;
3500 Observer.changingInstr(MI);
3501 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3502 Observer.changedInstr(MI);
3503 return Legalized;
3504 }
3505 case TargetOpcode::G_VECREDUCE_ADD: {
3506 if (TypeIdx != 1)
3507 return UnableToLegalize;
3508 Observer.changingInstr(MI);
3509 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3510 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3511 Observer.changedInstr(MI);
3512 return Legalized;
3513 }
3514 case TargetOpcode::G_VECREDUCE_FADD:
3515 case TargetOpcode::G_VECREDUCE_FMUL:
3516 case TargetOpcode::G_VECREDUCE_FMIN:
3517 case TargetOpcode::G_VECREDUCE_FMAX:
3518 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3519 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3520 if (TypeIdx != 0)
3521 return UnableToLegalize;
3522 Observer.changingInstr(MI);
3523 Register VecReg = MI.getOperand(1).getReg();
3524 LLT VecTy = MRI.getType(VecReg);
3525 LLT WideVecTy = VecTy.isVector()
3526 ? LLT::vector(VecTy.getElementCount(), WideTy)
3527 : WideTy;
3528 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3529 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3530 Observer.changedInstr(MI);
3531 return Legalized;
3532 }
3533 case TargetOpcode::G_VSCALE: {
3534 MachineOperand &SrcMO = MI.getOperand(1);
3535 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3536 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3537 // The CImm is always a signed value
3538 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3539 Observer.changingInstr(MI);
3540 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3541 widenScalarDst(MI, WideTy);
3542 Observer.changedInstr(MI);
3543 return Legalized;
3544 }
3545 case TargetOpcode::G_SPLAT_VECTOR: {
3546 if (TypeIdx != 1)
3547 return UnableToLegalize;
3548
3549 Observer.changingInstr(MI);
3550 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3551 Observer.changedInstr(MI);
3552 return Legalized;
3553 }
3554 case TargetOpcode::G_INSERT_SUBVECTOR: {
3555 if (TypeIdx != 0)
3556 return UnableToLegalize;
3557
3558 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3559 Register BigVec = IS.getBigVec();
3560 Register SubVec = IS.getSubVec();
3561
3562 LLT SubVecTy = MRI.getType(SubVec);
3563 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3564
3565 // Widen the G_INSERT_SUBVECTOR
3566 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3567 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3568 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3569 IS.getIndexImm());
3570
3571 // Truncate back down
3572 auto SplatZero = MIRBuilder.buildSplatVector(
3573 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3574 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3575 SplatZero);
3576
3577 MI.eraseFromParent();
3578
3579 return Legalized;
3580 }
3581 }
3582}
3583
3584static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3585 MachineIRBuilder &B, Register Src, LLT Ty) {
3586 auto Unmerge = B.buildUnmerge(Ty, Src);
3587 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3588 Pieces.push_back(Unmerge.getReg(I));
3589}
3590
3591static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3592 MachineIRBuilder &MIRBuilder) {
3593 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3594 MachineFunction &MF = MIRBuilder.getMF();
3595 const DataLayout &DL = MIRBuilder.getDataLayout();
3596 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3597 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3598 LLT DstLLT = MRI.getType(DstReg);
3599
3600 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3601
3602 auto Addr = MIRBuilder.buildConstantPool(
3603 AddrPtrTy,
3604 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3605
3606 MachineMemOperand *MMO =
3607 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3608 MachineMemOperand::MOLoad, DstLLT, Alignment);
3609
3610 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3611}
3612
3613LegalizerHelper::LegalizeResult
3614LegalizerHelper::lowerConstant(MachineInstr &MI) {
3615 const MachineOperand &ConstOperand = MI.getOperand(1);
3616 const Constant *ConstantVal = ConstOperand.getCImm();
3617
3618 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3619 MI.eraseFromParent();
3620
3621 return Legalized;
3622}
3623
3624LegalizerHelper::LegalizeResult
3625LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3626 const MachineOperand &ConstOperand = MI.getOperand(1);
3627 const Constant *ConstantVal = ConstOperand.getFPImm();
3628
3629 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3630 MI.eraseFromParent();
3631
3632 return Legalized;
3633}
3634
3635LegalizerHelper::LegalizeResult
3636LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3637 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3638 if (SrcTy.isVector()) {
3639 LLT SrcEltTy = SrcTy.getElementType();
3640 SmallVector<Register, 8> SrcRegs;
3641
3642 if (DstTy.isVector()) {
3643 int NumDstElt = DstTy.getNumElements();
3644 int NumSrcElt = SrcTy.getNumElements();
3645
3646 LLT DstEltTy = DstTy.getElementType();
3647 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3648 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3649
3650 // If there's an element size mismatch, insert intermediate casts to match
3651 // the result element type.
3652 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3653 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3654 //
3655 // =>
3656 //
3657 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3658 // %4:_(<2 x s8>) = G_BITCAST %2
3659 // %5:_(<2 x s8>) = G_BITCAST %3
3660 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3661 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3662 SrcPartTy = SrcEltTy;
3663 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3664 //
3665 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3666 //
3667 // =>
3668 //
3669 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3670 // %4:_(s16) = G_BITCAST %2
3671 // %5:_(s16) = G_BITCAST %3
3672 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3673 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3674 DstCastTy = DstEltTy;
3675 }
3676
3677 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3678 for (Register &SrcReg : SrcRegs)
3679 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3680 } else
3681 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3682
3683 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3684 MI.eraseFromParent();
3685 return Legalized;
3686 }
3687
3688 if (DstTy.isVector()) {
3689 SmallVector<Register, 8> SrcRegs;
3690 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3691 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3692 MI.eraseFromParent();
3693 return Legalized;
3694 }
3695
3696 return UnableToLegalize;
3697}
3698
3699/// Figure out the bit offset into a register when coercing a vector index for
3700/// the wide element type. This is only for the case when promoting a vector
3701/// to one with larger elements.
3702///
3703///
3704/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3705/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3706static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3707 Register Idx,
3708 unsigned NewEltSize,
3709 unsigned OldEltSize) {
3710 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3711 LLT IdxTy = B.getMRI()->getType(Idx);
3712
3713 // Now figure out the amount we need to shift to get the target bits.
3714 auto OffsetMask = B.buildConstant(
3715 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3716 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3717 return B.buildShl(IdxTy, OffsetIdx,
3718 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3719}
3720
3721/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3722/// is casting to a vector with a smaller element size, perform multiple element
3723/// extracts and merge the results. If this is coercing to a vector with larger
3724/// elements, index the bitcasted vector and extract the target element with bit
3725/// operations. This is intended to force the indexing in the native register
3726/// size for architectures that can dynamically index the register file.
3727LegalizerHelper::LegalizeResult
3728LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3729 LLT CastTy) {
3730 if (TypeIdx != 1)
3731 return UnableToLegalize;
3732
3733 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3734
3735 LLT SrcEltTy = SrcVecTy.getElementType();
3736 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3737 unsigned OldNumElts = SrcVecTy.getNumElements();
3738
3739 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3740 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3741
3742 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3743 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3744 if (NewNumElts > OldNumElts) {
3745 // Decreasing the vector element size
3746 //
3747 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3748 // =>
3749 // v4i32:castx = bitcast x:v2i64
3750 //
3751 // i64 = bitcast
3752 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3753 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3754 //
3755 if (NewNumElts % OldNumElts != 0)
3756 return UnableToLegalize;
3757
3758 // Type of the intermediate result vector.
3759 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3760 LLT MidTy =
3761 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3762
3763 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3764
3765 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3766 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3767
3768 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3769 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3770 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3771 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3772 NewOps[I] = Elt.getReg(0);
3773 }
3774
3775 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3776 MIRBuilder.buildBitcast(Dst, NewVec);
3777 MI.eraseFromParent();
3778 return Legalized;
3779 }
3780
3781 if (NewNumElts < OldNumElts) {
3782 if (NewEltSize % OldEltSize != 0)
3783 return UnableToLegalize;
3784
3785 // This only depends on powers of 2 because we use bit tricks to figure out
3786 // the bit offset we need to shift to get the target element. A general
3787 // expansion could emit division/multiply.
3788 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3789 return UnableToLegalize;
3790
3791 // Increasing the vector element size.
3792 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3793 //
3794 // =>
3795 //
3796 // %cast = G_BITCAST %vec
3797 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3798 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3799 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3800 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3801 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3802 // %elt = G_TRUNC %elt_bits
3803
3804 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3805 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3806
3807 // Divide to get the index in the wider element type.
3808 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3809
3810 Register WideElt = CastVec;
3811 if (CastTy.isVector()) {
3812 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3813 ScaledIdx).getReg(0);
3814 }
3815
3816 // Compute the bit offset into the register of the target element.
3817 Register OffsetBits = getBitcastWiderVectorElementOffset(
3818 MIRBuilder, Idx, NewEltSize, OldEltSize);
3819
3820 // Shift the wide element to get the target element.
3821 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3822 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3823 MI.eraseFromParent();
3824 return Legalized;
3825 }
3826
3827 return UnableToLegalize;
3828}
3829
3830/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3831/// TargetReg, while preserving other bits in \p TargetReg.
3832///
3833/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
3834static Register buildBitFieldInsert(MachineIRBuilder &B,
3835 Register TargetReg, Register InsertReg,
3836 Register OffsetBits) {
3837 LLT TargetTy = B.getMRI()->getType(TargetReg);
3838 LLT InsertTy = B.getMRI()->getType(InsertReg);
3839 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3840 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3841
3842 // Produce a bitmask of the value to insert
3843 auto EltMask = B.buildConstant(
3844 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3845 InsertTy.getSizeInBits()));
3846 // Shift it into position
3847 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3848 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3849
3850 // Clear out the bits in the wide element
3851 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3852
3853 // The value to insert has all zeros already, so stick it into the masked
3854 // wide element.
3855 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3856}
3857
3858/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3859/// is increasing the element size, perform the indexing in the target element
3860/// type, and use bit operations to insert at the element position. This is
3861/// intended for architectures that can dynamically index the register file and
3862/// want to force indexing in the native register size.
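///
/// For example (illustrative register names), inserting an s8 element into a
/// <16 x s8> vector via a <4 x s32> cast type:
///   %cast       = G_BITCAST %vec               ; <4 x s32>
///   %scaled_idx = G_LSHR %idx, 2               ; index of the containing s32
///   %wide_elt   = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
///   ; insert %val into %wide_elt at the byte offset using shift/and/or
///   %new_vec    = G_INSERT_VECTOR_ELT %cast, %new_elt, %scaled_idx
///   %res        = G_BITCAST %new_vec           ; back to <16 x s8>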
3863LegalizerHelper::LegalizeResult
3864LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3865 LLT CastTy) {
3866 if (TypeIdx != 0)
3867 return UnableToLegalize;
3868
3869 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3870 MI.getFirst4RegLLTs();
3871 LLT VecTy = DstTy;
3872
3873 LLT VecEltTy = VecTy.getElementType();
3874 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3875 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3876 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3877
3878 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3879 unsigned OldNumElts = VecTy.getNumElements();
3880
3881 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3882 if (NewNumElts < OldNumElts) {
3883 if (NewEltSize % OldEltSize != 0)
3884 return UnableToLegalize;
3885
3886 // This only depends on powers of 2 because we use bit tricks to figure out
3887 // the bit offset we need to shift to get the target element. A general
3888 // expansion could emit division/multiply.
3889 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3890 return UnableToLegalize;
3891
3892 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3893 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3894
3895 // Divide to get the index in the wider element type.
3896 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3897
3898 Register ExtractedElt = CastVec;
3899 if (CastTy.isVector()) {
3900 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3901 ScaledIdx).getReg(0);
3902 }
3903
3904 // Compute the bit offset into the register of the target element.
3905 Register OffsetBits = getBitcastWiderVectorElementOffset(
3906 MIRBuilder, Idx, NewEltSize, OldEltSize);
3907
3908 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3909 Val, OffsetBits);
3910 if (CastTy.isVector()) {
3911 InsertedElt = MIRBuilder.buildInsertVectorElement(
3912 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3913 }
3914
3915 MIRBuilder.buildBitcast(Dst, InsertedElt);
3916 MI.eraseFromParent();
3917 return Legalized;
3918 }
3919
3920 return UnableToLegalize;
3921}
3922
3923// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3924// those whose operands are smaller than the legal vector type.
3925//
3926// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3927//
3928// ===>
3929//
3930// s32 = G_BITCAST <4 x s8>
3931// s32 = G_BITCAST <4 x s8>
3932// s32 = G_BITCAST <4 x s8>
3933// s32 = G_BITCAST <4 x s8>
3934// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3935// <16 x s8> = G_BITCAST <4 x s32>
3936LegalizerHelper::LegalizeResult
3937LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3938 LLT CastTy) {
3939 // Only G_CONCAT_VECTORS is handled here.
3940 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3941 if (!ConcatMI) {
3942 return UnableToLegalize;
3943 }
3944
3945 // Compute the scalar type each source will be bitcast to.
3946 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3947 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3948
3949 // Check if the build vector is Legal
3950 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3951 return UnableToLegalize;
3952 }
3953
3954 // Bitcast the sources
3955 SmallVector<Register> BitcastRegs;
3956 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3957 BitcastRegs.push_back(
3958 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3959 .getReg(0));
3960 }
3961
3962 // Build the scalar values into a vector
3963 Register BuildReg =
3964 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3965 MIRBuilder.buildBitcast(DstReg, BuildReg);
3966
3967 MI.eraseFromParent();
3968 return Legalized;
3969}
3970
3971// This bitcasts a shuffle vector to a different type currently of the same
3972// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3973// will be used instead.
3974//
3975// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3976// ===>
3977// <4 x s64> = G_PTRTOINT <4 x p0>
3978// <4 x s64> = G_PTRTOINT <4 x p0>
3979// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3980// <16 x p0> = G_INTTOPTR <16 x s64>
3981LegalizerHelper::LegalizeResult
3982LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3983 LLT CastTy) {
3984 auto ShuffleMI = cast<GShuffleVector>(&MI);
3985 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3986 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3987
3988 // We currently only handle vectors of the same size.
3989 if (TypeIdx != 0 ||
3990 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3991 CastTy.getElementCount() != DstTy.getElementCount())
3992 return UnableToLegalize;
3993
3994 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3995
3996 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3997 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3998 auto Shuf =
3999 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4000 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4001
4002 MI.eraseFromParent();
4003 return Legalized;
4004}
4005
4006/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4007///
4008/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4009///
4010/// ===>
4011///
4012/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4013/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4014/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4015LegalizerHelper::LegalizeResult
4016LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4017 LLT CastTy) {
4018 auto ES = cast<GExtractSubvector>(&MI);
4019
4020 if (!CastTy.isVector())
4021 return UnableToLegalize;
4022
4023 if (TypeIdx != 0)
4024 return UnableToLegalize;
4025
4026 Register Dst = ES->getReg(0);
4027 Register Src = ES->getSrcVec();
4028 uint64_t Idx = ES->getIndexImm();
4029
4030 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4031
4032 LLT DstTy = MRI.getType(Dst);
4033 LLT SrcTy = MRI.getType(Src);
4034 ElementCount DstTyEC = DstTy.getElementCount();
4035 ElementCount SrcTyEC = SrcTy.getElementCount();
4036 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4037 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4038
4039 if (DstTy == CastTy)
4040 return Legalized;
4041
4042 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4043 return UnableToLegalize;
4044
4045 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4046 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4047 if (CastEltSize < DstEltSize)
4048 return UnableToLegalize;
4049
4050 auto AdjustAmt = CastEltSize / DstEltSize;
4051 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4052 SrcTyMinElts % AdjustAmt != 0)
4053 return UnableToLegalize;
4054
4055 Idx /= AdjustAmt;
4056 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4057 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4058 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4059 MIRBuilder.buildBitcast(Dst, PromotedES);
4060
4061 ES->eraseFromParent();
4062 return Legalized;
4063}
4064
4065/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4066///
4067/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4068/// <vscale x 8 x i1>,
4069/// N
4070///
4071/// ===>
4072///
4073/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4074/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4075/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4076/// <vscale x 1 x i8>, N / 8
4077/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4078LegalizerHelper::LegalizeResult
4079LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4080 LLT CastTy) {
4081 auto ES = cast<GInsertSubvector>(&MI);
4082
4083 if (!CastTy.isVector())
4084 return UnableToLegalize;
4085
4086 if (TypeIdx != 0)
4087 return UnableToLegalize;
4088
4089 Register Dst = ES->getReg(0);
4090 Register BigVec = ES->getBigVec();
4091 Register SubVec = ES->getSubVec();
4092 uint64_t Idx = ES->getIndexImm();
4093
4094 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4095
4096 LLT DstTy = MRI.getType(Dst);
4097 LLT BigVecTy = MRI.getType(BigVec);
4098 LLT SubVecTy = MRI.getType(SubVec);
4099
4100 if (DstTy == CastTy)
4101 return Legalized;
4102
4103 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4104 return UnableToLegalize;
4105
4106 ElementCount DstTyEC = DstTy.getElementCount();
4107 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4108 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4109 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4110 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4111 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4112
4113 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4114 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4115 if (CastEltSize < DstEltSize)
4116 return UnableToLegalize;
4117
4118 auto AdjustAmt = CastEltSize / DstEltSize;
4119 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4120 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4121 return UnableToLegalize;
4122
4123 Idx /= AdjustAmt;
4124 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4125 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4126 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4127 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4128 auto PromotedIS =
4129 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4130 MIRBuilder.buildBitcast(Dst, PromotedIS);
4131
4132 ES->eraseFromParent();
4133 return Legalized;
4134}
4135
4136LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4137 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4138 Register DstReg = LoadMI.getDstReg();
4139 Register PtrReg = LoadMI.getPointerReg();
4140 LLT DstTy = MRI.getType(DstReg);
4141 MachineMemOperand &MMO = LoadMI.getMMO();
4142 LLT MemTy = MMO.getMemoryType();
4143 MachineFunction &MF = MIRBuilder.getMF();
4144
4145 unsigned MemSizeInBits = MemTy.getSizeInBits();
4146 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4147
4148 if (MemSizeInBits != MemStoreSizeInBits) {
4149 if (MemTy.isVector())
4150 return UnableToLegalize;
4151
4152 // Promote to a byte-sized load if not loading an integral number of
4153 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4154 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4155 MachineMemOperand *NewMMO =
4156 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4157
4158 Register LoadReg = DstReg;
4159 LLT LoadTy = DstTy;
4160
4161 // If this wasn't already an extending load, we need to widen the result
4162 // register to avoid creating a load with a narrower result than the source.
4163 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4164 LoadTy = WideMemTy;
4165 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4166 }
4167
4168 if (isa<GSExtLoad>(LoadMI)) {
4169 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4170 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4171 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4172 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4173 // The extra bits are guaranteed to be zero, since we stored them that
4174 // way. A zext load from Wide thus automatically gives zext from MemVT.
4175 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4176 } else {
4177 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4178 }
4179
4180 if (DstTy != LoadTy)
4181 MIRBuilder.buildTrunc(DstReg, LoadReg);
4182
4183 LoadMI.eraseFromParent();
4184 return Legalized;
4185 }
4186
4187 // Big endian lowering not implemented.
4188 if (MIRBuilder.getDataLayout().isBigEndian())
4189 return UnableToLegalize;
4190
4191 // This load needs splitting into power of 2 sized loads.
4192 //
4193 // Our strategy here is to generate anyextending loads for the smaller
4194 // types up to the next power-of-2 result type, and then combine the two
4195 // results together, before truncating back down to the non-pow-2
4196 // type.
4197 // E.g. v1 = i24 load =>
4198 // v2 = i32 zextload (2 byte)
4199 // v3 = i32 load (1 byte)
4200 // v4 = i32 shl v3, 16
4201 // v5 = i32 or v4, v2
4202 // v1 = i24 trunc v5
4203 // By doing this we generate the correct truncate which should get
4204 // combined away as an artifact with a matching extend.
4205
4206 uint64_t LargeSplitSize, SmallSplitSize;
4207
4208 if (!isPowerOf2_32(MemSizeInBits)) {
4209 // This load needs splitting into power of 2 sized loads.
4210 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4211 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4212 } else {
4213 // This is already a power of 2, but we still need to split this in half.
4214 //
4215 // Assume we're being asked to decompose an unaligned load.
4216 // TODO: If this requires multiple splits, handle them all at once.
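 // E.g. (illustrative) an s32 load with only 2-byte alignment, on a target
 // that rejects the unaligned access, is split into two s16 halves that are
 // recombined with shift/or below.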
4217 auto &Ctx = MF.getFunction().getContext();
4218 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4219 return UnableToLegalize;
4220
4221 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4222 }
4223
4224 if (MemTy.isVector()) {
4225 // TODO: Handle vector extloads
4226 if (MemTy != DstTy)
4227 return UnableToLegalize;
4228
4229 Align Alignment = LoadMI.getAlign();
4230 // Given an alignment larger than the size of the memory, we can increase
4231 // the size of the load without needing to scalarize it.
4232 if (Alignment.value() * 8 > MemSizeInBits &&
4235 DstTy.getElementType());
4236 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4237 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4238 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4239 NewLoad.getReg(0));
4240 LoadMI.eraseFromParent();
4241 return Legalized;
4242 }
4243
4244 // TODO: We can do better than scalarizing the vector and at least split it
4245 // in half.
4246 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4247 }
4248
4249 MachineMemOperand *LargeMMO =
4250 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4251 MachineMemOperand *SmallMMO =
4252 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4253
4254 LLT PtrTy = MRI.getType(PtrReg);
4255 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4256 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4257 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4258 PtrReg, *LargeMMO);
4259
4260 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4261 LargeSplitSize / 8);
4262 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4263 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4264 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4265 SmallPtr, *SmallMMO);
4266
4267 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4268 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4269
4270 if (AnyExtTy == DstTy)
4271 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4272 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4273 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4274 MIRBuilder.buildTrunc(DstReg, {Or});
4275 } else {
4276 assert(DstTy.isPointer() && "expected pointer");
4277 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4278
4279 // FIXME: We currently consider this to be illegal for non-integral address
4280 // spaces, but we still need a way to reinterpret the bits.
4281 MIRBuilder.buildIntToPtr(DstReg, Or);
4282 }
4283
4284 LoadMI.eraseFromParent();
4285 return Legalized;
4286}
4287
4288LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4289 // Lower a non-power of 2 store into multiple pow-2 stores.
4290 // E.g. split an i24 store into an i16 store + i8 store.
4291 // We do this by first extending the stored value to the next largest power
4292 // of 2 type, and then using truncating stores to store the components.
4293 // By doing this, as with G_LOAD, we generate an extend that can be
4294 // artifact-combined away instead of leaving behind extracts.
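 // E.g. (illustrative register names) an i24 store is lowered roughly as:
 //   %ext = G_ANYEXT %val to s32
 //   G_STORE %ext, %ptr           ; 2-byte truncating store (low half)
 //   %hi  = G_LSHR %ext, 16
 //   G_STORE %hi, %ptr + 2        ; 1-byte truncating store (high part)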
4295 Register SrcReg = StoreMI.getValueReg();
4296 Register PtrReg = StoreMI.getPointerReg();
4297 LLT SrcTy = MRI.getType(SrcReg);
4298 MachineFunction &MF = MIRBuilder.getMF();
4299 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4300 LLT MemTy = MMO.getMemoryType();
4301
4302 unsigned StoreWidth = MemTy.getSizeInBits();
4303 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4304
4305 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4306 // Promote to a byte-sized store with upper bits zero if not
4307 // storing an integral number of bytes. For example, promote
4308 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4309 LLT WideTy = LLT::scalar(StoreSizeInBits);
4310
4311 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4312 // Avoid creating a store with a narrower source than result.
4313 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4314 SrcTy = WideTy;
4315 }
4316
4317 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4318
4319 MachineMemOperand *NewMMO =
4320 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4321 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4322 StoreMI.eraseFromParent();
4323 return Legalized;
4324 }
4325
4326 if (MemTy.isVector()) {
4327 if (MemTy != SrcTy)
4328 return scalarizeVectorBooleanStore(StoreMI);
4329
4330 // TODO: We can do better than scalarizing the vector and at least split it
4331 // in half.
4332 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4333 }
4334
4335 unsigned MemSizeInBits = MemTy.getSizeInBits();
4336 uint64_t LargeSplitSize, SmallSplitSize;
4337
4338 if (!isPowerOf2_32(MemSizeInBits)) {
4339 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4340 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4341 } else {
4342 auto &Ctx = MF.getFunction().getContext();
4343 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4344 return UnableToLegalize; // Don't know what we're being asked to do.
4345
4346 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4347 }
4348
4349 // Extend to the next pow-2. If this store was itself the result of lowering,
4350 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4351 // that's wider than the stored size.
4352 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4353 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4354
4355 if (SrcTy.isPointer()) {
4356 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4357 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4358 }
4359
4360 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4361
4362 // Obtain the smaller value by shifting away the larger value.
4363 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4364 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4365
4366 // Generate the PtrAdd and truncating stores.
4367 LLT PtrTy = MRI.getType(PtrReg);
4368 auto OffsetCst = MIRBuilder.buildConstant(
4369 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4370 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4371
4372 MachineMemOperand *LargeMMO =
4373 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4374 MachineMemOperand *SmallMMO =
4375 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4376 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4377 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4378 StoreMI.eraseFromParent();
4379 return Legalized;
4380}
4381
4382LegalizerHelper::LegalizeResult
4383LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4384 Register SrcReg = StoreMI.getValueReg();
4385 Register PtrReg = StoreMI.getPointerReg();
4386 LLT SrcTy = MRI.getType(SrcReg);
4387 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4388 LLT MemTy = MMO.getMemoryType();
4389 LLT MemScalarTy = MemTy.getElementType();
4390 MachineFunction &MF = MIRBuilder.getMF();
4391
4392 assert(SrcTy.isVector() && "Expect a vector store type");
4393
4394 if (!MemScalarTy.isByteSized()) {
4395 // We need to build an integer scalar of the vector bit pattern.
4396 // It's not legal for us to add padding when storing a vector.
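 // E.g. (illustrative) storing a <8 x s1> value becomes a single s8 store
 // where element I supplies bit I (mirrored for big-endian layouts).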
4397 unsigned NumBits = MemTy.getSizeInBits();
4398 LLT IntTy = LLT::scalar(NumBits);
4399 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4400 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4401
4402 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4403 auto Elt = MIRBuilder.buildExtractVectorElement(
4404 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4405 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4406 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4407 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4408 ? (MemTy.getNumElements() - 1) - I
4409 : I;
4410 auto ShiftAmt = MIRBuilder.buildConstant(
4411 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4412 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4413 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4414 }
4415 auto PtrInfo = MMO.getPointerInfo();
4416 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4417 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4418 StoreMI.eraseFromParent();
4419 return Legalized;
4420 }
4421
4422 // TODO: implement simple scalarization.
4423 return UnableToLegalize;
4424}
4425
4426LegalizerHelper::LegalizeResult
4427LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4428 switch (MI.getOpcode()) {
4429 case TargetOpcode::G_LOAD: {
4430 if (TypeIdx != 0)
4431 return UnableToLegalize;
4432 MachineMemOperand &MMO = **MI.memoperands_begin();
4433
4434 // Not sure how to interpret a bitcast of an extending load.
4435 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4436 return UnableToLegalize;
4437
4438 Observer.changingInstr(MI);
4439 bitcastDst(MI, CastTy, 0);
4440 MMO.setType(CastTy);
4441 // The range metadata is no longer valid when reinterpreted as a different
4442 // type.
4443 MMO.clearRanges();
4444 Observer.changedInstr(MI);
4445 return Legalized;
4446 }
4447 case TargetOpcode::G_STORE: {
4448 if (TypeIdx != 0)
4449 return UnableToLegalize;
4450
4451 MachineMemOperand &MMO = **MI.memoperands_begin();
4452
4453 // Not sure how to interpret a bitcast of a truncating store.
4454 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4455 return UnableToLegalize;
4456
4457 Observer.changingInstr(MI);
4458 bitcastSrc(MI, CastTy, 0);
4459 MMO.setType(CastTy);
4460 Observer.changedInstr(MI);
4461 return Legalized;
4462 }
4463 case TargetOpcode::G_SELECT: {
4464 if (TypeIdx != 0)
4465 return UnableToLegalize;
4466
4467 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4468 LLVM_DEBUG(
4469 dbgs() << "bitcast action not implemented for vector select\n");
4470 return UnableToLegalize;
4471 }
4472
4473 Observer.changingInstr(MI);
4474 bitcastSrc(MI, CastTy, 2);
4475 bitcastSrc(MI, CastTy, 3);
4476 bitcastDst(MI, CastTy, 0);
4477 Observer.changedInstr(MI);
4478 return Legalized;
4479 }
4480 case TargetOpcode::G_AND:
4481 case TargetOpcode::G_OR:
4482 case TargetOpcode::G_XOR: {
4483 Observer.changingInstr(MI);
4484 bitcastSrc(MI, CastTy, 1);
4485 bitcastSrc(MI, CastTy, 2);
4486 bitcastDst(MI, CastTy, 0);
4487 Observer.changedInstr(MI);
4488 return Legalized;
4489 }
4490 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4491 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4492 case TargetOpcode::G_INSERT_VECTOR_ELT:
4493 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4494 case TargetOpcode::G_CONCAT_VECTORS:
4495 return bitcastConcatVector(MI, TypeIdx, CastTy);
4496 case TargetOpcode::G_SHUFFLE_VECTOR:
4497 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4498 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4499 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4500 case TargetOpcode::G_INSERT_SUBVECTOR:
4501 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4502 default:
4503 return UnableToLegalize;
4504 }
4505}
4506
4507// Legalize an instruction by changing the opcode in place.
4508void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4509 Observer.changingInstr(MI);
4510 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4511 Observer.changedInstr(MI);
4512}
4513
4514LegalizerHelper::LegalizeResult
4515LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4516 using namespace TargetOpcode;
4517
4518 switch(MI.getOpcode()) {
4519 default:
4520 return UnableToLegalize;
4521 case TargetOpcode::G_FCONSTANT:
4522 return lowerFConstant(MI);
4523 case TargetOpcode::G_BITCAST:
4524 return lowerBitcast(MI);
4525 case TargetOpcode::G_SREM:
4526 case TargetOpcode::G_UREM: {
4527 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4528 auto Quot =
4529 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4530 {MI.getOperand(1), MI.getOperand(2)});
4531
4532 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4533 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4534 MI.eraseFromParent();
4535 return Legalized;
4536 }
4537 case TargetOpcode::G_SADDO:
4538 case TargetOpcode::G_SSUBO:
4539 return lowerSADDO_SSUBO(MI);
4540 case TargetOpcode::G_SADDE:
4541 return lowerSADDE(MI);
4542 case TargetOpcode::G_SSUBE:
4543 return lowerSSUBE(MI);
4544 case TargetOpcode::G_UMULH:
4545 case TargetOpcode::G_SMULH:
4546 return lowerSMULH_UMULH(MI);
4547 case TargetOpcode::G_SMULO:
4548 case TargetOpcode::G_UMULO: {
4549 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4550 // result.
4551 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4552 LLT Ty = MRI.getType(Res);
4553
4554 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4555 ? TargetOpcode::G_SMULH
4556 : TargetOpcode::G_UMULH;
4557
4558 Observer.changingInstr(MI);
4559 const auto &TII = MIRBuilder.getTII();
4560 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4561 MI.removeOperand(1);
4562 Observer.changedInstr(MI);
4563
4564 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4565 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4566
4567 // Move insert point forward so we can use the Res register if needed.
4568 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4569
4570 // For *signed* multiply, overflow is detected by checking:
4571 // (hi != (lo >> bitwidth-1))
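    // E.g. (illustrative) for s8: 100 * 2 = 200 gives lo = 0xC8, hi = 0x00;
    // (lo >> 7) sign-extends to 0xFF != hi, so signed overflow is reported.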
4572 if (Opcode == TargetOpcode::G_SMULH) {
4573 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4574 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4575 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4576 } else {
4577 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4578 }
4579 return Legalized;
4580 }
4581 case TargetOpcode::G_FNEG: {
4582 auto [Res, SubByReg] = MI.getFirst2Regs();
4583 LLT Ty = MRI.getType(Res);
4584
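    // Flipping only the sign bit negates an IEEE float; e.g. (illustrative)
    // for s32 the mask is 0x80000000.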
4585 auto SignMask = MIRBuilder.buildConstant(
4586 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4587 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4588 MI.eraseFromParent();
4589 return Legalized;
4590 }
4591 case TargetOpcode::G_FSUB:
4592 case TargetOpcode::G_STRICT_FSUB: {
4593 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4594 LLT Ty = MRI.getType(Res);
4595
4596 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4597 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4598
4599 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4600 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4601 else
4602 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4603
4604 MI.eraseFromParent();
4605 return Legalized;
4606 }
4607 case TargetOpcode::G_FMAD:
4608 return lowerFMad(MI);
4609 case TargetOpcode::G_FFLOOR:
4610 return lowerFFloor(MI);
4611 case TargetOpcode::G_LROUND:
4612 case TargetOpcode::G_LLROUND: {
4613 Register DstReg = MI.getOperand(0).getReg();
4614 Register SrcReg = MI.getOperand(1).getReg();
4615 LLT SrcTy = MRI.getType(SrcReg);
4616 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4617 {SrcReg});
4618 MIRBuilder.buildFPTOSI(DstReg, Round);
4619 MI.eraseFromParent();
4620 return Legalized;
4621 }
4622 case TargetOpcode::G_INTRINSIC_ROUND:
4623 return lowerIntrinsicRound(MI);
4624 case TargetOpcode::G_FRINT: {
4625 // Since round even is the assumed rounding mode for unconstrained FP
4626 // operations, rint and roundeven are the same operation.
4627 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4628 return Legalized;
4629 }
4630 case TargetOpcode::G_INTRINSIC_LRINT:
4631 case TargetOpcode::G_INTRINSIC_LLRINT: {
4632 Register DstReg = MI.getOperand(0).getReg();
4633 Register SrcReg = MI.getOperand(1).getReg();
4634 LLT SrcTy = MRI.getType(SrcReg);
4635 auto Round =
4636 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4637 MIRBuilder.buildFPTOSI(DstReg, Round);
4638 MI.eraseFromParent();
4639 return Legalized;
4640 }
4641 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4642 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4643 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4644 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4645 **MI.memoperands_begin());
4646 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4647 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4648 MI.eraseFromParent();
4649 return Legalized;
4650 }
4651 case TargetOpcode::G_LOAD:
4652 case TargetOpcode::G_SEXTLOAD:
4653 case TargetOpcode::G_ZEXTLOAD:
4654 return lowerLoad(cast<GAnyLoad>(MI));
4655 case TargetOpcode::G_STORE:
4656 return lowerStore(cast<GStore>(MI));
4657 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4658 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4659 case TargetOpcode::G_CTLZ:
4660 case TargetOpcode::G_CTTZ:
4661 case TargetOpcode::G_CTPOP:
4662 return lowerBitCount(MI);
4663 case G_UADDO: {
4664 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4665
4666 Register NewRes = MRI.cloneVirtualRegister(Res);
4667
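    // Unsigned overflow occurred iff the sum wrapped around, i.e.
    // (LHS + RHS) compares unsigned-less-than RHS.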
4668 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4669 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4670
4671 MIRBuilder.buildCopy(Res, NewRes);
4672
4673 MI.eraseFromParent();
4674 return Legalized;
4675 }
4676 case G_UADDE: {
4677 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4678 const LLT CondTy = MRI.getType(CarryOut);
4679 const LLT Ty = MRI.getType(Res);
4680
4681 Register NewRes = MRI.cloneVirtualRegister(Res);
4682
4683 // Initial add of the two operands.
4684 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4685
4686 // Initial check for carry.
4687 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4688
4689 // Add the sum and the carry.
4690 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4691 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4692
4693 // Second check for carry. We can only carry if the initial sum is all 1s
4694 // and the carry is set, resulting in a new sum of 0.
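    // E.g. (illustrative) with s8 values: LHS = 0xFF, RHS = 0x00, CarryIn = 1
    // gives TmpRes = 0xFF with no carry from the first add, yet adding the
    // carry wraps the sum to 0, so the carry-out must still be set.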
4695 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4696 auto ResEqZero =
4697 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4698 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4699 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4700
4701 MIRBuilder.buildCopy(Res, NewRes);
4702
4703 MI.eraseFromParent();
4704 return Legalized;
4705 }
4706 case G_USUBO: {
4707 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4708
4709 MIRBuilder.buildSub(Res, LHS, RHS);
4710 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4711
4712 MI.eraseFromParent();
4713 return Legalized;
4714 }
4715 case G_USUBE: {
4716 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4717 const LLT CondTy = MRI.getType(BorrowOut);
4718 const LLT Ty = MRI.getType(Res);
4719
4720 // Initial subtract of the two operands.
4721 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4722
4723 // Initial check for borrow.
4724 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4725
4726 // Subtract the borrow from the first subtract.
4727 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4728 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4729
4730 // Second check for borrow. We can only borrow if the initial difference is
4731 // 0 and the borrow is set, resulting in a new difference of all 1s.
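    // E.g. (illustrative) with s8 values: LHS = 0x05, RHS = 0x05, BorrowIn = 1
    // gives TmpRes = 0 with no borrow from the first subtract, yet subtracting
    // the borrow wraps to 0xFF, so the borrow-out must still be set.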
4732 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4733 auto TmpResEqZero =
4734 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4735 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4736 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4737
4738 MI.eraseFromParent();
4739 return Legalized;
4740 }
4741 case G_UITOFP:
4742 return lowerUITOFP(MI);
4743 case G_SITOFP:
4744 return lowerSITOFP(MI);
4745 case G_FPTOUI:
4746 return lowerFPTOUI(MI);
4747 case G_FPTOSI:
4748 return lowerFPTOSI(MI);
4749 case G_FPTOUI_SAT:
4750 case G_FPTOSI_SAT:
4751 return lowerFPTOINT_SAT(MI);
4752 case G_FPTRUNC:
4753 return lowerFPTRUNC(MI);
4754 case G_FPOWI:
4755 return lowerFPOWI(MI);
4756 case G_SMIN:
4757 case G_SMAX:
4758 case G_UMIN:
4759 case G_UMAX:
4760 return lowerMinMax(MI);
4761 case G_SCMP:
4762 case G_UCMP:
4763 return lowerThreewayCompare(MI);
4764 case G_FCOPYSIGN:
4765 return lowerFCopySign(MI);
4766 case G_FMINNUM:
4767 case G_FMAXNUM:
4768 case G_FMINIMUMNUM:
4769 case G_FMAXIMUMNUM:
4770 return lowerFMinNumMaxNum(MI);
4771 case G_FMINIMUM:
4772 case G_FMAXIMUM:
4773 return lowerFMinimumMaximum(MI);
4774 case G_MERGE_VALUES:
4775 return lowerMergeValues(MI);
4776 case G_UNMERGE_VALUES:
4777 return lowerUnmergeValues(MI);
4778 case TargetOpcode::G_SEXT_INREG: {
4779 assert(MI.getOperand(2).isImm() && "Expected immediate");
4780 int64_t SizeInBits = MI.getOperand(2).getImm();
4781
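    // E.g. (illustrative) G_SEXT_INREG %x:_(s32), 8 is lowered to
    //   %shl = G_SHL  %x, 24
    //   %res = G_ASHR %shl, 24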
4782 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4783 LLT DstTy = MRI.getType(DstReg);
4784 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4785
4786 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4787 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4788 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4789 MI.eraseFromParent();
4790 return Legalized;
4791 }
4792 case G_EXTRACT_VECTOR_ELT:
4793 case G_INSERT_VECTOR_ELT:
4794 return lowerExtractInsertVectorElt(MI);
4795 case G_SHUFFLE_VECTOR:
4796 return lowerShuffleVector(MI);
4797 case G_VECTOR_COMPRESS:
4798 return lowerVECTOR_COMPRESS(MI);
4799 case G_DYN_STACKALLOC:
4800 return lowerDynStackAlloc(MI);
4801 case G_STACKSAVE:
4802 return lowerStackSave(MI);
4803 case G_STACKRESTORE:
4804 return lowerStackRestore(MI);
4805 case G_EXTRACT:
4806 return lowerExtract(MI);
4807 case G_INSERT:
4808 return lowerInsert(MI);
4809 case G_BSWAP:
4810 return lowerBswap(MI);
4811 case G_BITREVERSE:
4812 return lowerBitreverse(MI);
4813 case G_READ_REGISTER:
4814 case G_WRITE_REGISTER:
4815 return lowerReadWriteRegister(MI);
4816 case G_UADDSAT:
4817 case G_USUBSAT: {
4818 // Try to make a reasonable guess about which lowering strategy to use. The
4819 // target can override this with custom lowering and calling the
4820 // implementation functions.
4821 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4822 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4823 return lowerAddSubSatToMinMax(MI);
4824 return lowerAddSubSatToAddoSubo(MI);
4825 }
4826 case G_SADDSAT:
4827 case G_SSUBSAT: {
4828 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4829
4830 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4831 // since it's a shorter expansion. However, we would need to figure out the
4832 // preferred boolean type for the carry out for the query.
4833 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4834 return lowerAddSubSatToMinMax(MI);
4835 return lowerAddSubSatToAddoSubo(MI);
4836 }
4837 case G_SSHLSAT:
4838 case G_USHLSAT:
4839 return lowerShlSat(MI);
4840 case G_ABS:
4841 return lowerAbsToAddXor(MI);
4842 case G_ABDS:
4843 case G_ABDU: {
4844 bool IsSigned = MI.getOpcode() == G_ABDS;
4845 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4846 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4847 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4848 return lowerAbsDiffToMinMax(MI);
4849 }
4850 return lowerAbsDiffToSelect(MI);
4851 }
4852 case G_FABS:
4853 return lowerFAbs(MI);
4854 case G_SELECT:
4855 return lowerSelect(MI);
4856 case G_IS_FPCLASS:
4857 return lowerISFPCLASS(MI);
4858 case G_SDIVREM:
4859 case G_UDIVREM:
4860 return lowerDIVREM(MI);
4861 case G_FSHL:
4862 case G_FSHR:
4863 return lowerFunnelShift(MI);
4864 case G_ROTL:
4865 case G_ROTR:
4866 return lowerRotate(MI);
4867 case G_MEMSET:
4868 case G_MEMCPY:
4869 case G_MEMMOVE:
4870 return lowerMemCpyFamily(MI);
4871 case G_MEMCPY_INLINE:
4872 return lowerMemcpyInline(MI);
4873 case G_ZEXT:
4874 case G_SEXT:
4875 case G_ANYEXT:
4876 return lowerEXT(MI);
4877 case G_TRUNC:
4878 return lowerTRUNC(MI);
4879 GISEL_VECREDUCE_CASES_NONSEQ
4880 return lowerVectorReduction(MI);
4881 case G_VAARG:
4882 return lowerVAArg(MI);
4883 case G_ATOMICRMW_SUB: {
4884 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4885 const LLT ValTy = MRI.getType(Val);
4886 MachineMemOperand *MMO = *MI.memoperands_begin();
4887
4888 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4889 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4890 MI.eraseFromParent();
4891 return Legalized;
4892 }
4893 }
4894}
4895
4896Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4897 Align MinAlign) const {
4898 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4899 // datalayout for the preferred alignment. Also there should be a target hook
4900 // for this to allow targets to reduce the alignment and ignore the
4901 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4902 // the type.
4903 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4904}
4905
4906MachineInstrBuilder
4907LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4908 MachinePointerInfo &PtrInfo) {
4909 MachineFunction &MF = MIRBuilder.getMF();
4910 const DataLayout &DL = MIRBuilder.getDataLayout();
4911 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4912
4913 unsigned AddrSpace = DL.getAllocaAddrSpace();
4914 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4915
4916 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4917 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4918}
4919
4920MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4921 const SrcOp &Val) {
4922 LLT SrcTy = Val.getLLTTy(MRI);
4923 Align StackTypeAlign =
4924 std::max(getStackTemporaryAlignment(SrcTy),
4926 MachinePointerInfo PtrInfo;
4927 auto StackTemp =
4928 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4929
4930 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4931 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4932}
4933
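/// Clamp a dynamic vector index so the access stays in bounds. For example
/// (illustrative), with an 8-element vector the index is simply ANDed with 7;
/// non-power-of-2 element counts fall back to an unsigned min with NElts - 1.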
4934static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4935 LLT VecTy) {
4936 LLT IdxTy = B.getMRI()->getType(IdxReg);
4937 unsigned NElts = VecTy.getNumElements();
4938
4939 int64_t IdxVal;
4940 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4941 if (IdxVal < VecTy.getNumElements())
4942 return IdxReg;
4943 // If a constant index would be out of bounds, clamp it as well.
4944 }
4945
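  // A power-of-two element count lets us clamp with a cheap AND mask;
  // otherwise fall back to an unsigned min against the last valid index.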
4946 if (isPowerOf2_32(NElts)) {
4947 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4948 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4949 }
4950
4951 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4952 .getReg(0);
4953}
4954
4955Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4956 Register Index) {
4957 LLT EltTy = VecTy.getElementType();
4958
4959 // Calculate the element offset and add it to the pointer.
4960 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4961 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4962 "Converting bits to bytes lost precision");
4963
4964 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4965
4966 // Convert index to the correct size for the address space.
4967 const DataLayout &DL = MIRBuilder.getDataLayout();
4968 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4969 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4970 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4971 if (IdxTy != MRI.getType(Index))
4972 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4973
4974 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4975 MIRBuilder.buildConstant(IdxTy, EltSize));
4976
4977 LLT PtrTy = MRI.getType(VecPtr);
4978 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4979}
4980
4981#ifndef NDEBUG
4982/// Check that all vector operands have the same number of elements. Other
4983/// operands should be listed in NonVecOpIndices.
4984static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI,
4985 MachineRegisterInfo &MRI,
4986 std::initializer_list<unsigned> NonVecOpIndices) {
4987 if (MI.getNumMemOperands() != 0)
4988 return false;
4989
4990 LLT VecTy = MRI.getType(MI.getReg(0));
4991 if (!VecTy.isVector())
4992 return false;
4993 unsigned NumElts = VecTy.getNumElements();
4994
4995 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4996 MachineOperand &Op = MI.getOperand(OpIdx);
4997 if (!Op.isReg()) {
4998 if (!is_contained(NonVecOpIndices, OpIdx))
4999 return false;
5000 continue;
5001 }
5002
5003 LLT Ty = MRI.getType(Op.getReg());
5004 if (!Ty.isVector()) {
5005 if (!is_contained(NonVecOpIndices, OpIdx))
5006 return false;
5007 continue;
5008 }
5009
5010 if (Ty.getNumElements() != NumElts)
5011 return false;
5012 }
5013
5014 return true;
5015}
5016#endif
5017
5018/// Fill \p DstOps with DstOps that, combined, have the same number of elements
5019/// as \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
5020/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
5021/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
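/// For example, Ty = <7 x s32> with NumElts = 2 yields {<2 x s32>, <2 x s32>,
/// <2 x s32>, s32}: three full pieces plus a one-element leftover.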
5022static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5023 unsigned NumElts) {
5024 LLT LeftoverTy;
5025 assert(Ty.isVector() && "Expected vector type");
5026 LLT EltTy = Ty.getElementType();
5027 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
5028 int NumParts, NumLeftover;
5029 std::tie(NumParts, NumLeftover) =
5030 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5031
5032 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5033 for (int i = 0; i < NumParts; ++i) {
5034 DstOps.push_back(NarrowTy);
5035 }
5036
5037 if (LeftoverTy.isValid()) {
5038 assert(NumLeftover == 1 && "expected exactly one leftover");
5039 DstOps.push_back(LeftoverTy);
5040 }
5041}
5042
5043/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5044/// made from \p Op, depending on the operand type.
5045static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5046 MachineOperand &Op) {
5047 for (unsigned i = 0; i < N; ++i) {
5048 if (Op.isReg())
5049 Ops.push_back(Op.getReg());
5050 else if (Op.isImm())
5051 Ops.push_back(Op.getImm());
5052 else if (Op.isPredicate())
5053 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5054 else
5055 llvm_unreachable("Unsupported type");
5056 }
5057}
5058
5059// Handle splitting vector operations which need to have the same number of
5060// elements in each type index, but each type index may have a different element
5061// type.
5062//
5063// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5064// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5065// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5066//
5067// Also handles some irregular breakdown cases, e.g.
5068// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5069// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5070// s64 = G_SHL s64, s32
5071LegalizerHelper::LegalizeResult
5072LegalizerHelper::fewerElementsVectorMultiEltType(
5073 GenericMachineInstr &MI, unsigned NumElts,
5074 std::initializer_list<unsigned> NonVecOpIndices) {
5075 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5076 "Non-compatible opcode or not specified non-vector operands");
5077 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5078
5079 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5080 unsigned NumDefs = MI.getNumDefs();
5081
5082 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5083 // Build instructions with DstOps so that the instruction found by CSE is used
5084 // directly. CSE copies the found instruction into the given vreg when building with a vreg destination.
5085 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5086 // Output registers will be taken from created instructions.
5087 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5088 for (unsigned i = 0; i < NumDefs; ++i) {
5089 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5090 }
5091
5092 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5093 // Operands listed in NonVecOpIndices will be used as is without splitting;
5094 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5095 // scalar condition (op 1), immediate in sext_inreg (op 2).
5096 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5097 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5098 ++UseIdx, ++UseNo) {
5099 if (is_contained(NonVecOpIndices, UseIdx)) {
5100 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5101 MI.getOperand(UseIdx));
5102 } else {
5103 SmallVector<Register, 8> SplitPieces;
5104 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5105 MRI);
5106 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5107 }
5108 }
5109
5110 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5111
5112 // Take i-th piece of each input operand split and build sub-vector/scalar
5113 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5114 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5115 SmallVector<DstOp, 8> Defs;
5116 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5117 Defs.push_back(OutputOpsPieces[DstNo][i]);
5118
5119 SmallVector<SrcOp, 8> Uses;
5120 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5121 Uses.push_back(InputOpsPieces[InputNo][i]);
5122
5123 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5124 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5125 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5126 }
5127
5128 // Merge small outputs into MI's output for each def operand.
5129 if (NumLeftovers) {
5130 for (unsigned i = 0; i < NumDefs; ++i)
5131 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5132 } else {
5133 for (unsigned i = 0; i < NumDefs; ++i)
5134 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5135 }
5136
5137 MI.eraseFromParent();
5138 return Legalized;
5139}
5140
5141LegalizerHelper::LegalizeResult
5142LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5143 unsigned NumElts) {
5144 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5145
5146 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5147 unsigned NumDefs = MI.getNumDefs();
5148
5149 SmallVector<DstOp, 8> OutputOpsPieces;
5150 SmallVector<Register, 8> OutputRegs;
5151 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5152
5153 // Instructions that perform the register split will be inserted in the basic
5154 // block where the register is defined (the basic block is in the next operand).
5155 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5156 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5157 UseIdx += 2, ++UseNo) {
5158 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5159 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5160 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5161 MIRBuilder, MRI);
5162 }
5163
5164 // Build PHIs with fewer elements.
5165 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5166 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5167 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5168 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5169 Phi.addDef(
5170 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5171 OutputRegs.push_back(Phi.getReg(0));
5172
5173 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5174 Phi.addUse(InputOpsPieces[j][i]);
5175 Phi.add(MI.getOperand(1 + j * 2 + 1));
5176 }
5177 }
5178
5179 // Set the insert point after the existing PHIs
5180 MachineBasicBlock &MBB = *MI.getParent();
5181 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5182
5183 // Merge small outputs into MI's def.
5184 if (NumLeftovers) {
5185 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5186 } else {
5187 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5188 }
5189
5190 MI.eraseFromParent();
5191 return Legalized;
5192}
5193
5194LegalizerHelper::LegalizeResult
5195LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5196 unsigned TypeIdx,
5197 LLT NarrowTy) {
5198 const int NumDst = MI.getNumOperands() - 1;
5199 const Register SrcReg = MI.getOperand(NumDst).getReg();
5200 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5201 LLT SrcTy = MRI.getType(SrcReg);
5202
5203 if (TypeIdx != 1 || NarrowTy == DstTy)
5204 return UnableToLegalize;
5205
5206 // Requires compatible types. Otherwise SrcReg should have been defined by a
5207 // merge-like instruction that would get artifact-combined. Most likely the
5208 // instruction that defines SrcReg has to perform more/fewer-elements
5209 // legalization compatible with NarrowTy.
5210 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5211 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5212
5213 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5214 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5215 return UnableToLegalize;
5216
5217 // This is most likely DstTy (smaller than register size) packed in SrcTy
5218 // (larger than register size), and since the unmerge was not combined it will
5219 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5220 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece into DstTy.
5221
5222 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5223 //
5224 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5225 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5226 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5227 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5228 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5229 const int PartsPerUnmerge = NumDst / NumUnmerge;
5230
5231 for (int I = 0; I != NumUnmerge; ++I) {
5232 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5233
5234 for (int J = 0; J != PartsPerUnmerge; ++J)
5235 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5236 MIB.addUse(Unmerge.getReg(I));
5237 }
5238
5239 MI.eraseFromParent();
5240 return Legalized;
5241}
5242
5243LegalizerHelper::LegalizeResult
5244LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5245 LLT NarrowTy) {
5246 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5247 // Requires compatible types. Otherwise the user of DstReg did not perform an
5248 // unmerge that should have been artifact-combined. Most likely the instruction
5249 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5250 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5251 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5252 if (NarrowTy == SrcTy)
5253 return UnableToLegalize;
5254
5255 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
5256 // use is for old MIR tests. Since the changes to more/fewer elements, it should
5257 // no longer be possible to generate MIR like this when starting from LLVM IR,
5258 // because the LCMTy approach was replaced with merge/unmerge to vector elements.
5259 if (TypeIdx == 1) {
5260 assert(SrcTy.isVector() && "Expected vector types");
5261 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5262 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5263 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5264 return UnableToLegalize;
5265 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5266 //
5267 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5268 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5269 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5270 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5271 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5272 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5273
5274 SmallVector<Register, 8> Elts;
5275 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5276 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5277 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5278 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5279 Elts.push_back(Unmerge.getReg(j));
5280 }
5281
5282 SmallVector<Register, 8> NarrowTyElts;
5283 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5284 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5285 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5286 ++i, Offset += NumNarrowTyElts) {
5287 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5288 NarrowTyElts.push_back(
5289 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5290 }
5291
5292 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5293 MI.eraseFromParent();
5294 return Legalized;
5295 }
5296
5297 assert(TypeIdx == 0 && "Bad type index");
5298 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5299 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5300 return UnableToLegalize;
5301
5302 // This is most likely SrcTy (smaller than register size) packed in DstTy
5303 // (larger than register size), and since the merge was not combined it will be
5304 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5305 // (register size) pieces first, then merge each NarrowTy piece into DstTy.
5306
5307 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5308 //
5309 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5310 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5311 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5312 SmallVector<Register, 8> NarrowTyElts;
5313 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5314 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5315 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5316 for (unsigned i = 0; i < NumParts; ++i) {
5317 SmallVector<Register, 8> Sources;
5318 for (unsigned j = 0; j < NumElts; ++j)
5319 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5320 NarrowTyElts.push_back(
5321 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5322 }
5323
5324 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5325 MI.eraseFromParent();
5326 return Legalized;
5327}
5328
5329LegalizerHelper::LegalizeResult
5330LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5331 unsigned TypeIdx,
5332 LLT NarrowVecTy) {
5333 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5334 Register InsertVal;
5335 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5336
5337 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5338 if (IsInsert)
5339 InsertVal = MI.getOperand(2).getReg();
5340
5341 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5342 LLT VecTy = MRI.getType(SrcVec);
5343
5344 // If the index is a constant, we can really break this down as you would
5345 // expect, and index into the target size pieces.
5346 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5347 if (MaybeCst) {
5348 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5349 // Avoid out of bounds indexing the pieces.
5350 if (IdxVal >= VecTy.getNumElements()) {
5351 MIRBuilder.buildUndef(DstReg);
5352 MI.eraseFromParent();
5353 return Legalized;
5354 }
5355
5356 if (!NarrowVecTy.isVector()) {
5357 SmallVector<Register, 8> SplitPieces;
5358 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5359 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5360 if (IsInsert) {
5361 SplitPieces[IdxVal] = InsertVal;
5362 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5363 } else {
5364 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5365 }
5366 } else {
5367 SmallVector<Register, 8> VecParts;
5368 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5369
5370 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5371 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5372 TargetOpcode::G_ANYEXT);
5373
5374 unsigned NewNumElts = NarrowVecTy.getNumElements();
5375
5376 LLT IdxTy = MRI.getType(Idx);
5377 int64_t PartIdx = IdxVal / NewNumElts;
5378 auto NewIdx =
5379 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5380
5381 if (IsInsert) {
5382 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5383
5384 // Use the adjusted index to insert into one of the subvectors.
5385 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5386 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5387 VecParts[PartIdx] = InsertPart.getReg(0);
5388
5389 // Recombine the inserted subvector with the others to reform the result
5390 // vector.
5391 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5392 } else {
5393 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5394 }
5395 }
5396
5397 MI.eraseFromParent();
5398 return Legalized;
5399 }
5400
5401 // With a variable index, we can't perform the operation in a smaller type, so
5402 // we're forced to expand this.
5403 //
5404 // TODO: We could emit a chain of compare/select to figure out which piece to
5405 // index.
5406 return UnableToLegalize;
5407}
5408
5409LegalizerHelper::LegalizeResult
5410LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5411 LLT NarrowTy) {
5412 // FIXME: Don't know how to handle secondary types yet.
5413 if (TypeIdx != 0)
5414 return UnableToLegalize;
5415
5416 if (!NarrowTy.isByteSized()) {
5417 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5418 return UnableToLegalize;
5419 }
5420
5421 // This implementation doesn't work for atomics. Give up instead of doing
5422 // something invalid.
5423 if (LdStMI.isAtomic())
5424 return UnableToLegalize;
5425
5426 bool IsLoad = isa<GLoad>(LdStMI);
5427 Register ValReg = LdStMI.getReg(0);
5428 Register AddrReg = LdStMI.getPointerReg();
5429 LLT ValTy = MRI.getType(ValReg);
5430
5431 // FIXME: Do we need a distinct NarrowMemory legalize action?
5432 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5433 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5434 return UnableToLegalize;
5435 }
5436
5437 int NumParts = -1;
5438 int NumLeftover = -1;
5439 LLT LeftoverTy;
5440 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5441 if (IsLoad) {
5442 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5443 } else {
5444 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5445 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5446 NumParts = NarrowRegs.size();
5447 NumLeftover = NarrowLeftoverRegs.size();
5448 }
5449 }
5450
5451 if (NumParts == -1)
5452 return UnableToLegalize;
5453
5454 LLT PtrTy = MRI.getType(AddrReg);
5455 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5456
5457 unsigned TotalSize = ValTy.getSizeInBits();
5458
5459 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5460 // is a load, return the new registers in ValRegs. For a store, each element
5461 // of ValRegs should be PartTy. Returns the next offset that needs to be
5462 // handled.
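  // e.g. a 96-bit little-endian store narrowed to s32 visits byte offsets
  // 0, 4 and 8; on big-endian targets the walk starts at the highest part
  // and the offset decreases instead.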
5463 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5464 auto MMO = LdStMI.getMMO();
5465 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5466 unsigned NumParts, unsigned Offset) -> unsigned {
5467 MachineFunction &MF = MIRBuilder.getMF();
5468 unsigned PartSize = PartTy.getSizeInBits();
5469 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5470 ++Idx) {
5471 unsigned ByteOffset = Offset / 8;
5472 Register NewAddrReg;
5473
5474 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5475 ByteOffset);
5476
5477 MachineMemOperand *NewMMO =
5478 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5479
5480 if (IsLoad) {
5481 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5482 ValRegs.push_back(Dst);
5483 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5484 } else {
5485 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5486 }
5487 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5488 }
5489
5490 return Offset;
5491 };
5492
5493 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5494 unsigned HandledOffset =
5495 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5496
5497 // Handle the rest of the register if this isn't an even type breakdown.
5498 if (LeftoverTy.isValid())
5499 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5500
5501 if (IsLoad) {
5502 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5503 LeftoverTy, NarrowLeftoverRegs);
5504 }
5505
5506 LdStMI.eraseFromParent();
5507 return Legalized;
5508}
5509
5510LegalizerHelper::LegalizeResult
5511LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5512 LLT NarrowTy) {
5513 using namespace TargetOpcode;
5514 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5515 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5516
5517 switch (MI.getOpcode()) {
5518 case G_IMPLICIT_DEF:
5519 case G_TRUNC:
5520 case G_AND:
5521 case G_OR:
5522 case G_XOR:
5523 case G_ADD:
5524 case G_SUB:
5525 case G_MUL:
5526 case G_PTR_ADD:
5527 case G_SMULH:
5528 case G_UMULH:
5529 case G_FADD:
5530 case G_FMUL:
5531 case G_FSUB:
5532 case G_FNEG:
5533 case G_FABS:
5534 case G_FCANONICALIZE:
5535 case G_FDIV:
5536 case G_FREM:
5537 case G_FMA:
5538 case G_FMAD:
5539 case G_FPOW:
5540 case G_FEXP:
5541 case G_FEXP2:
5542 case G_FEXP10:
5543 case G_FLOG:
5544 case G_FLOG2:
5545 case G_FLOG10:
5546 case G_FLDEXP:
5547 case G_FNEARBYINT:
5548 case G_FCEIL:
5549 case G_FFLOOR:
5550 case G_FRINT:
5551 case G_INTRINSIC_LRINT:
5552 case G_INTRINSIC_LLRINT:
5553 case G_INTRINSIC_ROUND:
5554 case G_INTRINSIC_ROUNDEVEN:
5555 case G_LROUND:
5556 case G_LLROUND:
5557 case G_INTRINSIC_TRUNC:
5558 case G_FMODF:
5559 case G_FCOS:
5560 case G_FSIN:
5561 case G_FTAN:
5562 case G_FACOS:
5563 case G_FASIN:
5564 case G_FATAN:
5565 case G_FATAN2:
5566 case G_FCOSH:
5567 case G_FSINH:
5568 case G_FTANH:
5569 case G_FSQRT:
5570 case G_BSWAP:
5571 case G_BITREVERSE:
5572 case G_SDIV:
5573 case G_UDIV:
5574 case G_SREM:
5575 case G_UREM:
5576 case G_SDIVREM:
5577 case G_UDIVREM:
5578 case G_SMIN:
5579 case G_SMAX:
5580 case G_UMIN:
5581 case G_UMAX:
5582 case G_ABS:
5583 case G_FMINNUM:
5584 case G_FMAXNUM:
5585 case G_FMINNUM_IEEE:
5586 case G_FMAXNUM_IEEE:
5587 case G_FMINIMUM:
5588 case G_FMAXIMUM:
5589 case G_FMINIMUMNUM:
5590 case G_FMAXIMUMNUM:
5591 case G_FSHL:
5592 case G_FSHR:
5593 case G_ROTL:
5594 case G_ROTR:
5595 case G_FREEZE:
5596 case G_SADDSAT:
5597 case G_SSUBSAT:
5598 case G_UADDSAT:
5599 case G_USUBSAT:
5600 case G_UMULO:
5601 case G_SMULO:
5602 case G_SHL:
5603 case G_LSHR:
5604 case G_ASHR:
5605 case G_SSHLSAT:
5606 case G_USHLSAT:
5607 case G_CTLZ:
5608 case G_CTLZ_ZERO_UNDEF:
5609 case G_CTTZ:
5610 case G_CTTZ_ZERO_UNDEF:
5611 case G_CTPOP:
5612 case G_FCOPYSIGN:
5613 case G_ZEXT:
5614 case G_SEXT:
5615 case G_ANYEXT:
5616 case G_FPEXT:
5617 case G_FPTRUNC:
5618 case G_SITOFP:
5619 case G_UITOFP:
5620 case G_FPTOSI:
5621 case G_FPTOUI:
5622 case G_FPTOSI_SAT:
5623 case G_FPTOUI_SAT:
5624 case G_INTTOPTR:
5625 case G_PTRTOINT:
5626 case G_ADDRSPACE_CAST:
5627 case G_UADDO:
5628 case G_USUBO:
5629 case G_UADDE:
5630 case G_USUBE:
5631 case G_SADDO:
5632 case G_SSUBO:
5633 case G_SADDE:
5634 case G_SSUBE:
5635 case G_STRICT_FADD:
5636 case G_STRICT_FSUB:
5637 case G_STRICT_FMUL:
5638 case G_STRICT_FMA:
5639 case G_STRICT_FLDEXP:
5640 case G_FFREXP:
5641 return fewerElementsVectorMultiEltType(GMI, NumElts);
5642 case G_ICMP:
5643 case G_FCMP:
5644 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5645 case G_IS_FPCLASS:
5646 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5647 case G_SELECT:
5648 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5649 return fewerElementsVectorMultiEltType(GMI, NumElts);
5650 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5651 case G_PHI:
5652 return fewerElementsVectorPhi(GMI, NumElts);
5653 case G_UNMERGE_VALUES:
5654 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5655 case G_BUILD_VECTOR:
5656 assert(TypeIdx == 0 && "not a vector type index");
5657 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5658 case G_CONCAT_VECTORS:
5659 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5660 return UnableToLegalize;
5661 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5662 case G_EXTRACT_VECTOR_ELT:
5663 case G_INSERT_VECTOR_ELT:
5664 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5665 case G_LOAD:
5666 case G_STORE:
5667 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5668 case G_SEXT_INREG:
5669 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5670 GISEL_VECREDUCE_CASES_NONSEQ
5671 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5672 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5673 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5674 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5675 case G_SHUFFLE_VECTOR:
5676 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5677 case G_FPOWI:
5678 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5679 case G_BITCAST:
5680 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5681 case G_INTRINSIC_FPTRUNC_ROUND:
5682 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5683 default:
5684 return UnableToLegalize;
5685 }
5686}
5687
5688LegalizerHelper::LegalizeResult
5689LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5690 LLT NarrowTy) {
5691 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5692 "Not a bitcast operation");
5693
5694 if (TypeIdx != 0)
5695 return UnableToLegalize;
5696
5697 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5698
5699 unsigned NewElemCount =
5700 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5701 SmallVector<Register> SrcVRegs, BitcastVRegs;
5702 if (NewElemCount == 1) {
5703 LLT SrcNarrowTy = SrcTy.getElementType();
5704
5705 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5706 getUnmergeResults(SrcVRegs, *Unmerge);
5707 } else {
5708 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5709
5710 // Split the Src and Dst Reg into smaller registers
5711 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5712 return UnableToLegalize;
5713 }
5714
5715 // Build new smaller bitcast instructions
5716 // Not supporting Leftover types for now but will have to
5717 for (Register Reg : SrcVRegs)
5718 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5719
5720 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5721 MI.eraseFromParent();
5722 return Legalized;
5723}
5724
5725LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5726 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5727 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5728 if (TypeIdx != 0)
5729 return UnableToLegalize;
5730
5731 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5732 MI.getFirst3RegLLTs();
5733 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5734 // The shuffle should be canonicalized by now.
5735 if (DstTy != Src1Ty)
5736 return UnableToLegalize;
5737 if (DstTy != Src2Ty)
5738 return UnableToLegalize;
5739
5740 if (!isPowerOf2_32(DstTy.getNumElements()))
5741 return UnableToLegalize;
5742
5743 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5744 // Further legalization attempts will be needed to split further.
5745 NarrowTy =
5746 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5747 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5748
5749 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5750 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5751 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5752 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5753 SplitSrc2Regs[1]};
5754
5755 Register Hi, Lo;
5756
5757 // If Lo or Hi uses elements from at most two of the four input vectors, then
5758 // express it as a vector shuffle of those two inputs. Otherwise extract the
5759 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5760 SmallVector<int, 16> Ops;
5761 for (unsigned High = 0; High < 2; ++High) {
5762 Register &Output = High ? Hi : Lo;
5763
5764 // Build a shuffle mask for the output, discovering on the fly which
5765 // input vectors to use as shuffle operands (recorded in InputUsed).
5766 // If building a suitable shuffle vector proves too hard, then bail
5767 // out with useBuildVector set.
5768 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5769 unsigned FirstMaskIdx = High * NewElts;
5770 bool UseBuildVector = false;
5771 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5772 // The mask element. This indexes into the input.
5773 int Idx = Mask[FirstMaskIdx + MaskOffset];
5774
5775 // The input vector this mask element indexes into.
5776 unsigned Input = (unsigned)Idx / NewElts;
5777
5778 if (Input >= std::size(Inputs)) {
5779 // The mask element does not index into any input vector.
5780 Ops.push_back(-1);
5781 continue;
5782 }
5783
5784 // Turn the index into an offset from the start of the input vector.
5785 Idx -= Input * NewElts;
5786
5787 // Find or create a shuffle vector operand to hold this input.
5788 unsigned OpNo;
5789 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5790 if (InputUsed[OpNo] == Input) {
5791 // This input vector is already an operand.
5792 break;
5793 } else if (InputUsed[OpNo] == -1U) {
5794 // Create a new operand for this input vector.
5795 InputUsed[OpNo] = Input;
5796 break;
5797 }
5798 }
5799
5800 if (OpNo >= std::size(InputUsed)) {
5801 // More than two input vectors used! Give up on trying to create a
5802 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5803 UseBuildVector = true;
5804 break;
5805 }
5806
5807 // Add the mask index for the new shuffle vector.
5808 Ops.push_back(Idx + OpNo * NewElts);
5809 }
5810
5811 if (UseBuildVector) {
5812 LLT EltTy = NarrowTy.getElementType();
5813 SmallVector<Register, 16> SVOps;
5814
5815 // Extract the input elements by hand.
5816 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5817 // The mask element. This indexes into the input.
5818 int Idx = Mask[FirstMaskIdx + MaskOffset];
5819
5820 // The input vector this mask element indexes into.
5821 unsigned Input = (unsigned)Idx / NewElts;
5822
5823 if (Input >= std::size(Inputs)) {
5824 // The mask element is "undef" or indexes off the end of the input.
5825 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5826 continue;
5827 }
5828
5829 // Turn the index into an offset from the start of the input vector.
5830 Idx -= Input * NewElts;
5831
5832 // Extract the vector element by hand.
5833 SVOps.push_back(MIRBuilder
5834 .buildExtractVectorElement(
5835 EltTy, Inputs[Input],
5836 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5837 .getReg(0));
5838 }
5839
5840 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5841 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5842 } else if (InputUsed[0] == -1U) {
5843 // No input vectors were used! The result is undefined.
5844 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5845 } else if (NewElts == 1) {
5846 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5847 } else {
5848 Register Op0 = Inputs[InputUsed[0]];
5849 // If only one input was used, use an undefined vector for the other.
5850 Register Op1 = InputUsed[1] == -1U
5851 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5852 : Inputs[InputUsed[1]];
5853 // At least one input vector was used. Create a new shuffle vector.
5854 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5855 }
5856
5857 Ops.clear();
5858 }
5859
5860 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5861 MI.eraseFromParent();
5862 return Legalized;
5863}
5864
5865LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5866 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5867 auto &RdxMI = cast<GVecReduce>(MI);
5868
5869 if (TypeIdx != 1)
5870 return UnableToLegalize;
5871
5872 // The semantics of the normal non-sequential reductions allow us to freely
5873 // re-associate the operation.
5874 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5875
5876 if (NarrowTy.isVector() &&
5877 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5878 return UnableToLegalize;
5879
5880 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5881 SmallVector<Register> SplitSrcs;
5882 // If NarrowTy is a scalar then we're being asked to scalarize.
5883 const unsigned NumParts =
5884 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5885 : SrcTy.getNumElements();
5886
5887 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5888 if (NarrowTy.isScalar()) {
5889 if (DstTy != NarrowTy)
5890 return UnableToLegalize; // FIXME: handle implicit extensions.
5891
5892 if (isPowerOf2_32(NumParts)) {
5893 // Generate a tree of scalar operations to reduce the critical path.
5894 SmallVector<Register> PartialResults;
5895 unsigned NumPartsLeft = NumParts;
5896 while (NumPartsLeft > 1) {
5897 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5898 PartialResults.emplace_back(
5899 MIRBuilder
5900 .buildInstr(ScalarOpc, {NarrowTy},
5901 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5902 .getReg(0));
5903 }
5904 SplitSrcs = PartialResults;
5905 PartialResults.clear();
5906 NumPartsLeft = SplitSrcs.size();
5907 }
5908 assert(SplitSrcs.size() == 1);
5909 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5910 MI.eraseFromParent();
5911 return Legalized;
5912 }
5913 // If we can't generate a tree, then just do sequential operations.
5914 Register Acc = SplitSrcs[0];
5915 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5916 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5917 .getReg(0);
5918 MIRBuilder.buildCopy(DstReg, Acc);
5919 MI.eraseFromParent();
5920 return Legalized;
5921 }
5922 SmallVector<Register> PartialReductions;
5923 for (unsigned Part = 0; Part < NumParts; ++Part) {
5924 PartialReductions.push_back(
5925 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5926 .getReg(0));
5927 }
5928
5929 // If the types involved are powers of 2, we can generate intermediate vector
5930 // ops, before generating a final reduction operation.
5931 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5932 isPowerOf2_32(NarrowTy.getNumElements())) {
5933 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5934 }
5935
5936 Register Acc = PartialReductions[0];
5937 for (unsigned Part = 1; Part < NumParts; ++Part) {
5938 if (Part == NumParts - 1) {
5939 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5940 {Acc, PartialReductions[Part]});
5941 } else {
5942 Acc = MIRBuilder
5943 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5944 .getReg(0);
5945 }
5946 }
5947 MI.eraseFromParent();
5948 return Legalized;
5949}
5950
5951LegalizerHelper::LegalizeResult
5952LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5953 unsigned int TypeIdx,
5954 LLT NarrowTy) {
5955 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5956 MI.getFirst3RegLLTs();
5957 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5958 DstTy != NarrowTy)
5959 return UnableToLegalize;
5960
5961 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5962 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5963 "Unexpected vecreduce opcode");
5964 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5965 ? TargetOpcode::G_FADD
5966 : TargetOpcode::G_FMUL;
5967
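  // Sequential reductions must preserve the evaluation order, so the vector is
  // fully scalarized and folded into the scalar accumulator one element at a
  // time.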
5968 SmallVector<Register> SplitSrcs;
5969 unsigned NumParts = SrcTy.getNumElements();
5970 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5971 Register Acc = ScalarReg;
5972 for (unsigned i = 0; i < NumParts; i++)
5973 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5974 .getReg(0);
5975
5976 MIRBuilder.buildCopy(DstReg, Acc);
5977 MI.eraseFromParent();
5978 return Legalized;
5979}
5980
5981LegalizerHelper::LegalizeResult
5982LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5983 LLT SrcTy, LLT NarrowTy,
5984 unsigned ScalarOpc) {
5985 SmallVector<Register> SplitSrcs;
5986 // Split the sources into NarrowTy size pieces.
5987 extractParts(SrcReg, NarrowTy,
5988 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5989 MIRBuilder, MRI);
5990 // We're going to do a tree reduction using vector operations until we have
5991 // one NarrowTy size value left.
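  // e.g. reducing <16 x s32> with NarrowTy = <4 x s32>: 4 pieces -> 2 -> 1,
  // and the original reduction instruction then runs on the final <4 x s32>.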
5992 while (SplitSrcs.size() > 1) {
5993 SmallVector<Register> PartialRdxs;
5994 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5995 Register LHS = SplitSrcs[Idx];
5996 Register RHS = SplitSrcs[Idx + 1];
5997 // Create the intermediate vector op.
5998 Register Res =
5999 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6000 PartialRdxs.push_back(Res);
6001 }
6002 SplitSrcs = std::move(PartialRdxs);
6003 }
6004 // Finally generate the requested NarrowTy based reduction.
6005 Observer.changingInstr(MI);
6006 MI.getOperand(1).setReg(SplitSrcs[0]);
6007 Observer.changedInstr(MI);
6008 return Legalized;
6009}
6010
6011LegalizerHelper::LegalizeResult
6012LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
6013 const LLT HalfTy, const LLT AmtTy) {
6014
6015 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6016 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6017 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6018
6019 if (Amt.isZero()) {
6020 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6021 MI.eraseFromParent();
6022 return Legalized;
6023 }
6024
6025 LLT NVT = HalfTy;
6026 unsigned NVTBits = HalfTy.getSizeInBits();
6027 unsigned VTBits = 2 * NVTBits;
6028
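  // e.g. splitting a 64-bit G_SHL by a constant 40 into two 32-bit halves:
  // Amt > NVTBits, so Lo = 0 and Hi = InL << 8.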
6029 SrcOp Lo(Register(0)), Hi(Register(0));
6030 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6031 if (Amt.ugt(VTBits)) {
6032 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6033 } else if (Amt.ugt(NVTBits)) {
6034 Lo = MIRBuilder.buildConstant(NVT, 0);
6035 Hi = MIRBuilder.buildShl(NVT, InL,
6036 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6037 } else if (Amt == NVTBits) {
6038 Lo = MIRBuilder.buildConstant(NVT, 0);
6039 Hi = InL;
6040 } else {
6041 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6042 auto OrLHS =
6043 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6044 auto OrRHS = MIRBuilder.buildLShr(
6045 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6046 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6047 }
6048 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6049 if (Amt.ugt(VTBits)) {
6050 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6051 } else if (Amt.ugt(NVTBits)) {
6052 Lo = MIRBuilder.buildLShr(NVT, InH,
6053 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6054 Hi = MIRBuilder.buildConstant(NVT, 0);
6055 } else if (Amt == NVTBits) {
6056 Lo = InH;
6057 Hi = MIRBuilder.buildConstant(NVT, 0);
6058 } else {
6059 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6060
6061 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6062 auto OrRHS = MIRBuilder.buildShl(
6063 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6064
6065 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6066 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6067 }
6068 } else {
6069 if (Amt.ugt(VTBits)) {
6070 Hi = Lo = MIRBuilder.buildAShr(
6071 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6072 } else if (Amt.ugt(NVTBits)) {
6073 Lo = MIRBuilder.buildAShr(NVT, InH,
6074 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6075 Hi = MIRBuilder.buildAShr(NVT, InH,
6076 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6077 } else if (Amt == NVTBits) {
6078 Lo = InH;
6079 Hi = MIRBuilder.buildAShr(NVT, InH,
6080 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6081 } else {
6082 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6083
6084 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6085 auto OrRHS = MIRBuilder.buildShl(
6086 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6087
6088 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6089 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6090 }
6091 }
6092
6093 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6094 MI.eraseFromParent();
6095
6096 return Legalized;
6097}
6098
6099LegalizerHelper::LegalizeResult
6100LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6101 LLT RequestedTy) {
6102 if (TypeIdx == 1) {
6103 Observer.changingInstr(MI);
6104 narrowScalarSrc(MI, RequestedTy, 2);
6105 Observer.changedInstr(MI);
6106 return Legalized;
6107 }
6108
6109 Register DstReg = MI.getOperand(0).getReg();
6110 LLT DstTy = MRI.getType(DstReg);
6111 if (DstTy.isVector())
6112 return UnableToLegalize;
6113
6114 Register Amt = MI.getOperand(2).getReg();
6115 LLT ShiftAmtTy = MRI.getType(Amt);
6116 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6117 if (DstEltSize % 2 != 0)
6118 return UnableToLegalize;
6119
6120 // Check if we should use multi-way splitting instead of recursive binary
6121 // splitting.
6122 //
6123 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6124 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6125 // and dependency chains created by the usual binary splitting approach
6126 // (128->64->32).
6127 //
6128 // The >= 8 parts threshold ensures we only use this optimization when binary
6129 // splitting would require multiple recursive passes, avoiding overhead for
6130 // simple 2-way splits where the binary approach is sufficient.
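  // e.g. narrowing a 256-bit shift to s32 yields 8 parts and takes the
  // multiway path, while a 128-bit shift to s64 (2 parts) still uses the
  // binary split below.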
6131 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6132 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6133 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6134 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6135 // steps).
6136 if (NumParts >= 8)
6137 return narrowScalarShiftMultiway(MI, RequestedTy);
6138 }
6139
6140 // Fall back to binary splitting:
6141 // Ignore the input type. We can only go to exactly half the size of the
6142 // input. If that isn't small enough, the resulting pieces will be further
6143 // legalized.
6144 const unsigned NewBitSize = DstEltSize / 2;
6145 const LLT HalfTy = LLT::scalar(NewBitSize);
6146 const LLT CondTy = LLT::scalar(1);
6147
6148 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6149 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6150 ShiftAmtTy);
6151 }
6152
6153 // TODO: Expand with known bits.
6154
6155 // Handle the fully general expansion by an unknown amount.
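  // The result is selected between a "short" shift (Amt < NewBitSize, where
  // bits cross between the two halves) and a "long" shift (Amt >= NewBitSize,
  // where one half is derived entirely from the other); Amt == 0 passes the
  // input through unchanged.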
6156 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6157
6158 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6159 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6160 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6161
6162 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6163 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6164
6165 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6166 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6167 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6168
6169 Register ResultRegs[2];
6170 switch (MI.getOpcode()) {
6171 case TargetOpcode::G_SHL: {
6172 // Short: ShAmt < NewBitSize
6173 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6174
6175 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6176 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6177 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6178
6179 // Long: ShAmt >= NewBitSize
6180 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6181 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6182
6183 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6184 auto Hi = MIRBuilder.buildSelect(
6185 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6186
6187 ResultRegs[0] = Lo.getReg(0);
6188 ResultRegs[1] = Hi.getReg(0);
6189 break;
6190 }
6191 case TargetOpcode::G_LSHR:
6192 case TargetOpcode::G_ASHR: {
6193 // Short: ShAmt < NewBitSize
6194 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6195
6196 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6197 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6198 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6199
6200 // Long: ShAmt >= NewBitSize
6201 MachineInstrBuilder HiL;
6202 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6203 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6204 } else {
6205 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6206 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6207 }
6208 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6209 {InH, AmtExcess}); // Lo from Hi part.
6210
6211 auto Lo = MIRBuilder.buildSelect(
6212 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6213
6214 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6215
6216 ResultRegs[0] = Lo.getReg(0);
6217 ResultRegs[1] = Hi.getReg(0);
6218 break;
6219 }
6220 default:
6221 llvm_unreachable("not a shift");
6222 }
6223
6224 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6225 MI.eraseFromParent();
6226 return Legalized;
6227}
6228
6229Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6230 unsigned PartIdx,
6231 unsigned NumParts,
6232 ArrayRef<Register> SrcParts,
6233 const ShiftParams &Params,
6234 LLT TargetTy, LLT ShiftAmtTy) {
6235 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6236 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6237 assert(WordShiftConst && BitShiftConst && "Expected constants");
6238
6239 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6240 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6241 const bool NeedsInterWordShift = ShiftBits != 0;
6242
6243 switch (Opcode) {
6244 case TargetOpcode::G_SHL: {
6245 // Data moves from lower indices to higher indices
6246 // If this part would come from a source beyond our range, it's zero
6247 if (PartIdx < ShiftWords)
6248 return Params.Zero;
6249
6250 unsigned SrcIdx = PartIdx - ShiftWords;
6251 if (!NeedsInterWordShift)
6252 return SrcParts[SrcIdx];
6253
6254 // Combine shifted main part with carry from previous part
6255 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6256 if (SrcIdx > 0) {
6257 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6258 Params.InvBitShift);
6259 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6260 }
6261 return Hi.getReg(0);
6262 }
6263
6264 case TargetOpcode::G_LSHR: {
6265 unsigned SrcIdx = PartIdx + ShiftWords;
6266 if (SrcIdx >= NumParts)
6267 return Params.Zero;
6268 if (!NeedsInterWordShift)
6269 return SrcParts[SrcIdx];
6270
6271 // Combine shifted main part with carry from next part
6272 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6273 if (SrcIdx + 1 < NumParts) {
6274 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6275 Params.InvBitShift);
6276 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6277 }
6278 return Lo.getReg(0);
6279 }
6280
6281 case TargetOpcode::G_ASHR: {
6282 // Like LSHR but preserves sign bit
6283 unsigned SrcIdx = PartIdx + ShiftWords;
6284 if (SrcIdx >= NumParts)
6285 return Params.SignBit;
6286 if (!NeedsInterWordShift)
6287 return SrcParts[SrcIdx];
6288
6289 // Only the original MSB part uses arithmetic shift to preserve sign. All
6290 // other parts use logical shift since they're just moving data bits.
6291 auto Lo =
6292 (SrcIdx == NumParts - 1)
6293 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6294 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6295 Register HiSrc =
6296 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6297 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6298 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6299 }
6300
6301 default:
6302 llvm_unreachable("not a shift");
6303 }
6304}
6305
6306Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6307 Register MainOperand,
6308 Register ShiftAmt,
6309 LLT TargetTy,
6310 Register CarryOperand) {
6311 // This helper generates a single output part for variable shifts by combining
6312 // the main operand (shifted by BitShift) with carry bits from an adjacent
6313 // part.
6314
6315 // For G_ASHR, individual parts don't have their own sign bit, only the
6316 // complete value does. So we use LSHR for the main operand shift in ASHR
6317 // context.
6318 unsigned MainOpcode =
6319 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6320
6321 // Perform the primary shift on the main operand
6322 Register MainShifted =
6323 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6324 .getReg(0);
6325
6326 // No carry operand available
6327 if (!CarryOperand.isValid())
6328 return MainShifted;
6329
6330 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6331 // so carry bits aren't needed.
6332 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6333 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6334 LLT BoolTy = LLT::scalar(1);
6335 auto IsZeroBitShift =
6336 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6337
6338 // Extract bits from the adjacent part that will "carry over" into this part.
6339 // The carry direction is opposite to the main shift direction, so we can
6340 // align the two shifted values before combining them with OR.
6341
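  // e.g. for G_SHL on 32-bit parts with BitShift = 18:
  //   Out = (Main << 18) | (Carry >> 14)
  // with the carry term forced to zero below when BitShift == 0.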
6342 // Determine the carry shift opcode (opposite direction)
6343 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6344 : TargetOpcode::G_SHL;
6345
6346 // Calculate inverse shift amount: BitWidth - ShiftAmt
6347 auto TargetBitsConst =
6348 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6349 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6350
6351 // Shift the carry operand
6352 Register CarryBits =
6353 MIRBuilder
6354 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6355 .getReg(0);
6356
6357 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6358 // TargetBits which would be poison for the individual carry shift operation).
6359 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6360 Register SafeCarryBits =
6361 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6362 .getReg(0);
6363
6364 // Combine the main shifted part with the carry bits
6365 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6366}
6367
6368LegalizerHelper::LegalizeResult
6369LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6370 const APInt &Amt,
6371 LLT TargetTy,
6372 LLT ShiftAmtTy) {
6373 // Any wide shift can be decomposed into WordShift + BitShift components.
6374 // When shift amount is known constant, directly compute the decomposition
6375 // values and generate constant registers.
6376 Register DstReg = MI.getOperand(0).getReg();
6377 Register SrcReg = MI.getOperand(1).getReg();
6378 LLT DstTy = MRI.getType(DstReg);
6379
6380 const unsigned DstBits = DstTy.getScalarSizeInBits();
6381 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6382 const unsigned NumParts = DstBits / TargetBits;
6383
6384 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6385
6386 // When the shift amount is known at compile time, we just calculate which
6387 // source parts contribute to each output part.
6388
6389 SmallVector<Register, 8> SrcParts;
6390 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6391
6392 if (Amt.isZero()) {
6393 // No shift needed, just copy
6394 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6395 MI.eraseFromParent();
6396 return Legalized;
6397 }
6398
6399 ShiftParams Params;
6400 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6401 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6402
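  // e.g. a 128-bit G_LSHR by 50 with 32-bit parts: ShiftWords = 1 and
  // ShiftBits = 18, so each part becomes (Src[I+1] >> 18) | (Src[I+2] << 14),
  // with out-of-range source parts replaced by zero.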
6403 // Generate constants and values needed by all shift types
6404 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6405 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6406 Params.InvBitShift =
6407 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6408 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6409
6410 // For ASHR, we need the sign-extended value to fill shifted-out positions
6411 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6412 Params.SignBit =
6413 MIRBuilder
6414 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6415 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6416 .getReg(0);
6417
6418 SmallVector<Register, 8> DstParts(NumParts);
6419 for (unsigned I = 0; I < NumParts; ++I)
6420 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6421 Params, TargetTy, ShiftAmtTy);
6422
6423 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6424 MI.eraseFromParent();
6425 return Legalized;
6426}
6427
6428LegalizerHelper::LegalizeResult
6429LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6430 Register DstReg = MI.getOperand(0).getReg();
6431 Register SrcReg = MI.getOperand(1).getReg();
6432 Register AmtReg = MI.getOperand(2).getReg();
6433 LLT DstTy = MRI.getType(DstReg);
6434 LLT ShiftAmtTy = MRI.getType(AmtReg);
6435
6436 const unsigned DstBits = DstTy.getScalarSizeInBits();
6437 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6438 const unsigned NumParts = DstBits / TargetBits;
6439
6440 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6441 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6442
6443 // If the shift amount is known at compile time, we can use direct indexing
6444 // instead of generating select chains in the general case.
6445 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6446 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6447 ShiftAmtTy);
6448
6449 // For runtime-variable shift amounts, we must generate a more complex
6450 // sequence that handles all possible shift values using select chains.
6451
6452 // Split the input into target-sized pieces
6453 SmallVector<Register, 8> SrcParts;
6454 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6455
6456 // Shifting by zero should be a no-op.
6457 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6458 LLT BoolTy = LLT::scalar(1);
6459 auto IsZeroShift =
6460 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6461
6462 // Any wide shift can be decomposed into two components:
6463 // 1. WordShift: number of complete target-sized words to shift
6464 // 2. BitShift: number of bits to shift within each word
6465 //
6466 // Example: 128-bit >> 50 with 32-bit target:
6467 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6468 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6469 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6470 auto TargetBitsLog2Const =
6471 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6472 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6473
6474 Register WordShift =
6475 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6476 Register BitShift =
6477 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6478
6479 // Fill values:
6480 // - SHL/LSHR: fill with zeros
6481 // - ASHR: fill with sign-extended MSB
6482 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6483
6484 Register FillValue;
6485 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6486 auto TargetBitsMinusOneConst =
6487 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6488 FillValue = MIRBuilder
6489 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6490 TargetBitsMinusOneConst)
6491 .getReg(0);
6492 } else {
6493 FillValue = ZeroReg;
6494 }
6495
6496 SmallVector<Register, 8> DstParts(NumParts);
6497
6498 // For each output part, generate a select chain that chooses the correct
6499 // result based on the runtime WordShift value. This handles all possible
6500 // word shift amounts by pre-calculating what each would produce.
6501 for (unsigned I = 0; I < NumParts; ++I) {
6502 // Initialize with appropriate default value for this shift type
6503 Register InBoundsResult = FillValue;
6504
6505 // clang-format off
6506 // Build a branchless select chain by pre-computing results for all possible
6507 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6508 //
6509 // K=0: select(WordShift==0, result0, FillValue)
6510 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6511 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6512 // clang-format on
6513 for (unsigned K = 0; K < NumParts; ++K) {
6514 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6515 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6516 WordShift, WordShiftKConst);
6517
6518 // Calculate source indices for this word shift
6519 //
6520 // For 4-part 128-bit value with K=1 word shift:
6521 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6522 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6523 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6524 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6525 int MainSrcIdx;
6526 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6527
6528 switch (MI.getOpcode()) {
6529 case TargetOpcode::G_SHL:
6530 MainSrcIdx = (int)I - (int)K;
6531 CarrySrcIdx = MainSrcIdx - 1;
6532 break;
6533 case TargetOpcode::G_LSHR:
6534 case TargetOpcode::G_ASHR:
6535 MainSrcIdx = (int)I + (int)K;
6536 CarrySrcIdx = MainSrcIdx + 1;
6537 break;
6538 default:
6539 llvm_unreachable("Not a shift");
6540 }
6541
6542 // Check bounds and build the result for this word shift
6543 Register ResultForK;
6544 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6545 Register MainOp = SrcParts[MainSrcIdx];
6546 Register CarryOp;
6547
6548 // Determine carry operand with bounds checking
6549 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6550 CarryOp = SrcParts[CarrySrcIdx];
6551 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6552 CarrySrcIdx >= (int)NumParts)
6553 CarryOp = FillValue; // Use sign extension
6554
6555 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6556 TargetTy, CarryOp);
6557 } else {
6558 // Out of bounds - use fill value for this k
6559 ResultForK = FillValue;
6560 }
6561
6562 // Select this result if WordShift equals k
6563 InBoundsResult =
 6564 MIRBuilder
 6565 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6566 .getReg(0);
6567 }
6568
6569 // Handle zero-shift special case: if shift is 0, use original input
6570 DstParts[I] =
 6571 MIRBuilder
 6572 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6573 .getReg(0);
6574 }
6575
6576 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6577 MI.eraseFromParent();
6578 return Legalized;
6579}
6580
 6581LegalizerHelper::LegalizeResult
 6582LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
 6583 LLT MoreTy) {
6584 assert(TypeIdx == 0 && "Expecting only Idx 0");
6585
6586 Observer.changingInstr(MI);
6587 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6588 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6589 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6590 moreElementsVectorSrc(MI, MoreTy, I);
6591 }
6592
6593 MachineBasicBlock &MBB = *MI.getParent();
6594 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6595 moreElementsVectorDst(MI, MoreTy, 0);
6596 Observer.changedInstr(MI);
6597 return Legalized;
6598}
6599
6600MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6601 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6602 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6603
6604 switch (Opcode) {
6605 default:
6607 "getNeutralElementForVecReduce called with invalid opcode!");
6608 case TargetOpcode::G_VECREDUCE_ADD:
6609 case TargetOpcode::G_VECREDUCE_OR:
6610 case TargetOpcode::G_VECREDUCE_XOR:
6611 case TargetOpcode::G_VECREDUCE_UMAX:
6612 return MIRBuilder.buildConstant(Ty, 0);
6613 case TargetOpcode::G_VECREDUCE_MUL:
6614 return MIRBuilder.buildConstant(Ty, 1);
6615 case TargetOpcode::G_VECREDUCE_AND:
6616 case TargetOpcode::G_VECREDUCE_UMIN:
 6617 return MIRBuilder.buildConstant(
 6618 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6619 case TargetOpcode::G_VECREDUCE_SMAX:
 6620 return MIRBuilder.buildConstant(
 6621 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6622 case TargetOpcode::G_VECREDUCE_SMIN:
 6623 return MIRBuilder.buildConstant(
 6624 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6625 case TargetOpcode::G_VECREDUCE_FADD:
6626 return MIRBuilder.buildFConstant(Ty, -0.0);
6627 case TargetOpcode::G_VECREDUCE_FMUL:
6628 return MIRBuilder.buildFConstant(Ty, 1.0);
6629 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6630 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6631 assert(false && "getNeutralElementForVecReduce unimplemented for "
6632 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6633 }
6634 llvm_unreachable("switch expected to return!");
6635}
6636
 6637LegalizerHelper::LegalizeResult
 6638LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
 6639 LLT MoreTy) {
6640 unsigned Opc = MI.getOpcode();
6641 switch (Opc) {
6642 case TargetOpcode::G_IMPLICIT_DEF:
6643 case TargetOpcode::G_LOAD: {
6644 if (TypeIdx != 0)
6645 return UnableToLegalize;
6646 Observer.changingInstr(MI);
6647 moreElementsVectorDst(MI, MoreTy, 0);
6648 Observer.changedInstr(MI);
6649 return Legalized;
6650 }
6651 case TargetOpcode::G_STORE:
6652 if (TypeIdx != 0)
6653 return UnableToLegalize;
6654 Observer.changingInstr(MI);
6655 moreElementsVectorSrc(MI, MoreTy, 0);
6656 Observer.changedInstr(MI);
6657 return Legalized;
6658 case TargetOpcode::G_AND:
6659 case TargetOpcode::G_OR:
6660 case TargetOpcode::G_XOR:
6661 case TargetOpcode::G_ADD:
6662 case TargetOpcode::G_SUB:
6663 case TargetOpcode::G_MUL:
6664 case TargetOpcode::G_FADD:
6665 case TargetOpcode::G_FSUB:
6666 case TargetOpcode::G_FMUL:
6667 case TargetOpcode::G_FDIV:
6668 case TargetOpcode::G_FCOPYSIGN:
6669 case TargetOpcode::G_UADDSAT:
6670 case TargetOpcode::G_USUBSAT:
6671 case TargetOpcode::G_SADDSAT:
6672 case TargetOpcode::G_SSUBSAT:
6673 case TargetOpcode::G_SMIN:
6674 case TargetOpcode::G_SMAX:
6675 case TargetOpcode::G_UMIN:
6676 case TargetOpcode::G_UMAX:
6677 case TargetOpcode::G_FMINNUM:
6678 case TargetOpcode::G_FMAXNUM:
6679 case TargetOpcode::G_FMINNUM_IEEE:
6680 case TargetOpcode::G_FMAXNUM_IEEE:
6681 case TargetOpcode::G_FMINIMUM:
6682 case TargetOpcode::G_FMAXIMUM:
6683 case TargetOpcode::G_FMINIMUMNUM:
6684 case TargetOpcode::G_FMAXIMUMNUM:
6685 case TargetOpcode::G_STRICT_FADD:
6686 case TargetOpcode::G_STRICT_FSUB:
6687 case TargetOpcode::G_STRICT_FMUL:
6688 case TargetOpcode::G_SHL:
6689 case TargetOpcode::G_ASHR:
6690 case TargetOpcode::G_LSHR: {
6691 Observer.changingInstr(MI);
6692 moreElementsVectorSrc(MI, MoreTy, 1);
6693 moreElementsVectorSrc(MI, MoreTy, 2);
6694 moreElementsVectorDst(MI, MoreTy, 0);
6695 Observer.changedInstr(MI);
6696 return Legalized;
6697 }
6698 case TargetOpcode::G_FMA:
6699 case TargetOpcode::G_STRICT_FMA:
6700 case TargetOpcode::G_FSHR:
6701 case TargetOpcode::G_FSHL: {
6702 Observer.changingInstr(MI);
6703 moreElementsVectorSrc(MI, MoreTy, 1);
6704 moreElementsVectorSrc(MI, MoreTy, 2);
6705 moreElementsVectorSrc(MI, MoreTy, 3);
6706 moreElementsVectorDst(MI, MoreTy, 0);
6707 Observer.changedInstr(MI);
6708 return Legalized;
6709 }
6710 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6711 case TargetOpcode::G_EXTRACT:
6712 if (TypeIdx != 1)
6713 return UnableToLegalize;
6714 Observer.changingInstr(MI);
6715 moreElementsVectorSrc(MI, MoreTy, 1);
6716 Observer.changedInstr(MI);
6717 return Legalized;
6718 case TargetOpcode::G_INSERT:
6719 case TargetOpcode::G_INSERT_VECTOR_ELT:
6720 case TargetOpcode::G_FREEZE:
6721 case TargetOpcode::G_FNEG:
6722 case TargetOpcode::G_FABS:
6723 case TargetOpcode::G_FSQRT:
6724 case TargetOpcode::G_FCEIL:
6725 case TargetOpcode::G_FFLOOR:
6726 case TargetOpcode::G_FNEARBYINT:
6727 case TargetOpcode::G_FRINT:
6728 case TargetOpcode::G_INTRINSIC_ROUND:
6729 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6730 case TargetOpcode::G_INTRINSIC_TRUNC:
6731 case TargetOpcode::G_BITREVERSE:
6732 case TargetOpcode::G_BSWAP:
6733 case TargetOpcode::G_FCANONICALIZE:
6734 case TargetOpcode::G_SEXT_INREG:
6735 case TargetOpcode::G_ABS:
6736 case TargetOpcode::G_CTLZ:
6737 case TargetOpcode::G_CTPOP:
6738 if (TypeIdx != 0)
6739 return UnableToLegalize;
6740 Observer.changingInstr(MI);
6741 moreElementsVectorSrc(MI, MoreTy, 1);
6742 moreElementsVectorDst(MI, MoreTy, 0);
6743 Observer.changedInstr(MI);
6744 return Legalized;
6745 case TargetOpcode::G_SELECT: {
6746 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6747 if (TypeIdx == 1) {
6748 if (!CondTy.isScalar() ||
6749 DstTy.getElementCount() != MoreTy.getElementCount())
6750 return UnableToLegalize;
6751
6752 // This is turning a scalar select of vectors into a vector
6753 // select. Broadcast the select condition.
6754 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6755 Observer.changingInstr(MI);
6756 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6757 Observer.changedInstr(MI);
6758 return Legalized;
6759 }
6760
6761 if (CondTy.isVector())
6762 return UnableToLegalize;
6763
6764 Observer.changingInstr(MI);
6765 moreElementsVectorSrc(MI, MoreTy, 2);
6766 moreElementsVectorSrc(MI, MoreTy, 3);
6767 moreElementsVectorDst(MI, MoreTy, 0);
6768 Observer.changedInstr(MI);
6769 return Legalized;
6770 }
6771 case TargetOpcode::G_UNMERGE_VALUES:
6772 return UnableToLegalize;
6773 case TargetOpcode::G_PHI:
6774 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6775 case TargetOpcode::G_SHUFFLE_VECTOR:
6776 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6777 case TargetOpcode::G_BUILD_VECTOR: {
 6778 SmallVector<SrcOp, 8> Elts;
 6779 for (auto Op : MI.uses()) {
6780 Elts.push_back(Op.getReg());
6781 }
6782
6783 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6784 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6785 }
6786
6787 MIRBuilder.buildDeleteTrailingVectorElements(
6788 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6789 MI.eraseFromParent();
6790 return Legalized;
6791 }
6792 case TargetOpcode::G_SEXT:
6793 case TargetOpcode::G_ZEXT:
6794 case TargetOpcode::G_ANYEXT:
6795 case TargetOpcode::G_TRUNC:
6796 case TargetOpcode::G_FPTRUNC:
6797 case TargetOpcode::G_FPEXT:
6798 case TargetOpcode::G_FPTOSI:
6799 case TargetOpcode::G_FPTOUI:
6800 case TargetOpcode::G_FPTOSI_SAT:
6801 case TargetOpcode::G_FPTOUI_SAT:
6802 case TargetOpcode::G_SITOFP:
6803 case TargetOpcode::G_UITOFP: {
6804 Observer.changingInstr(MI);
6805 LLT SrcExtTy;
6806 LLT DstExtTy;
6807 if (TypeIdx == 0) {
6808 DstExtTy = MoreTy;
6809 SrcExtTy = LLT::fixed_vector(
6810 MoreTy.getNumElements(),
6811 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6812 } else {
6813 DstExtTy = LLT::fixed_vector(
6814 MoreTy.getNumElements(),
6815 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6816 SrcExtTy = MoreTy;
6817 }
6818 moreElementsVectorSrc(MI, SrcExtTy, 1);
6819 moreElementsVectorDst(MI, DstExtTy, 0);
6820 Observer.changedInstr(MI);
6821 return Legalized;
6822 }
6823 case TargetOpcode::G_ICMP:
6824 case TargetOpcode::G_FCMP: {
6825 if (TypeIdx != 1)
6826 return UnableToLegalize;
6827
6828 Observer.changingInstr(MI);
6829 moreElementsVectorSrc(MI, MoreTy, 2);
6830 moreElementsVectorSrc(MI, MoreTy, 3);
6831 LLT CondTy = LLT::fixed_vector(
6832 MoreTy.getNumElements(),
6833 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6834 moreElementsVectorDst(MI, CondTy, 0);
6835 Observer.changedInstr(MI);
6836 return Legalized;
6837 }
6838 case TargetOpcode::G_BITCAST: {
6839 if (TypeIdx != 0)
6840 return UnableToLegalize;
6841
6842 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6843 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6844
6845 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6846 if (coefficient % DstTy.getNumElements() != 0)
6847 return UnableToLegalize;
6848
6849 coefficient = coefficient / DstTy.getNumElements();
6850
6851 LLT NewTy = SrcTy.changeElementCount(
6852 ElementCount::get(coefficient, MoreTy.isScalable()));
6853 Observer.changingInstr(MI);
6854 moreElementsVectorSrc(MI, NewTy, 1);
6855 moreElementsVectorDst(MI, MoreTy, 0);
6856 Observer.changedInstr(MI);
6857 return Legalized;
6858 }
6859 case TargetOpcode::G_VECREDUCE_FADD:
6860 case TargetOpcode::G_VECREDUCE_FMUL:
6861 case TargetOpcode::G_VECREDUCE_ADD:
6862 case TargetOpcode::G_VECREDUCE_MUL:
6863 case TargetOpcode::G_VECREDUCE_AND:
6864 case TargetOpcode::G_VECREDUCE_OR:
6865 case TargetOpcode::G_VECREDUCE_XOR:
6866 case TargetOpcode::G_VECREDUCE_SMAX:
6867 case TargetOpcode::G_VECREDUCE_SMIN:
6868 case TargetOpcode::G_VECREDUCE_UMAX:
6869 case TargetOpcode::G_VECREDUCE_UMIN: {
6870 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6871 MachineOperand &MO = MI.getOperand(1);
6872 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6873 auto NeutralElement = getNeutralElementForVecReduce(
6874 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6875
6876 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6877 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6878 i != e; i++) {
6879 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6880 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6881 NeutralElement, Idx);
6882 }
6883
6884 Observer.changingInstr(MI);
6885 MO.setReg(NewVec.getReg(0));
6886 Observer.changedInstr(MI);
6887 return Legalized;
6888 }
6889
6890 default:
6891 return UnableToLegalize;
6892 }
6893}
6894
 6895LegalizerHelper::LegalizeResult
 6896LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
 6897 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6898 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6899 unsigned MaskNumElts = Mask.size();
6900 unsigned SrcNumElts = SrcTy.getNumElements();
6901 LLT DestEltTy = DstTy.getElementType();
6902
6903 if (MaskNumElts == SrcNumElts)
6904 return Legalized;
6905
6906 if (MaskNumElts < SrcNumElts) {
6907 // Extend mask to match new destination vector size with
6908 // undef values.
6909 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6910 llvm::copy(Mask, NewMask.begin());
6911
6912 moreElementsVectorDst(MI, SrcTy, 0);
6913 MIRBuilder.setInstrAndDebugLoc(MI);
6914 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6915 MI.getOperand(1).getReg(),
6916 MI.getOperand(2).getReg(), NewMask);
6917 MI.eraseFromParent();
6918
6919 return Legalized;
6920 }
6921
6922 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6923 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6924 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6925
6926 // Create new source vectors by concatenating the initial
6927 // source vectors with undefined vectors of the same size.
6928 auto Undef = MIRBuilder.buildUndef(SrcTy);
6929 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6930 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6931 MOps1[0] = MI.getOperand(1).getReg();
6932 MOps2[0] = MI.getOperand(2).getReg();
6933
6934 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6935 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6936
6937 // Readjust mask for new input vector length.
6938 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6939 for (unsigned I = 0; I != MaskNumElts; ++I) {
6940 int Idx = Mask[I];
6941 if (Idx >= static_cast<int>(SrcNumElts))
6942 Idx += PaddedMaskNumElts - SrcNumElts;
6943 MappedOps[I] = Idx;
6944 }
6945
6946 // If we got more elements than required, extract subvector.
6947 if (MaskNumElts != PaddedMaskNumElts) {
6948 auto Shuffle =
6949 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6950
6951 SmallVector<Register, 16> Elts(MaskNumElts);
6952 for (unsigned I = 0; I < MaskNumElts; ++I) {
6953 Elts[I] =
6954 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6955 .getReg(0);
6956 }
6957 MIRBuilder.buildBuildVector(DstReg, Elts);
6958 } else {
6959 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6960 }
6961
6962 MI.eraseFromParent();
 6963 return Legalized;
 6964}
6965
 6966LegalizerHelper::LegalizeResult
 6967LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
 6968 unsigned int TypeIdx, LLT MoreTy) {
6969 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6970 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6971 unsigned NumElts = DstTy.getNumElements();
6972 unsigned WidenNumElts = MoreTy.getNumElements();
6973
6974 if (DstTy.isVector() && Src1Ty.isVector() &&
6975 DstTy.getNumElements() != Src1Ty.getNumElements()) {
 6976 return equalizeVectorShuffleLengths(MI);
 6977 }
6978
6979 if (TypeIdx != 0)
6980 return UnableToLegalize;
6981
6982 // Expect a canonicalized shuffle.
6983 if (DstTy != Src1Ty || DstTy != Src2Ty)
6984 return UnableToLegalize;
6985
6986 moreElementsVectorSrc(MI, MoreTy, 1);
6987 moreElementsVectorSrc(MI, MoreTy, 2);
6988
6989 // Adjust mask based on new input vector length.
6990 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6991 for (unsigned I = 0; I != NumElts; ++I) {
6992 int Idx = Mask[I];
6993 if (Idx < static_cast<int>(NumElts))
6994 NewMask[I] = Idx;
6995 else
6996 NewMask[I] = Idx - NumElts + WidenNumElts;
6997 }
6998 moreElementsVectorDst(MI, MoreTy, 0);
6999 MIRBuilder.setInstrAndDebugLoc(MI);
7000 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7001 MI.getOperand(1).getReg(),
7002 MI.getOperand(2).getReg(), NewMask);
7003 MI.eraseFromParent();
7004 return Legalized;
7005}
7006
7007void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7008 ArrayRef<Register> Src1Regs,
7009 ArrayRef<Register> Src2Regs,
7010 LLT NarrowTy) {
 7011 MachineIRBuilder &B = MIRBuilder;
 7012 unsigned SrcParts = Src1Regs.size();
7013 unsigned DstParts = DstRegs.size();
7014
7015 unsigned DstIdx = 0; // Low bits of the result.
7016 Register FactorSum =
7017 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7018 DstRegs[DstIdx] = FactorSum;
7019
7020 Register CarrySumPrevDstIdx;
 7021 SmallVector<Register, 4> Factors;
 7022
7023 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7024 // Collect low parts of muls for DstIdx.
7025 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7026 i <= std::min(DstIdx, SrcParts - 1); ++i) {
 7027 MachineInstrBuilder Mul =
 7028 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7029 Factors.push_back(Mul.getReg(0));
7030 }
7031 // Collect high parts of muls from previous DstIdx.
7032 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7033 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7034 MachineInstrBuilder Umulh =
7035 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7036 Factors.push_back(Umulh.getReg(0));
7037 }
7038 // Add CarrySum from additions calculated for previous DstIdx.
7039 if (DstIdx != 1) {
7040 Factors.push_back(CarrySumPrevDstIdx);
7041 }
7042
7043 Register CarrySum;
7044 // Add all factors and accumulate all carries into CarrySum.
7045 if (DstIdx != DstParts - 1) {
7046 MachineInstrBuilder Uaddo =
7047 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7048 FactorSum = Uaddo.getReg(0);
7049 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7050 for (unsigned i = 2; i < Factors.size(); ++i) {
7051 MachineInstrBuilder Uaddo =
7052 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7053 FactorSum = Uaddo.getReg(0);
7054 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7055 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7056 }
7057 } else {
7058 // Since value for the next index is not calculated, neither is CarrySum.
7059 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7060 for (unsigned i = 2; i < Factors.size(); ++i)
7061 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7062 }
7063
7064 CarrySumPrevDstIdx = CarrySum;
7065 DstRegs[DstIdx] = FactorSum;
7066 Factors.clear();
7067 }
7068}
7069
 7070LegalizerHelper::LegalizeResult
 7071LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
 7072 LLT NarrowTy) {
7073 if (TypeIdx != 0)
7074 return UnableToLegalize;
7075
7076 Register DstReg = MI.getOperand(0).getReg();
7077 LLT DstType = MRI.getType(DstReg);
7078 // FIXME: add support for vector types
7079 if (DstType.isVector())
7080 return UnableToLegalize;
7081
7082 unsigned Opcode = MI.getOpcode();
7083 unsigned OpO, OpE, OpF;
7084 switch (Opcode) {
7085 case TargetOpcode::G_SADDO:
7086 case TargetOpcode::G_SADDE:
7087 case TargetOpcode::G_UADDO:
7088 case TargetOpcode::G_UADDE:
7089 case TargetOpcode::G_ADD:
7090 OpO = TargetOpcode::G_UADDO;
7091 OpE = TargetOpcode::G_UADDE;
7092 OpF = TargetOpcode::G_UADDE;
7093 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7094 OpF = TargetOpcode::G_SADDE;
7095 break;
7096 case TargetOpcode::G_SSUBO:
7097 case TargetOpcode::G_SSUBE:
7098 case TargetOpcode::G_USUBO:
7099 case TargetOpcode::G_USUBE:
7100 case TargetOpcode::G_SUB:
7101 OpO = TargetOpcode::G_USUBO;
7102 OpE = TargetOpcode::G_USUBE;
7103 OpF = TargetOpcode::G_USUBE;
7104 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7105 OpF = TargetOpcode::G_SSUBE;
7106 break;
7107 default:
7108 llvm_unreachable("Unexpected add/sub opcode!");
7109 }
7110
7111 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7112 unsigned NumDefs = MI.getNumExplicitDefs();
7113 Register Src1 = MI.getOperand(NumDefs).getReg();
7114 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7115 Register CarryDst, CarryIn;
7116 if (NumDefs == 2)
7117 CarryDst = MI.getOperand(1).getReg();
7118 if (MI.getNumOperands() == NumDefs + 3)
7119 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7120
7121 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7122 LLT LeftoverTy, DummyTy;
7123 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7124 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7125 MIRBuilder, MRI);
7126 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7127 MRI);
7128
7129 int NarrowParts = Src1Regs.size();
7130 Src1Regs.append(Src1Left);
7131 Src2Regs.append(Src2Left);
7132 DstRegs.reserve(Src1Regs.size());
7133
7134 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7135 Register DstReg =
7136 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7137 Register CarryOut;
7138 // Forward the final carry-out to the destination register
7139 if (i == e - 1 && CarryDst)
7140 CarryOut = CarryDst;
7141 else
7142 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7143
7144 if (!CarryIn) {
7145 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7146 {Src1Regs[i], Src2Regs[i]});
7147 } else if (i == e - 1) {
7148 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7149 {Src1Regs[i], Src2Regs[i], CarryIn});
7150 } else {
7151 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7152 {Src1Regs[i], Src2Regs[i], CarryIn});
7153 }
7154
7155 DstRegs.push_back(DstReg);
7156 CarryIn = CarryOut;
7157 }
7158 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7159 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7160 ArrayRef(DstRegs).drop_front(NarrowParts));
7161
7162 MI.eraseFromParent();
7163 return Legalized;
7164}
7165
 7166LegalizerHelper::LegalizeResult
 7167LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
 7168 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7169
7170 LLT Ty = MRI.getType(DstReg);
7171 if (Ty.isVector())
7172 return UnableToLegalize;
7173
7174 unsigned Size = Ty.getSizeInBits();
7175 unsigned NarrowSize = NarrowTy.getSizeInBits();
7176 if (Size % NarrowSize != 0)
7177 return UnableToLegalize;
7178
7179 unsigned NumParts = Size / NarrowSize;
7180 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7181 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7182
7183 SmallVector<Register, 2> Src1Parts, Src2Parts;
7184 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7185 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7186 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7187 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7188
7189 // Take only high half of registers if this is high mul.
7190 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7191 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7192 MI.eraseFromParent();
7193 return Legalized;
7194}
7195
 7196LegalizerHelper::LegalizeResult
 7197LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
 7198 LLT NarrowTy) {
7199 if (TypeIdx != 0)
7200 return UnableToLegalize;
7201
7202 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7203
7204 Register Src = MI.getOperand(1).getReg();
7205 LLT SrcTy = MRI.getType(Src);
7206
7207 // If all finite floats fit into the narrowed integer type, we can just swap
7208 // out the result type. This is practically only useful for conversions from
7209 // half to at least 16-bits, so just handle the one case.
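 // Note (added annotation): the largest finite half value is 65504, which fits
 // in 16 unsigned bits but needs 17 bits once a sign bit is required - hence
 // the 16/17 thresholds below.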
7210 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7211 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7212 return UnableToLegalize;
7213
7214 Observer.changingInstr(MI);
7215 narrowScalarDst(MI, NarrowTy, 0,
7216 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7217 Observer.changedInstr(MI);
7218 return Legalized;
7219}
7220
 7221LegalizerHelper::LegalizeResult
 7222LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
 7223 LLT NarrowTy) {
7224 if (TypeIdx != 1)
7225 return UnableToLegalize;
7226
7227 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7228
7229 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7230 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7231 // NarrowSize.
7232 if (SizeOp1 % NarrowSize != 0)
7233 return UnableToLegalize;
7234 int NumParts = SizeOp1 / NarrowSize;
7235
7236 SmallVector<Register, 2> SrcRegs, DstRegs;
7237 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7238 MIRBuilder, MRI);
7239
7240 Register OpReg = MI.getOperand(0).getReg();
7241 uint64_t OpStart = MI.getOperand(2).getImm();
7242 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7243 for (int i = 0; i < NumParts; ++i) {
7244 unsigned SrcStart = i * NarrowSize;
7245
7246 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7247 // No part of the extract uses this subregister, ignore it.
7248 continue;
7249 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7250 // The entire subregister is extracted, forward the value.
7251 DstRegs.push_back(SrcRegs[i]);
7252 continue;
7253 }
7254
7255 // OpSegStart is where this destination segment would start in OpReg if it
7256 // extended infinitely in both directions.
7257 int64_t ExtractOffset;
7258 uint64_t SegSize;
7259 if (OpStart < SrcStart) {
7260 ExtractOffset = 0;
7261 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7262 } else {
7263 ExtractOffset = OpStart - SrcStart;
7264 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7265 }
7266
7267 Register SegReg = SrcRegs[i];
7268 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7269 // A genuine extract is needed.
7270 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7271 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7272 }
7273
7274 DstRegs.push_back(SegReg);
7275 }
7276
7277 Register DstReg = MI.getOperand(0).getReg();
7278 if (MRI.getType(DstReg).isVector())
7279 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7280 else if (DstRegs.size() > 1)
7281 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7282 else
7283 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7284 MI.eraseFromParent();
7285 return Legalized;
7286}
7287
 7288LegalizerHelper::LegalizeResult
 7289LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
 7290 LLT NarrowTy) {
7291 // FIXME: Don't know how to handle secondary types yet.
7292 if (TypeIdx != 0)
7293 return UnableToLegalize;
7294
7295 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7296 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7297 LLT LeftoverTy;
7298 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7299 LeftoverRegs, MIRBuilder, MRI);
7300
7301 SrcRegs.append(LeftoverRegs);
7302
7303 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7304 Register OpReg = MI.getOperand(2).getReg();
7305 uint64_t OpStart = MI.getOperand(3).getImm();
7306 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7307 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7308 unsigned DstStart = I * NarrowSize;
7309
7310 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7311 // The entire subregister is defined by this insert, forward the new
7312 // value.
7313 DstRegs.push_back(OpReg);
7314 continue;
7315 }
7316
7317 Register SrcReg = SrcRegs[I];
7318 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7319 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7320 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7321 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7322 }
7323
7324 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7325 // No part of the insert affects this subregister, forward the original.
7326 DstRegs.push_back(SrcReg);
7327 continue;
7328 }
7329
7330 // OpSegStart is where this destination segment would start in OpReg if it
7331 // extended infinitely in both directions.
7332 int64_t ExtractOffset, InsertOffset;
7333 uint64_t SegSize;
7334 if (OpStart < DstStart) {
7335 InsertOffset = 0;
7336 ExtractOffset = DstStart - OpStart;
7337 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7338 } else {
7339 InsertOffset = OpStart - DstStart;
7340 ExtractOffset = 0;
7341 SegSize =
7342 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7343 }
7344
7345 Register SegReg = OpReg;
7346 if (ExtractOffset != 0 || SegSize != OpSize) {
7347 // A genuine extract is needed.
7348 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7349 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7350 }
7351
7352 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7353 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7354 DstRegs.push_back(DstReg);
7355 }
7356
7357 uint64_t WideSize = DstRegs.size() * NarrowSize;
7358 Register DstReg = MI.getOperand(0).getReg();
7359 if (WideSize > RegTy.getSizeInBits()) {
7360 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7361 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7362 MIRBuilder.buildTrunc(DstReg, MergeReg);
7363 } else
7364 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7365
7366 MI.eraseFromParent();
7367 return Legalized;
7368}
7369
 7370LegalizerHelper::LegalizeResult
 7371LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
 7372 LLT NarrowTy) {
7373 Register DstReg = MI.getOperand(0).getReg();
7374 LLT DstTy = MRI.getType(DstReg);
7375
7376 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7377
7378 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7379 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7380 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7381 LLT LeftoverTy;
7382 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7383 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7384 return UnableToLegalize;
7385
7386 LLT Unused;
7387 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7388 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7389 llvm_unreachable("inconsistent extractParts result");
7390
7391 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7392 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7393 {Src0Regs[I], Src1Regs[I]});
7394 DstRegs.push_back(Inst.getReg(0));
7395 }
7396
7397 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7398 auto Inst = MIRBuilder.buildInstr(
7399 MI.getOpcode(),
7400 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7401 DstLeftoverRegs.push_back(Inst.getReg(0));
7402 }
7403
7404 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7405 LeftoverTy, DstLeftoverRegs);
7406
7407 MI.eraseFromParent();
7408 return Legalized;
7409}
7410
 7411LegalizerHelper::LegalizeResult
 7412LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
 7413 LLT NarrowTy) {
7414 if (TypeIdx != 0)
7415 return UnableToLegalize;
7416
7417 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7418
7419 LLT DstTy = MRI.getType(DstReg);
7420 if (DstTy.isVector())
7421 return UnableToLegalize;
7422
 7423 SmallVector<Register, 8> Parts;
 7424 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7425 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7426 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7427
7428 MI.eraseFromParent();
7429 return Legalized;
7430}
7431
 7432LegalizerHelper::LegalizeResult
 7433LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
 7434 LLT NarrowTy) {
7435 if (TypeIdx != 0)
7436 return UnableToLegalize;
7437
7438 Register CondReg = MI.getOperand(1).getReg();
7439 LLT CondTy = MRI.getType(CondReg);
7440 if (CondTy.isVector()) // TODO: Handle vselect
7441 return UnableToLegalize;
7442
7443 Register DstReg = MI.getOperand(0).getReg();
7444 LLT DstTy = MRI.getType(DstReg);
7445
7446 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7447 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7448 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7449 LLT LeftoverTy;
7450 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7451 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7452 return UnableToLegalize;
7453
7454 LLT Unused;
7455 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7456 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7457 llvm_unreachable("inconsistent extractParts result");
7458
7459 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7460 auto Select = MIRBuilder.buildSelect(NarrowTy,
7461 CondReg, Src1Regs[I], Src2Regs[I]);
7462 DstRegs.push_back(Select.getReg(0));
7463 }
7464
7465 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7466 auto Select = MIRBuilder.buildSelect(
7467 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7468 DstLeftoverRegs.push_back(Select.getReg(0));
7469 }
7470
7471 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7472 LeftoverTy, DstLeftoverRegs);
7473
7474 MI.eraseFromParent();
7475 return Legalized;
7476}
7477
 7478LegalizerHelper::LegalizeResult
 7479LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
 7480 LLT NarrowTy) {
7481 if (TypeIdx != 1)
7482 return UnableToLegalize;
7483
7484 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7485 unsigned NarrowSize = NarrowTy.getSizeInBits();
7486
7487 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7488 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7489
 7490 MachineIRBuilder &B = MIRBuilder;
 7491 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7492 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7493 auto C_0 = B.buildConstant(NarrowTy, 0);
7494 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7495 UnmergeSrc.getReg(1), C_0);
7496 auto LoCTLZ = IsUndef ?
7497 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7498 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7499 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7500 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7501 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7502 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7503
7504 MI.eraseFromParent();
7505 return Legalized;
7506 }
7507
7508 return UnableToLegalize;
7509}
7510
 7511LegalizerHelper::LegalizeResult
 7512LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
 7513 LLT NarrowTy) {
7514 if (TypeIdx != 1)
7515 return UnableToLegalize;
7516
7517 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7518 unsigned NarrowSize = NarrowTy.getSizeInBits();
7519
7520 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7521 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7522
 7523 MachineIRBuilder &B = MIRBuilder;
 7524 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7525 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7526 auto C_0 = B.buildConstant(NarrowTy, 0);
7527 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7528 UnmergeSrc.getReg(0), C_0);
7529 auto HiCTTZ = IsUndef ?
7530 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7531 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7532 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7533 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7534 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7535 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7536
7537 MI.eraseFromParent();
7538 return Legalized;
7539 }
7540
7541 return UnableToLegalize;
7542}
7543
 7544LegalizerHelper::LegalizeResult
 7545LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
 7546 LLT NarrowTy) {
7547 if (TypeIdx != 1)
7548 return UnableToLegalize;
7549
7550 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7551 unsigned NarrowSize = NarrowTy.getSizeInBits();
7552
7553 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7554 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7555
7556 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7557 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7558 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7559
7560 MI.eraseFromParent();
7561 return Legalized;
7562 }
7563
7564 return UnableToLegalize;
7565}
7566
 7567LegalizerHelper::LegalizeResult
 7568LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
 7569 LLT NarrowTy) {
7570 if (TypeIdx != 1)
7571 return UnableToLegalize;
7572
 7573 MachineIRBuilder &B = MIRBuilder;
 7574 Register ExpReg = MI.getOperand(2).getReg();
7575 LLT ExpTy = MRI.getType(ExpReg);
7576
7577 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7578
7579 // Clamp the exponent to the range of the target type.
7580 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7581 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7582 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7583 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7584
7585 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7586 Observer.changingInstr(MI);
7587 MI.getOperand(2).setReg(Trunc.getReg(0));
7588 Observer.changedInstr(MI);
7589 return Legalized;
7590}
7591
 7592LegalizerHelper::LegalizeResult
 7593LegalizerHelper::lowerBitCount(MachineInstr &MI) {
 7594 unsigned Opc = MI.getOpcode();
7595 const auto &TII = MIRBuilder.getTII();
7596 auto isSupported = [this](const LegalityQuery &Q) {
7597 auto QAction = LI.getAction(Q).Action;
7598 return QAction == Legal || QAction == Libcall || QAction == Custom;
7599 };
7600 switch (Opc) {
7601 default:
7602 return UnableToLegalize;
7603 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7604 // This trivially expands to CTLZ.
7605 Observer.changingInstr(MI);
7606 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7607 Observer.changedInstr(MI);
7608 return Legalized;
7609 }
7610 case TargetOpcode::G_CTLZ: {
7611 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7612 unsigned Len = SrcTy.getScalarSizeInBits();
7613
7614 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7615 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7616 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7617 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7618 auto ICmp = MIRBuilder.buildICmp(
7619 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7620 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7621 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7622 MI.eraseFromParent();
7623 return Legalized;
7624 }
7625 // for now, we do this:
7626 // NewLen = NextPowerOf2(Len);
7627 // x = x | (x >> 1);
7628 // x = x | (x >> 2);
7629 // ...
7630 // x = x | (x >>16);
7631 // x = x | (x >>32); // for 64-bit input
 7632 // Up to NewLen/2
7633 // return Len - popcount(x);
7634 //
7635 // Ref: "Hacker's Delight" by Henry Warren
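 // Worked example (added annotation): for a 32-bit x = 0x00080000 (bit 19
 // set), the or-shift cascade smears the leading one downwards, giving
 // x = 0x000FFFFF with popcount 20, so ctlz = 32 - 20 = 12.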
7636 Register Op = SrcReg;
7637 unsigned NewLen = PowerOf2Ceil(Len);
7638 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7639 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7640 auto MIBOp = MIRBuilder.buildOr(
7641 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7642 Op = MIBOp.getReg(0);
7643 }
7644 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7645 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7646 MIBPop);
7647 MI.eraseFromParent();
7648 return Legalized;
7649 }
7650 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7651 // This trivially expands to CTTZ.
7652 Observer.changingInstr(MI);
7653 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7654 Observer.changedInstr(MI);
7655 return Legalized;
7656 }
7657 case TargetOpcode::G_CTTZ: {
7658 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7659
7660 unsigned Len = SrcTy.getScalarSizeInBits();
7661 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7662 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7663 // zero.
7664 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7665 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7666 auto ICmp = MIRBuilder.buildICmp(
7667 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7668 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7669 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7670 MI.eraseFromParent();
7671 return Legalized;
7672 }
7673 // for now, we use: { return popcount(~x & (x - 1)); }
7674 // unless the target has ctlz but not ctpop, in which case we use:
7675 // { return 32 - nlz(~x & (x-1)); }
7676 // Ref: "Hacker's Delight" by Henry Warren
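 // Worked example (added annotation): for x = 12 (0b1100), x - 1 = 0b1011 and
 // ~x = ...0b0011, so ~x & (x - 1) = 0b0011, whose popcount is 2 = cttz(12).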
7677 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7678 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7679 auto MIBTmp = MIRBuilder.buildAnd(
7680 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7681 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7682 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7683 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7684 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7685 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7686 MI.eraseFromParent();
7687 return Legalized;
7688 }
7689 Observer.changingInstr(MI);
7690 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7691 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7692 Observer.changedInstr(MI);
7693 return Legalized;
7694 }
7695 case TargetOpcode::G_CTPOP: {
7696 Register SrcReg = MI.getOperand(1).getReg();
7697 LLT Ty = MRI.getType(SrcReg);
7698 unsigned Size = Ty.getScalarSizeInBits();
 7699 MachineIRBuilder &B = MIRBuilder;
 7700
7701 // Bail out on irregular type lengths.
7702 if (Size > 128 || Size % 8 != 0)
7703 return UnableToLegalize;
7704
 7705 // Count set bits in blocks of 2 bits. The default approach would be
 7706 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
 7707 // We use the following formula instead:
7708 // B2Count = val - { (val >> 1) & 0x55555555 }
7709 // since it gives same result in blocks of 2 with one instruction less.
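 // Per-2-bit check (added annotation): 0b00 -> 0, 0b01 -> 1, 0b10 -> 2 - 1 = 1,
 // 0b11 -> 3 - 1 = 2, i.e. each 2-bit block ends up holding its own popcount.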
7710 auto C_1 = B.buildConstant(Ty, 1);
7711 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7712 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7713 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7714 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7715 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7716
 7717 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7718 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7719 auto C_2 = B.buildConstant(Ty, 2);
7720 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7721 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7722 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7723 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7724 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7725 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7726
7727 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7728 // addition since count value sits in range {0,...,8} and 4 bits are enough
7729 // to hold such binary values. After addition high 4 bits still hold count
7730 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7731 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7732 auto C_4 = B.buildConstant(Ty, 4);
7733 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7734 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7735 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7736 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7737 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7738
7739 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7740 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7741 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
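 // Annotation: for a 32-bit value with byte counts c3:c2:c1:c0, multiplying by
 // 0x01010101 leaves c3 + c2 + c1 + c0 in the top byte (the sum is at most 32,
 // so it cannot overflow a byte); the shift by Size - 8 then extracts it.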
7742 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7743
7744 // Shift count result from 8 high bits to low bits.
7745 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7746
7747 auto IsMulSupported = [this](const LLT Ty) {
7748 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7749 return Action == Legal || Action == WidenScalar || Action == Custom;
7750 };
7751 if (IsMulSupported(Ty)) {
7752 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7753 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7754 } else {
7755 auto ResTmp = B8Count;
7756 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7757 auto ShiftC = B.buildConstant(Ty, Shift);
7758 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7759 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7760 }
7761 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7762 }
7763 MI.eraseFromParent();
7764 return Legalized;
7765 }
7766 }
7767}
7768
7769// Check that (every element of) Reg is undef or not an exact multiple of BW.
 7770static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
 7771 Register Reg, unsigned BW) {
7772 return matchUnaryPredicate(
7773 MRI, Reg,
7774 [=](const Constant *C) {
7775 // Null constant here means an undef.
 7776 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
 7777 return !CI || CI->getValue().urem(BW) != 0;
7778 },
7779 /*AllowUndefs*/ true);
7780}
7781
 7782LegalizerHelper::LegalizeResult
 7783LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
 7784 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7785 LLT Ty = MRI.getType(Dst);
7786 LLT ShTy = MRI.getType(Z);
7787
7788 unsigned BW = Ty.getScalarSizeInBits();
7789
7790 if (!isPowerOf2_32(BW))
7791 return UnableToLegalize;
7792
7793 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7794 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7795
7796 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7797 // fshl X, Y, Z -> fshr X, Y, -Z
7798 // fshr X, Y, Z -> fshl X, Y, -Z
7799 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7800 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7801 } else {
7802 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7803 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7804 auto One = MIRBuilder.buildConstant(ShTy, 1);
7805 if (IsFSHL) {
7806 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7807 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7808 } else {
7809 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7810 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7811 }
7812
7813 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7814 }
7815
7816 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7817 MI.eraseFromParent();
7818 return Legalized;
7819}
7820
 7821LegalizerHelper::LegalizeResult
 7822LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
 7823 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7824 LLT Ty = MRI.getType(Dst);
7825 LLT ShTy = MRI.getType(Z);
7826
7827 const unsigned BW = Ty.getScalarSizeInBits();
7828 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7829
7830 Register ShX, ShY;
7831 Register ShAmt, InvShAmt;
7832
7833 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7834 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7835 // fshl: X << C | Y >> (BW - C)
7836 // fshr: X << (BW - C) | Y >> C
7837 // where C = Z % BW is not zero
7838 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7839 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7840 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7841 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7842 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7843 } else {
7844 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7845 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
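 // Annotation: splitting the opposing shift as "Y >> 1 >> (BW - 1 - (Z % BW))"
 // keeps each individual shift amount strictly below BW. E.g. with Z % BW == 0
 // the expression becomes Y >> 1 >> (BW - 1) == 0, so the result is simply
 // X << 0 = X, without ever forming the undefined shift Y >> BW.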
7846 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7847 if (isPowerOf2_32(BW)) {
7848 // Z % BW -> Z & (BW - 1)
7849 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7850 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7851 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7852 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7853 } else {
7854 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7855 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7856 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7857 }
7858
7859 auto One = MIRBuilder.buildConstant(ShTy, 1);
7860 if (IsFSHL) {
7861 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7862 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7863 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7864 } else {
7865 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7866 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7867 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7868 }
7869 }
7870
7871 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7872 MI.eraseFromParent();
7873 return Legalized;
7874}
7875
 7876LegalizerHelper::LegalizeResult
 7877LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
 7878 // These operations approximately do the following (while avoiding undefined
7879 // shifts by BW):
7880 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7881 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
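 // Worked example (added annotation): with BW = 8, X = 0xAB, Y = 0xCD, Z = 4,
 // G_FSHL yields (0xAB << 4 | 0xCD >> 4) & 0xFF = 0xB0 | 0x0C = 0xBC, i.e. the
 // top byte of the 16-bit concatenation 0xABCD shifted left by 4.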
7882 Register Dst = MI.getOperand(0).getReg();
7883 LLT Ty = MRI.getType(Dst);
7884 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7885
7886 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7887 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7888
7889 // TODO: Use smarter heuristic that accounts for vector legalization.
7890 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7891 return lowerFunnelShiftAsShifts(MI);
7892
7893 // This only works for powers of 2, fallback to shifts if it fails.
7894 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7895 if (Result == UnableToLegalize)
7896 return lowerFunnelShiftAsShifts(MI);
7897 return Result;
7898}
7899
 7900LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
 7901 auto [Dst, Src] = MI.getFirst2Regs();
7902 LLT DstTy = MRI.getType(Dst);
7903 LLT SrcTy = MRI.getType(Src);
7904
7905 uint32_t DstTySize = DstTy.getSizeInBits();
7906 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7907 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7908
7909 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7910 !isPowerOf2_32(SrcTyScalarSize))
7911 return UnableToLegalize;
7912
7913 // The step between extend is too large, split it by creating an intermediate
7914 // extend instruction
7915 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7916 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7917 // If the destination type is illegal, split it into multiple statements
7918 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
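 // Illustration (added annotation): a G_ZEXT from <4 x s8> to <4 x s32> first
 // becomes a G_ZEXT to <4 x s16>, then an unmerge into two <2 x s16>, a G_ZEXT
 // of each half to <2 x s32>, and a final merge into the <4 x s32> result.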
7919 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7920 // Unmerge the vector
7921 LLT EltTy = MidTy.changeElementCount(
 7922 MidTy.getElementCount().divideCoefficientBy(2));
 7923 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7924
7925 // ZExt the vectors
7926 LLT ZExtResTy = DstTy.changeElementCount(
 7927 DstTy.getElementCount().divideCoefficientBy(2));
 7928 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7929 {UnmergeSrc.getReg(0)});
7930 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7931 {UnmergeSrc.getReg(1)});
7932
7933 // Merge the ending vectors
7934 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7935
7936 MI.eraseFromParent();
7937 return Legalized;
7938 }
7939 return UnableToLegalize;
7940}
7941
 7942LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
 7943 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
 7944 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
 7945 // Similar to how operand splitting is done in SelectionDAG, we can handle
7946 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7947 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7948 // %lo16(<4 x s16>) = G_TRUNC %inlo
7949 // %hi16(<4 x s16>) = G_TRUNC %inhi
7950 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7951 // %res(<8 x s8>) = G_TRUNC %in16
7952
7953 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7954
7955 Register DstReg = MI.getOperand(0).getReg();
7956 Register SrcReg = MI.getOperand(1).getReg();
7957 LLT DstTy = MRI.getType(DstReg);
7958 LLT SrcTy = MRI.getType(SrcReg);
7959
7960 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
 7961 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
 7962 isPowerOf2_32(SrcTy.getNumElements()) &&
7963 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7964 // Split input type.
7965 LLT SplitSrcTy = SrcTy.changeElementCount(
7966 SrcTy.getElementCount().divideCoefficientBy(2));
7967
7968 // First, split the source into two smaller vectors.
7969 SmallVector<Register, 2> SplitSrcs;
7970 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7971
7972 // Truncate the splits into intermediate narrower elements.
7973 LLT InterTy;
7974 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7975 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7976 else
7977 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7978 for (Register &Src : SplitSrcs)
7979 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7980
7981 // Combine the new truncates into one vector
7982 auto Merge = MIRBuilder.buildMergeLikeInstr(
7983 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7984
7985 // Truncate the new vector to the final result type
7986 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7987 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7988 else
7989 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7990
7991 MI.eraseFromParent();
7992
7993 return Legalized;
7994 }
7995 return UnableToLegalize;
7996}
7997
 7998LegalizerHelper::LegalizeResult
 7999LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
 8000 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8001 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8002 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8003 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8004 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8005 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8006 MI.eraseFromParent();
8007 return Legalized;
8008}
8009
 8010LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
 8011 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8012
8013 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8014 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8015
8016 MIRBuilder.setInstrAndDebugLoc(MI);
8017
8018 // If a rotate in the other direction is supported, use it.
8019 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8020 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8021 isPowerOf2_32(EltSizeInBits))
8022 return lowerRotateWithReverseRotate(MI);
8023
8024 // If a funnel shift is supported, use it.
8025 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8026 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8027 bool IsFShLegal = false;
8028 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8029 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8030 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8031 Register R3) {
8032 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8033 MI.eraseFromParent();
8034 return Legalized;
8035 };
8036 // If a funnel shift in the other direction is supported, use it.
8037 if (IsFShLegal) {
8038 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8039 } else if (isPowerOf2_32(EltSizeInBits)) {
8040 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8041 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8042 }
8043 }
8044
8045 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8046 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8047 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8048 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8049 Register ShVal;
8050 Register RevShiftVal;
8051 if (isPowerOf2_32(EltSizeInBits)) {
8052 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8053 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
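 // Worked example (added annotation): rotl of the 8-bit value x = 0x96
 // (1001 0110) by c = 3: x << (3 & 7) = 0xB0 and x >> (-3 & 7) = x >> 5 = 0x04,
 // giving 0xB4 (1011 0100).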
8054 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8055 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8056 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8057 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8058 RevShiftVal =
8059 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8060 } else {
8061 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8062 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8063 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8064 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8065 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8066 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8067 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8068 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8069 RevShiftVal =
8070 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8071 }
8072 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
8073 MI.eraseFromParent();
8074 return Legalized;
8075}
8076
8077// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8078// representation.
 8079LegalizerHelper::LegalizeResult
 8080LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
 8081 auto [Dst, Src] = MI.getFirst2Regs();
8082 const LLT S64 = LLT::scalar(64);
8083 const LLT S32 = LLT::scalar(32);
8084 const LLT S1 = LLT::scalar(1);
8085
8086 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8087
8088 // unsigned cul2f(ulong u) {
8089 // uint lz = clz(u);
8090 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8091 // u = (u << lz) & 0x7fffffffffffffffUL;
8092 // ulong t = u & 0xffffffffffUL;
8093 // uint v = (e << 23) | (uint)(u >> 40);
8094 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8095 // return as_float(v + r);
8096 // }
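 // Sanity check (added annotation): cul2f(1) gives lz = 63, e = 127, u shifted
 // by lz and masked becomes 0, t = 0, v = 127 << 23 = 0x3F800000 and r = 0,
 // i.e. exactly 1.0f.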
8097
8098 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8099 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8100
8101 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8102
8103 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8104 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8105
8106 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8107 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8108
8109 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8110 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8111
8112 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8113
8114 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8115 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8116
8117 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8118 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8119 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8120
8121 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8122 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8123 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8124 auto One = MIRBuilder.buildConstant(S32, 1);
8125
8126 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8127 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8128 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8129 MIRBuilder.buildAdd(Dst, V, R);
8130
8131 MI.eraseFromParent();
8132 return Legalized;
8133}
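
// [Editor's illustration; not part of LegalizerHelper.cpp] A host-side sketch
// of the cul2f() routine quoted in the comment above, using the GCC/Clang
// builtins __builtin_clzll and __builtin_memcpy in place of G_CTLZ_ZERO_UNDEF
// and the final bitcast; the function name is invented for this example.
static float exampleCul2f(unsigned long long U) {
  unsigned Lz = U ? (unsigned)__builtin_clzll(U) : 64u;   // lz = clz(u)
  unsigned E = U != 0 ? 127u + 63u - Lz : 0u;             // biased exponent
  U = (U << (Lz & 63u)) & 0x7fffffffffffffffULL;          // normalize, drop leading 1
  unsigned long long T = U & 0xffffffffffULL;             // rounding bits
  unsigned V = (E << 23) | (unsigned)(U >> 40);           // exponent | mantissa
  unsigned R = T > 0x8000000000ULL ? 1u
                                   : (T == 0x8000000000ULL ? (V & 1u) : 0u);
  V += R;                                                 // round to nearest even
  float F;
  __builtin_memcpy(&F, &V, sizeof(F));                    // as_float(v + r)
  return F;
}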
8134
8135// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8136// operations and G_SITOFP
8139 auto [Dst, Src] = MI.getFirst2Regs();
8140 const LLT S64 = LLT::scalar(64);
8141 const LLT S32 = LLT::scalar(32);
8142 const LLT S1 = LLT::scalar(1);
8143
8144 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8145
8146 // For u64 values that do not exceed INT64_MAX we simply reuse SITOFP.
8147 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8148 // saved before division, convert to float by SITOFP, multiply the result
8149 // by 2.
8150 auto One = MIRBuilder.buildConstant(S64, 1);
8151 auto Zero = MIRBuilder.buildConstant(S64, 0);
8152 // Result if Src <= INT64_MAX
8153 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8154 // Result if Src > INT64_MAX
8155 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8156 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8157 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8158 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8159 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8160 // Check if the original value is larger than INT64_MAX by comparing with
8161 // zero (signed) to pick one of the two conversions.
8162 auto IsLarge =
8163 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8164 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8165
8166 MI.eraseFromParent();
8167 return Legalized;
8168}
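
// [Editor's illustration; not part of LegalizerHelper.cpp] The same
// halve-and-round trick as host code, assuming the usual i64/f32 meanings of
// long long and float; the name is invented for this example.
static float exampleU64ToF32ViaSITOFP(unsigned long long U) {
  if ((long long)U >= 0)            // fits in the signed range: SITOFP directly
    return (float)(long long)U;
  // Halve, but keep the dropped low bit so the final rounding stays correct,
  // then convert as signed and double the result.
  long long RoundedHalved = (long long)((U >> 1) | (U & 1));
  float Halved = (float)RoundedHalved;
  return Halved + Halved;
}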
8169
8170// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8171// IEEE double representation.
8174 auto [Dst, Src] = MI.getFirst2Regs();
8175 const LLT S64 = LLT::scalar(64);
8176 const LLT S32 = LLT::scalar(32);
8177
8178 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8179
8180 // We create the double value from two 32-bit parts whose exponents differ by 32.
8181 // Note that + and - are float operations that adjust the implicit leading
8182 // one; the bases 2^52 and 2^84 are for illustrative purposes.
8183 //
8184 // X = 2^52 * 1.0...LowBits
8185 // Y = 2^84 * 1.0...HighBits
8186 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8187 // = - 2^52 * 1.0...HighBits
8188 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8189 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8190 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8191 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8192 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8193 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8194
8195 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8196 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8197 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8198 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8199 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8200 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8201 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8202
8203 MI.eraseFromParent();
8204 return Legalized;
8205}
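
// [Editor's illustration; not part of LegalizerHelper.cpp] Host-code model of
// the 2^52 / 2^84 construction above; __builtin_memcpy stands in for treating
// the OR results as doubles, and the function name is invented for this sketch.
static double exampleU64ToF64BitFloatOps(unsigned long long U) {
  unsigned long long LoBits = 0x4330000000000000ULL | (U & 0xffffffffULL); // 2^52 | low 32
  unsigned long long HiBits = 0x4530000000000000ULL | (U >> 32);           // 2^84 | high 32
  unsigned long long KBits = 0x4530000000100000ULL;                        // bits of 2^84 + 2^52
  double Lo, Hi, TwoP52P84;
  __builtin_memcpy(&Lo, &LoBits, sizeof(Lo));
  __builtin_memcpy(&Hi, &HiBits, sizeof(Hi));
  __builtin_memcpy(&TwoP52P84, &KBits, sizeof(TwoP52P84));
  return (Hi - TwoP52P84) + Lo;                                            // Scratch + LowBitsFP
}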
8206
8207/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8208/// convert fpround f64->f16 without double-rounding, so we manually perform the
8209/// lowering here where we know it is valid.
8210 static LegalizerHelper::LegalizeResult
8211 loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8212 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8213 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8214 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8215 : MIRBuilder.buildSITOFP(SrcTy, Src);
8216 LLT S32Ty = SrcTy.changeElementSize(32);
8217 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8218 MIRBuilder.buildFPTrunc(Dst, M2);
8219 MI.eraseFromParent();
8220 return LegalizerHelper::Legalized;
8221}
8222
8224 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8225
8226 if (SrcTy == LLT::scalar(1)) {
8227 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8228 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8229 MIRBuilder.buildSelect(Dst, Src, True, False);
8230 MI.eraseFromParent();
8231 return Legalized;
8232 }
8233
8234 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8235 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8236
8237 if (SrcTy != LLT::scalar(64))
8238 return UnableToLegalize;
8239
8240 if (DstTy == LLT::scalar(32))
8241 // TODO: SelectionDAG has several alternative expansions to port which may
8242 // be more reasonable depending on the available instructions. We also need
8243 // a more advanced mechanism to choose an optimal version depending on
8244 // target features such as sitofp or CTLZ availability.
8245 return lowerU64ToF32WithSITOFP(MI);
8246
8247 if (DstTy == LLT::scalar(64))
8248 return lowerU64ToF64BitFloatOps(MI);
8249
8250 return UnableToLegalize;
8251}
8252
8254 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8255
8256 const LLT S64 = LLT::scalar(64);
8257 const LLT S32 = LLT::scalar(32);
8258 const LLT S1 = LLT::scalar(1);
8259
8260 if (SrcTy == S1) {
8261 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8262 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8263 MIRBuilder.buildSelect(Dst, Src, True, False);
8264 MI.eraseFromParent();
8265 return Legalized;
8266 }
8267
8268 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8269 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8270
8271 if (SrcTy != S64)
8272 return UnableToLegalize;
8273
8274 if (DstTy == S32) {
8275 // signed cl2f(long l) {
8276 // long s = l >> 63;
8277 // float r = cul2f((l + s) ^ s);
8278 // return s ? -r : r;
8279 // }
8280 Register L = Src;
8281 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8282 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8283
8284 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8285 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8286 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8287
8288 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8289 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8290 MIRBuilder.buildConstant(S64, 0));
8291 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8292 MI.eraseFromParent();
8293 return Legalized;
8294 }
8295
8296 return UnableToLegalize;
8297}
8298
8300 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8301 const LLT S64 = LLT::scalar(64);
8302 const LLT S32 = LLT::scalar(32);
8303
8304 if (SrcTy != S64 && SrcTy != S32)
8305 return UnableToLegalize;
8306 if (DstTy != S32 && DstTy != S64)
8307 return UnableToLegalize;
8308
8309 // FPTOSI gives same result as FPTOUI for positive signed integers.
8310 // FPTOUI needs to deal with fp values that convert to unsigned integers
8311 // greater than or equal to 2^31 for a 32-bit result or 2^63 for a 64-bit result. For brevity, 2^Exp.
8312
8313 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8314 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8315 : APFloat::IEEEdouble(),
8316 APInt::getZero(SrcTy.getSizeInBits()));
8317 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8318
8319 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8320
8321 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8322 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
8323 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit in the result.
8324 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8325 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8326 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8327 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8328
8329 const LLT S1 = LLT::scalar(1);
8330
8331 MachineInstrBuilder FCMP =
8332 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8333 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8334
8335 MI.eraseFromParent();
8336 return Legalized;
8337}
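
// [Editor's illustration; not part of LegalizerHelper.cpp] Host-code sketch of
// the expansion above for f32 -> u32, valid for inputs in [0, 2^32); like the
// MIR sequence, it assumes the raw conversion is non-trapping. The name is
// invented for this example.
static unsigned exampleFPTOUI32(float Src) {
  const float Threshold = 2147483648.0f;     // 2^31, exactly representable in f32
  if (!(Src >= Threshold))                   // FCMP_ULT, also taken for NaN
    return (unsigned)(long long)Src;         // plain FPTOSI suffices below 2^31
  // Src >= 2^31: convert (Src - 2^31), then add 2^31 back via the sign bit.
  return (unsigned)(long long)(Src - Threshold) ^ 0x80000000u;
}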
8338
8340 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8341 const LLT S64 = LLT::scalar(64);
8342 const LLT S32 = LLT::scalar(32);
8343
8344 // FIXME: Only f32 to i64 conversions are supported.
8345 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8346 return UnableToLegalize;
8347
8348 // Expand f32 -> i64 conversion
8349 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8350 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8351
8352 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8353
8354 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8355 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8356
8357 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8358 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8359
8360 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8361 APInt::getSignMask(SrcEltBits));
8362 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8363 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8364 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8365 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8366
8367 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8368 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8369 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8370
8371 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8372 R = MIRBuilder.buildZExt(DstTy, R);
8373
8374 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8375 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8376 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8377 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8378
8379 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8380 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8381
8382 const LLT S1 = LLT::scalar(1);
8383 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8384 S1, Exponent, ExponentLoBit);
8385
8386 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8387
8388 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8389 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8390
8391 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8392
8393 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8394 S1, Exponent, ZeroSrcTy);
8395
8396 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8397 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8398
8399 MI.eraseFromParent();
8400 return Legalized;
8401}
8402
8405 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8406
8407 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8408 unsigned SatWidth = DstTy.getScalarSizeInBits();
8409
8410 // Determine minimum and maximum integer values and their corresponding
8411 // floating-point values.
8412 APInt MinInt, MaxInt;
8413 if (IsSigned) {
8414 MinInt = APInt::getSignedMinValue(SatWidth);
8415 MaxInt = APInt::getSignedMaxValue(SatWidth);
8416 } else {
8417 MinInt = APInt::getMinValue(SatWidth);
8418 MaxInt = APInt::getMaxValue(SatWidth);
8419 }
8420
8421 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8422 APFloat MinFloat(Semantics);
8423 APFloat MaxFloat(Semantics);
8424
8425 APFloat::opStatus MinStatus =
8426 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8427 APFloat::opStatus MaxStatus =
8428 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8429 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8430 !(MaxStatus & APFloat::opStatus::opInexact);
8431
8432 // If the integer bounds are exactly representable as floats, emit a
8433 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8434 // and selects.
8435 if (AreExactFloatBounds) {
8436 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8437 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8438 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8439 SrcTy.changeElementSize(1), Src, MaxC);
8440 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8441 // Clamp by MaxFloat from above. NaN cannot occur.
8442 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8443 auto MinP =
8444 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8445 MinC, MachineInstr::FmNoNans);
8446 auto Min =
8447 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8448 // Convert clamped value to integer. In the unsigned case we're done,
8449 // because we mapped NaN to MinFloat, which will cast to zero.
8450 if (!IsSigned) {
8451 MIRBuilder.buildFPTOUI(Dst, Min);
8452 MI.eraseFromParent();
8453 return Legalized;
8454 }
8455
8456 // Otherwise, select 0 if Src is NaN.
8457 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8458 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8459 DstTy.changeElementSize(1), Src, Src);
8460 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8461 FpToInt);
8462 MI.eraseFromParent();
8463 return Legalized;
8464 }
8465
8466 // Result of direct conversion. The assumption here is that the operation is
8467 // non-trapping and it's fine to apply it to an out-of-range value if we
8468 // select it away later.
8469 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8470 : MIRBuilder.buildFPTOUI(DstTy, Src);
8471
8472 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8473 // MinInt if Src is NaN.
8474 auto ULT =
8475 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8476 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8477 auto Max = MIRBuilder.buildSelect(
8478 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8479 // If Src OGT MaxFloat, select MaxInt.
8480 auto OGT =
8481 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8482 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8483
8484 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8485 // is already zero.
8486 if (!IsSigned) {
8487 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8488 Max);
8489 MI.eraseFromParent();
8490 return Legalized;
8491 }
8492
8493 // Otherwise, select 0 if Src is NaN.
8494 auto Min = MIRBuilder.buildSelect(
8495 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8496 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8497 DstTy.changeElementSize(1), Src, Src);
8498 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8499 MI.eraseFromParent();
8500 return Legalized;
8501}
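
// [Editor's illustration; not part of LegalizerHelper.cpp] The exact-bounds
// clamp path above, shown as host code for f64 -> s32 (both integer bounds are
// exactly representable in f64); the helper name is invented for this sketch.
static int exampleFPTOSISatF64ToI32(double Src) {
  const double MinFloat = -2147483648.0;            // INT32_MIN, exact in f64
  const double MaxFloat = 2147483647.0;             // INT32_MAX, exact in f64
  double Clamped = Src > MinFloat ? Src : MinFloat; // NaN also maps to MinFloat
  Clamped = Clamped < MaxFloat ? Clamped : MaxFloat;
  int FpToInt = (int)Clamped;                       // always in range after clamping
  return Src != Src ? 0 : FpToInt;                  // signed case: NaN maps to 0
}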
8502
8503// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8506 const LLT S1 = LLT::scalar(1);
8507 const LLT S32 = LLT::scalar(32);
8508
8509 auto [Dst, Src] = MI.getFirst2Regs();
8510 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8511 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8512
8513 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8514 return UnableToLegalize;
8515
8516 if (MI.getFlag(MachineInstr::FmAfn)) {
8517 unsigned Flags = MI.getFlags();
8518 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8519 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8520 MI.eraseFromParent();
8521 return Legalized;
8522 }
8523
8524 const unsigned ExpMask = 0x7ff;
8525 const unsigned ExpBiasf64 = 1023;
8526 const unsigned ExpBiasf16 = 15;
8527
8528 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8529 Register U = Unmerge.getReg(0);
8530 Register UH = Unmerge.getReg(1);
8531
8532 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8533 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8534
8535 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8536 // add the f16 bias (15) to get the biased exponent for the f16 format.
8537 E = MIRBuilder.buildAdd(
8538 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8539
8540 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8541 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8542
8543 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8544 MIRBuilder.buildConstant(S32, 0x1ff));
8545 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8546
8547 auto Zero = MIRBuilder.buildConstant(S32, 0);
8548 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8549 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8550 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8551
8552 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8553 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8554 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8555 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8556
8557 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8558 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8559
8560 // N = M | (E << 12);
8561 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8562 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8563
8564 // B = clamp(1-E, 0, 13);
8565 auto One = MIRBuilder.buildConstant(S32, 1);
8566 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8567 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8568 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8569
8570 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8571 MIRBuilder.buildConstant(S32, 0x1000));
8572
8573 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8574 auto D0 = MIRBuilder.buildShl(S32, D, B);
8575
8576 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8577 D0, SigSetHigh);
8578 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8579 D = MIRBuilder.buildOr(S32, D, D1);
8580
8581 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8582 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8583
8584 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8585 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8586
8587 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8588 MIRBuilder.buildConstant(S32, 3));
8589 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8590
8591 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8592 MIRBuilder.buildConstant(S32, 5));
8593 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8594
8595 V1 = MIRBuilder.buildOr(S32, V0, V1);
8596 V = MIRBuilder.buildAdd(S32, V, V1);
8597
8598 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8599 E, MIRBuilder.buildConstant(S32, 30));
8600 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8601 MIRBuilder.buildConstant(S32, 0x7c00), V);
8602
8603 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8604 E, MIRBuilder.buildConstant(S32, 1039));
8605 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8606
8607 // Extract the sign bit.
8608 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8609 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8610
8611 // Insert the sign bit
8612 V = MIRBuilder.buildOr(S32, Sign, V);
8613
8614 MIRBuilder.buildTrunc(Dst, V);
8615 MI.eraseFromParent();
8616 return Legalized;
8617}
8618
8621 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8622 const LLT S64 = LLT::scalar(64);
8623 const LLT S16 = LLT::scalar(16);
8624
8625 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8626 return lowerFPTRUNC_F64_TO_F16(MI);
8627
8628 return UnableToLegalize;
8629}
8630
8632 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8633 LLT Ty = MRI.getType(Dst);
8634
8635 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8636 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8637 MI.eraseFromParent();
8638 return Legalized;
8639}
8640
8641 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8642 switch (Opc) {
8643 case TargetOpcode::G_SMIN:
8644 return CmpInst::ICMP_SLT;
8645 case TargetOpcode::G_SMAX:
8646 return CmpInst::ICMP_SGT;
8647 case TargetOpcode::G_UMIN:
8648 return CmpInst::ICMP_ULT;
8649 case TargetOpcode::G_UMAX:
8650 return CmpInst::ICMP_UGT;
8651 default:
8652 llvm_unreachable("not in integer min/max");
8653 }
8654}
8655
8657 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8658
8659 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8660 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8661
8662 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8663 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8664
8665 MI.eraseFromParent();
8666 return Legalized;
8667}
8668
8671 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8672
8673 Register Dst = Cmp->getReg(0);
8674 LLT DstTy = MRI.getType(Dst);
8675 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8676 LLT CmpTy = DstTy.changeElementSize(1);
8677
8678 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8679 ? CmpInst::Predicate::ICMP_SLT
8680 : CmpInst::Predicate::ICMP_ULT;
8681 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8682 ? CmpInst::Predicate::ICMP_SGT
8683 : CmpInst::Predicate::ICMP_UGT;
8684
8685 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8686 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8687 Cmp->getRHSReg());
8688 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8689 Cmp->getRHSReg());
8690
8691 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8692 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8693 if (TLI.preferSelectsOverBooleanArithmetic(
8694 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8696 auto One = MIRBuilder.buildConstant(DstTy, 1);
8697 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8698
8699 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8700 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8701 } else {
8703 std::swap(IsGT, IsLT);
8704 // Extend boolean results to DstTy, which is at least i2, before subtracting
8705 // them.
8706 unsigned BoolExtOp =
8707 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8708 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8709 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8710 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8711 }
8712
8713 MI.eraseFromParent();
8714 return Legalized;
8715}
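
// [Editor's illustration; not part of LegalizerHelper.cpp] The two shapes of
// the G_SCMP expansion above as host code for s32 operands; names are invented
// for this sketch.
static int exampleSCmpSelects(int LHS, int RHS) {
  // Select form: -1 if less, otherwise 1 if greater, otherwise 0.
  return LHS < RHS ? -1 : (LHS > RHS ? 1 : 0);
}
static int exampleSCmpBoolArithmetic(int LHS, int RHS) {
  // Boolean-arithmetic form: zext(IsGT) - zext(IsLT).
  return (int)(LHS > RHS) - (int)(LHS < RHS);
}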
8716
8719 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8720 const int Src0Size = Src0Ty.getScalarSizeInBits();
8721 const int Src1Size = Src1Ty.getScalarSizeInBits();
8722
8723 auto SignBitMask = MIRBuilder.buildConstant(
8724 Src0Ty, APInt::getSignMask(Src0Size));
8725
8726 auto NotSignBitMask = MIRBuilder.buildConstant(
8727 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8728
8729 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8730 Register And1;
8731 if (Src0Ty == Src1Ty) {
8732 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8733 } else if (Src0Size > Src1Size) {
8734 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8735 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8736 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8737 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8738 } else {
8739 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8740 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8741 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8742 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8743 }
8744
8745 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8746 // constants are a nan and -0.0, but the final result should preserve
8747 // everything.
8748 unsigned Flags = MI.getFlags();
8749
8750 // We masked the sign bit and the not-sign bit, so these are disjoint.
8751 Flags |= MachineInstr::Disjoint;
8752
8753 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8754
8755 MI.eraseFromParent();
8756 return Legalized;
8757}
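
// [Editor's illustration; not part of LegalizerHelper.cpp] The same-width case
// of the G_FCOPYSIGN expansion above as host code; __builtin_memcpy models the
// bitcasts and the name is invented for this sketch.
static float exampleCopySignF32(float Mag, float Sign) {
  unsigned M, S;
  __builtin_memcpy(&M, &Mag, sizeof(M));
  __builtin_memcpy(&S, &Sign, sizeof(S));
  // NotSignBitMask keeps the magnitude, SignBitMask keeps only the sign; the
  // two pieces are disjoint, so a plain OR combines them.
  unsigned R = (M & 0x7fffffffu) | (S & 0x80000000u);
  float Out;
  __builtin_memcpy(&Out, &R, sizeof(Out));
  return Out;
}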
8758
8761 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8762 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8763 // depend on fminnum/fmaxnum.
8764
8765 unsigned NewOp;
8766 switch (MI.getOpcode()) {
8767 case TargetOpcode::G_FMINNUM:
8768 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8769 break;
8770 case TargetOpcode::G_FMINIMUMNUM:
8771 NewOp = TargetOpcode::G_FMINNUM;
8772 break;
8773 case TargetOpcode::G_FMAXNUM:
8774 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8775 break;
8776 case TargetOpcode::G_FMAXIMUMNUM:
8777 NewOp = TargetOpcode::G_FMAXNUM;
8778 break;
8779 default:
8780 llvm_unreachable("unexpected min/max opcode");
8781 }
8782
8783 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8784 LLT Ty = MRI.getType(Dst);
8785
8786 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8787 // Insert canonicalizes if it's possible we need to quiet to get correct
8788 // sNaN behavior.
8789
8790 // Note this must be done here, and not as an optimization combine in the
8791 // absence of a dedicated quiet-sNaN instruction, as we're using an
8792 // omni-purpose G_FCANONICALIZE.
8793 if (!isKnownNeverSNaN(Src0, MRI))
8794 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8795
8796 if (!isKnownNeverSNaN(Src1, MRI))
8797 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8798 }
8799
8800 // If there are no nans, it's safe to simply replace this with the non-IEEE
8801 // version.
8802 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8803 MI.eraseFromParent();
8804 return Legalized;
8805}
8806
8809 unsigned Opc = MI.getOpcode();
8810 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8811 LLT Ty = MRI.getType(Dst);
8812 LLT CmpTy = Ty.changeElementSize(1);
8813
8814 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8815 unsigned OpcIeee =
8816 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8817 unsigned OpcNonIeee =
8818 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8819 bool MinMaxMustRespectOrderedZero = false;
8820 Register Res;
8821
8822 // IEEE variants don't need canonicalization
8823 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8824 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8825 MinMaxMustRespectOrderedZero = true;
8826 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8827 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8828 } else {
8829 auto Compare = MIRBuilder.buildFCmp(
8830 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8831 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8832 }
8833
8834 // Propagate a NaN if either operand may be NaN.
8835 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8836 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8837 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8838
8839 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8840 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8841 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8842 if (Ty.isVector())
8843 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8844
8845 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8846 }
8847
8848 // fminimum/fmaximum require -0.0 to be ordered less than +0.0
8849 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8850 GISelValueTracking VT(MIRBuilder.getMF());
8851 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8852 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8853
8854 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8855 const unsigned Flags = MI.getFlags();
8856 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8857 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8858
8859 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8860
8861 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8862 auto LHSSelect =
8863 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8864
8865 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8866 auto RHSSelect =
8867 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8868
8869 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8870 }
8871 }
8872
8873 MIRBuilder.buildCopy(Dst, Res);
8874 MI.eraseFromParent();
8875 return Legalized;
8876}
8877
8879 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8880 Register DstReg = MI.getOperand(0).getReg();
8881 LLT Ty = MRI.getType(DstReg);
8882 unsigned Flags = MI.getFlags();
8883
8884 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8885 Flags);
8886 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8887 MI.eraseFromParent();
8888 return Legalized;
8889}
8890
8893 auto [DstReg, X] = MI.getFirst2Regs();
8894 const unsigned Flags = MI.getFlags();
8895 const LLT Ty = MRI.getType(DstReg);
8896 const LLT CondTy = Ty.changeElementSize(1);
8897
8898 // round(x) =>
8899 // t = trunc(x);
8900 // d = fabs(x - t);
8901 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8902 // return t + o;
8903
8904 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8905
8906 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8907 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8908
8909 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8910 auto Cmp =
8911 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8912
8913 // Could emit G_UITOFP instead
8914 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8915 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8916 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8917 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8918
8919 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8920
8921 MI.eraseFromParent();
8922 return Legalized;
8923}
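
// [Editor's illustration; not part of LegalizerHelper.cpp] Scalar host-code
// model of the G_INTRINSIC_ROUND expansion above; the GCC/Clang builtins stand
// in for G_INTRINSIC_TRUNC, G_FABS and G_FCOPYSIGN, and the name is invented.
static float exampleRoundF32(float X) {
  float T = __builtin_truncf(X);                           // t = trunc(x)
  float O = __builtin_fabsf(X - T) >= 0.5f ? 1.0f : 0.0f;  // d >= 0.5 ? 1 : 0
  return T + __builtin_copysignf(O, X);                    // t + copysign(o, x)
}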
8924
8926 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8927 unsigned Flags = MI.getFlags();
8928 LLT Ty = MRI.getType(DstReg);
8929 const LLT CondTy = Ty.changeElementSize(1);
8930
8931 // result = trunc(src);
8932 // if (src < 0.0 && src != result)
8933 // result += -1.0.
8934
8935 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8936 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8937
8938 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8939 SrcReg, Zero, Flags);
8940 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8941 SrcReg, Trunc, Flags);
8942 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8943 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8944
8945 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8946 MI.eraseFromParent();
8947 return Legalized;
8948}
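
// [Editor's illustration; not part of LegalizerHelper.cpp] Scalar host-code
// model of the G_FFLOOR expansion above (SITOFP of the i1 condition yields
// -1.0 when true); the name is invented for this sketch.
static float exampleFloorF32(float X) {
  float T = __builtin_truncf(X);
  // If X is negative and not already an integer, step the result down by one.
  return T + ((X < 0.0f && X != T) ? -1.0f : 0.0f);
}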
8949
8952 const unsigned NumOps = MI.getNumOperands();
8953 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8954 unsigned PartSize = Src0Ty.getSizeInBits();
8955
8956 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8957 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8958
8959 for (unsigned I = 2; I != NumOps; ++I) {
8960 const unsigned Offset = (I - 1) * PartSize;
8961
8962 Register SrcReg = MI.getOperand(I).getReg();
8963 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8964
8965 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8966 MRI.createGenericVirtualRegister(WideTy);
8967
8968 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8969 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8970 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8971 ResultReg = NextResult;
8972 }
8973
8974 if (DstTy.isPointer()) {
8975 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8976 DstTy.getAddressSpace())) {
8977 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8978 return UnableToLegalize;
8979 }
8980
8981 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8982 }
8983
8984 MI.eraseFromParent();
8985 return Legalized;
8986}
8987
8990 const unsigned NumDst = MI.getNumOperands() - 1;
8991 Register SrcReg = MI.getOperand(NumDst).getReg();
8992 Register Dst0Reg = MI.getOperand(0).getReg();
8993 LLT DstTy = MRI.getType(Dst0Reg);
8994 if (DstTy.isPointer())
8995 return UnableToLegalize; // TODO
8996
8997 SrcReg = coerceToScalar(SrcReg);
8998 if (!SrcReg)
8999 return UnableToLegalize;
9000
9001 // Expand scalarizing unmerge as bitcast to integer and shift.
9002 LLT IntTy = MRI.getType(SrcReg);
9003
9004 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9005
9006 const unsigned DstSize = DstTy.getSizeInBits();
9007 unsigned Offset = DstSize;
9008 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9009 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9010 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9011 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9012 }
9013
9014 MI.eraseFromParent();
9015 return Legalized;
9016}
9017
9018/// Lower a vector extract or insert by writing the vector to a stack temporary
9019/// and reloading the element or vector.
9020///
9021/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9022/// =>
9023/// %stack_temp = G_FRAME_INDEX
9024/// G_STORE %vec, %stack_temp
9025/// %idx = clamp(%idx, %vec.getNumElements())
9026/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9027/// %dst = G_LOAD %element_ptr
9030 Register DstReg = MI.getOperand(0).getReg();
9031 Register SrcVec = MI.getOperand(1).getReg();
9032 Register InsertVal;
9033 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9034 InsertVal = MI.getOperand(2).getReg();
9035
9036 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9037
9038 LLT VecTy = MRI.getType(SrcVec);
9039 LLT EltTy = VecTy.getElementType();
9040 unsigned NumElts = VecTy.getNumElements();
9041
9042 int64_t IdxVal;
9043 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9044 SmallVector<Register, 8> SrcRegs;
9045 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9046
9047 if (InsertVal) {
9048 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9049 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9050 } else {
9051 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9052 }
9053
9054 MI.eraseFromParent();
9055 return Legalized;
9056 }
9057
9058 if (!EltTy.isByteSized()) { // Not implemented.
9059 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9060 return UnableToLegalize;
9061 }
9062
9063 unsigned EltBytes = EltTy.getSizeInBytes();
9064 Align VecAlign = getStackTemporaryAlignment(VecTy);
9065 Align EltAlign;
9066
9067 MachinePointerInfo PtrInfo;
9068 auto StackTemp = createStackTemporary(
9069 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9070 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9071
9072 // Get the pointer to the element, and be sure not to hit undefined behavior
9073 // if the index is out of bounds.
9074 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9075
9076 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9077 int64_t Offset = IdxVal * EltBytes;
9078 PtrInfo = PtrInfo.getWithOffset(Offset);
9079 EltAlign = commonAlignment(VecAlign, Offset);
9080 } else {
9081 // We lose information with a variable offset.
9082 EltAlign = getStackTemporaryAlignment(EltTy);
9083 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9084 }
9085
9086 if (InsertVal) {
9087 // Write the inserted element
9088 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9089
9090 // Reload the whole vector.
9091 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9092 } else {
9093 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9094 }
9095
9096 MI.eraseFromParent();
9097 return Legalized;
9098}
9099
9102 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9103 MI.getFirst3RegLLTs();
9104 LLT IdxTy = LLT::scalar(32);
9105
9106 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9107 Register Undef;
9108 SmallVector<Register, 32> BuildVec;
9109 LLT EltTy = DstTy.getScalarType();
9110
9111 DenseMap<unsigned, Register> CachedExtract;
9112
9113 for (int Idx : Mask) {
9114 if (Idx < 0) {
9115 if (!Undef.isValid())
9116 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9117 BuildVec.push_back(Undef);
9118 continue;
9119 }
9120
9121 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9122
9123 int NumElts = Src0Ty.getNumElements();
9124 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9125 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9126 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9127 if (Inserted) {
9128 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9129 It->second =
9130 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9131 }
9132 BuildVec.push_back(It->second);
9133 }
9134
9135 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9136 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9137 MI.eraseFromParent();
9138 return Legalized;
9139}
9140
9143 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9144 MI.getFirst4RegLLTs();
9145
9146 if (VecTy.isScalableVector())
9147 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9148
9149 Align VecAlign = getStackTemporaryAlignment(VecTy);
9150 MachinePointerInfo PtrInfo;
9151 Register StackPtr =
9152 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9153 PtrInfo)
9154 .getReg(0);
9155 MachinePointerInfo ValPtrInfo =
9156 MachinePointerInfo::getUnknownStack(*MI.getMF());
9157
9158 LLT IdxTy = LLT::scalar(32);
9159 LLT ValTy = VecTy.getElementType();
9160 Align ValAlign = getStackTemporaryAlignment(ValTy);
9161
9162 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9163
9164 bool HasPassthru =
9165 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9166
9167 if (HasPassthru)
9168 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9169
9170 Register LastWriteVal;
9171 std::optional<APInt> PassthruSplatVal =
9172 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9173
9174 if (PassthruSplatVal.has_value()) {
9175 LastWriteVal =
9176 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9177 } else if (HasPassthru) {
9178 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9179 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9180 {LLT::scalar(32)}, {Popcount});
9181
9182 Register LastElmtPtr =
9183 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9184 LastWriteVal =
9185 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9186 .getReg(0);
9187 }
9188
9189 unsigned NumElmts = VecTy.getNumElements();
9190 for (unsigned I = 0; I < NumElmts; ++I) {
9191 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9192 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9193 Register ElmtPtr =
9194 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9195 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9196
9197 LLT MaskITy = MaskTy.getElementType();
9198 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9199 if (MaskITy.getSizeInBits() > 1)
9200 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9201
9202 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9203 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9204
9205 if (HasPassthru && I == NumElmts - 1) {
9206 auto EndOfVector =
9207 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9208 auto AllLanesSelected = MIRBuilder.buildICmp(
9209 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9210 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9211 {OutPos, EndOfVector});
9212 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9213
9214 LastWriteVal =
9215 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9216 .getReg(0);
9217 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9218 }
9219 }
9220
9221 // TODO: Use StackPtr's FrameIndex alignment.
9222 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9223
9224 MI.eraseFromParent();
9225 return Legalized;
9226}
9227
9228 Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9229 Register AllocSize,
9230 Align Alignment,
9231 LLT PtrTy) {
9232 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9233
9234 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9235 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9236
9237 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9238 // have to generate an extra instruction to negate the alloc and then use
9239 // G_PTR_ADD to add the negative offset.
9240 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9241 if (Alignment > Align(1)) {
9242 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9243 AlignMask.negate();
9244 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9245 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9246 }
9247
9248 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9249}
9250
9253 const auto &MF = *MI.getMF();
9254 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9255 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9256 return UnableToLegalize;
9257
9258 Register Dst = MI.getOperand(0).getReg();
9259 Register AllocSize = MI.getOperand(1).getReg();
9260 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9261
9262 LLT PtrTy = MRI.getType(Dst);
9263 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9264 Register SPTmp =
9265 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9266
9267 MIRBuilder.buildCopy(SPReg, SPTmp);
9268 MIRBuilder.buildCopy(Dst, SPTmp);
9269
9270 MI.eraseFromParent();
9271 return Legalized;
9272}
9273
9276 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9277 if (!StackPtr)
9278 return UnableToLegalize;
9279
9280 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9281 MI.eraseFromParent();
9282 return Legalized;
9283}
9284
9287 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9288 if (!StackPtr)
9289 return UnableToLegalize;
9290
9291 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9292 MI.eraseFromParent();
9293 return Legalized;
9294}
9295
9298 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9299 unsigned Offset = MI.getOperand(2).getImm();
9300
9301 // Extract sub-vector or one element
9302 if (SrcTy.isVector()) {
9303 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9304 unsigned DstSize = DstTy.getSizeInBits();
9305
9306 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9307 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9308 // Unmerge and allow access to each Src element for the artifact combiner.
9309 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9310
9311 // Take element(s) we need to extract and copy it (merge them).
9312 SmallVector<Register, 8> SubVectorElts;
9313 for (unsigned Idx = Offset / SrcEltSize;
9314 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9315 SubVectorElts.push_back(Unmerge.getReg(Idx));
9316 }
9317 if (SubVectorElts.size() == 1)
9318 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9319 else
9320 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9321
9322 MI.eraseFromParent();
9323 return Legalized;
9324 }
9325 }
9326
9327 if (DstTy.isScalar() &&
9328 (SrcTy.isScalar() ||
9329 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9330 LLT SrcIntTy = SrcTy;
9331 if (!SrcTy.isScalar()) {
9332 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9333 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9334 }
9335
9336 if (Offset == 0)
9337 MIRBuilder.buildTrunc(DstReg, SrcReg);
9338 else {
9339 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9340 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9341 MIRBuilder.buildTrunc(DstReg, Shr);
9342 }
9343
9344 MI.eraseFromParent();
9345 return Legalized;
9346 }
9347
9348 return UnableToLegalize;
9349}
9350
9352 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9353 uint64_t Offset = MI.getOperand(3).getImm();
9354
9355 LLT DstTy = MRI.getType(Src);
9356 LLT InsertTy = MRI.getType(InsertSrc);
9357
9358 // Insert sub-vector or one element
9359 if (DstTy.isVector() && !InsertTy.isPointer()) {
9360 LLT EltTy = DstTy.getElementType();
9361 unsigned EltSize = EltTy.getSizeInBits();
9362 unsigned InsertSize = InsertTy.getSizeInBits();
9363
9364 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9365 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9366 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9367 SmallVector<Register, 8> DstElts;
9368 unsigned Idx = 0;
9369 // Elements from Src before insert start Offset
9370 for (; Idx < Offset / EltSize; ++Idx) {
9371 DstElts.push_back(UnmergeSrc.getReg(Idx));
9372 }
9373
9374 // Replace elements in Src with elements from InsertSrc
9375 if (InsertTy.getSizeInBits() > EltSize) {
9376 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9377 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9378 ++Idx, ++i) {
9379 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9380 }
9381 } else {
9382 DstElts.push_back(InsertSrc);
9383 ++Idx;
9384 }
9385
9386 // Remaining elements from Src after insert
9387 for (; Idx < DstTy.getNumElements(); ++Idx) {
9388 DstElts.push_back(UnmergeSrc.getReg(Idx));
9389 }
9390
9391 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9392 MI.eraseFromParent();
9393 return Legalized;
9394 }
9395 }
9396
9397 if (InsertTy.isVector() ||
9398 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9399 return UnableToLegalize;
9400
9401 const DataLayout &DL = MIRBuilder.getDataLayout();
9402 if ((DstTy.isPointer() &&
9403 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9404 (InsertTy.isPointer() &&
9405 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9406 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9407 return UnableToLegalize;
9408 }
9409
9410 LLT IntDstTy = DstTy;
9411
9412 if (!DstTy.isScalar()) {
9413 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9414 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9415 }
9416
9417 if (!InsertTy.isScalar()) {
9418 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9419 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9420 }
9421
9422 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9423 if (Offset != 0) {
9424 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9425 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9426 }
9427
9428 APInt MaskVal = APInt::getBitsSetWithWrap(
9429 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9430
9431 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9432 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9433 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9434
9435 MIRBuilder.buildCast(Dst, Or);
9436 MI.eraseFromParent();
9437 return Legalized;
9438}
9439
9442 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9443 MI.getFirst4RegLLTs();
9444 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9445
9446 LLT Ty = Dst0Ty;
9447 LLT BoolTy = Dst1Ty;
9448
9449 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9450
9451 if (IsAdd)
9452 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9453 else
9454 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9455
9456 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9457
9458 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9459
9460 // For an addition, the result should be less than one of the operands (LHS)
9461 // if and only if the other operand (RHS) is negative, otherwise there will
9462 // be overflow.
9463 // For a subtraction, the result should be less than one of the operands
9464 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9465 // otherwise there will be overflow.
9466 auto ResultLowerThanLHS =
9467 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9468 auto ConditionRHS = MIRBuilder.buildICmp(
9469 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9470
9471 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9472
9473 MIRBuilder.buildCopy(Dst0, NewDst0);
9474 MI.eraseFromParent();
9475
9476 return Legalized;
9477}
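
// [Editor's illustration; not part of LegalizerHelper.cpp] The overflow test
// used above, as host code for 32-bit signed addition; the unsigned detour
// models the wrapping G_ADD and the name is invented for this sketch.
static bool exampleSAddOverflows(int LHS, int RHS) {
  int Sum = (int)((unsigned)LHS + (unsigned)RHS); // two's-complement wrap
  bool ResultLowerThanLHS = Sum < LHS;
  bool RHSIsNegative = RHS < 0;
  // Overflow iff exactly one of the two conditions holds (the XOR above).
  return ResultLowerThanLHS != RHSIsNegative;
}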
9478
9480 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9481 const LLT Ty = MRI.getType(Res);
9482
9483 // sum = LHS + RHS + zext(CarryIn)
9484 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9485 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9486 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9487 MIRBuilder.buildCopy(Res, Sum);
9488
9489 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9490 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9491 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9492 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9493
9494 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9495 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9496
9497 MI.eraseFromParent();
9498 return Legalized;
9499}
9500
9502 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9503 const LLT Ty = MRI.getType(Res);
9504
9505 // Diff = LHS - (RHS + zext(CarryIn))
9506 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9507 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9508 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9509 MIRBuilder.buildCopy(Res, Diff);
9510
9511 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9512 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9513 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9514 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9515 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9516 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9517
9518 MI.eraseFromParent();
9519 return Legalized;
9520}
9521
9524 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9525 LLT Ty = MRI.getType(Res);
9526 bool IsSigned;
9527 bool IsAdd;
9528 unsigned BaseOp;
9529 switch (MI.getOpcode()) {
9530 default:
9531 llvm_unreachable("unexpected addsat/subsat opcode");
9532 case TargetOpcode::G_UADDSAT:
9533 IsSigned = false;
9534 IsAdd = true;
9535 BaseOp = TargetOpcode::G_ADD;
9536 break;
9537 case TargetOpcode::G_SADDSAT:
9538 IsSigned = true;
9539 IsAdd = true;
9540 BaseOp = TargetOpcode::G_ADD;
9541 break;
9542 case TargetOpcode::G_USUBSAT:
9543 IsSigned = false;
9544 IsAdd = false;
9545 BaseOp = TargetOpcode::G_SUB;
9546 break;
9547 case TargetOpcode::G_SSUBSAT:
9548 IsSigned = true;
9549 IsAdd = false;
9550 BaseOp = TargetOpcode::G_SUB;
9551 break;
9552 }
9553
9554 if (IsSigned) {
9555 // sadd.sat(a, b) ->
9556 // hi = 0x7fffffff - smax(a, 0)
9557 // lo = 0x80000000 - smin(a, 0)
9558 // a + smin(smax(lo, b), hi)
9559 // ssub.sat(a, b) ->
9560 // lo = smax(a, -1) - 0x7fffffff
9561 // hi = smin(a, -1) - 0x80000000
9562 // a - smin(smax(lo, b), hi)
9563 // TODO: AMDGPU can use a "median of 3" instruction here:
9564 // a +/- med3(lo, b, hi)
9565 uint64_t NumBits = Ty.getScalarSizeInBits();
9566 auto MaxVal =
9567 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9568 auto MinVal =
9569 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9570 MachineInstrBuilder Hi, Lo;
9571 if (IsAdd) {
9572 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9573 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9574 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9575 } else {
9576 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9577 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9578 MaxVal);
9579 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9580 MinVal);
9581 }
9582 auto RHSClamped =
9583 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9584 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9585 } else {
9586 // uadd.sat(a, b) -> a + umin(~a, b)
9587 // usub.sat(a, b) -> a - umin(a, b)
9588 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9589 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9590 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9591 }
9592
9593 MI.eraseFromParent();
9594 return Legalized;
9595}
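
// [Editor's illustration; not part of LegalizerHelper.cpp] The unsigned
// min-based forms used above, as host code for u32; names are invented for
// this sketch.
static unsigned exampleUAddSat(unsigned A, unsigned B) {
  unsigned NotA = ~A;                       // remaining headroom before wrap
  return A + (B < NotA ? B : NotA);         // a + umin(~a, b)
}
static unsigned exampleUSubSat(unsigned A, unsigned B) {
  return A - (A < B ? A : B);               // a - umin(a, b)
}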
9596
9599 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9600 LLT Ty = MRI.getType(Res);
9601 LLT BoolTy = Ty.changeElementSize(1);
9602 bool IsSigned;
9603 bool IsAdd;
9604 unsigned OverflowOp;
9605 switch (MI.getOpcode()) {
9606 default:
9607 llvm_unreachable("unexpected addsat/subsat opcode");
9608 case TargetOpcode::G_UADDSAT:
9609 IsSigned = false;
9610 IsAdd = true;
9611 OverflowOp = TargetOpcode::G_UADDO;
9612 break;
9613 case TargetOpcode::G_SADDSAT:
9614 IsSigned = true;
9615 IsAdd = true;
9616 OverflowOp = TargetOpcode::G_SADDO;
9617 break;
9618 case TargetOpcode::G_USUBSAT:
9619 IsSigned = false;
9620 IsAdd = false;
9621 OverflowOp = TargetOpcode::G_USUBO;
9622 break;
9623 case TargetOpcode::G_SSUBSAT:
9624 IsSigned = true;
9625 IsAdd = false;
9626 OverflowOp = TargetOpcode::G_SSUBO;
9627 break;
9628 }
9629
9630 auto OverflowRes =
9631 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9632 Register Tmp = OverflowRes.getReg(0);
9633 Register Ov = OverflowRes.getReg(1);
9634 MachineInstrBuilder Clamp;
9635 if (IsSigned) {
9636 // sadd.sat(a, b) ->
9637 // {tmp, ov} = saddo(a, b)
9638 // ov ? (tmp >>s 31) + 0x80000000 : r
9639 // ssub.sat(a, b) ->
9640 // {tmp, ov} = ssubo(a, b)
9641 // ov ? (tmp >>s 31) + 0x80000000 : r
9642 uint64_t NumBits = Ty.getScalarSizeInBits();
9643 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9644 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9645 auto MinVal =
9646 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9647 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9648 } else {
9649 // uadd.sat(a, b) ->
9650 // {tmp, ov} = uaddo(a, b)
9651 // ov ? 0xffffffff : tmp
9652 // usub.sat(a, b) ->
9653 // {tmp, ov} = usubo(a, b)
9654 // ov ? 0 : tmp
9655 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9656 }
9657 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9658
9659 MI.eraseFromParent();
9660 return Legalized;
9661}
9662
9665 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9666 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9667 "Expected shlsat opcode!");
9668 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9669 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9670 LLT Ty = MRI.getType(Res);
9671 LLT BoolTy = Ty.changeElementSize(1);
9672
9673 unsigned BW = Ty.getScalarSizeInBits();
9674 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9675 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9676 : MIRBuilder.buildLShr(Ty, Result, RHS);
9677
9678 MachineInstrBuilder SatVal;
9679 if (IsSigned) {
9680 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9681 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9682 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9683 MIRBuilder.buildConstant(Ty, 0));
9684 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9685 } else {
9686 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9687 }
9688 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9689 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9690
9691 MI.eraseFromParent();
9692 return Legalized;
9693}
9694
9696 auto [Dst, Src] = MI.getFirst2Regs();
9697 const LLT Ty = MRI.getType(Src);
9698 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9699 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9700
9701 // Swap most and least significant byte, set remaining bytes in Res to zero.
9702 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9703 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9704 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9705 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9706
9707 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9708 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9709 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9710 APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
9711 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9712 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9713 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9714 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9715 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9716 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9717 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9718 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9719 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9720 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9721 }
9722 Res.getInstr()->getOperand(0).setReg(Dst);
9723
9724 MI.eraseFromParent();
9725 return Legalized;
9726}
9727
9728 // { (Src & Mask) >> N } | { (Src << N) & Mask }
9729 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9730 MachineInstrBuilder Src, const APInt &Mask) {
9731 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9732 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9733 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9734 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9735 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9736 return B.buildOr(Dst, LHS, RHS);
9737}
9738
9739 LegalizerHelper::LegalizeResult
9740 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9741 auto [Dst, Src] = MI.getFirst2Regs();
9742 const LLT SrcTy = MRI.getType(Src);
9743 unsigned Size = SrcTy.getScalarSizeInBits();
9744 unsigned VSize = SrcTy.getSizeInBits();
9745
9746 if (Size >= 8) {
9747 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9748 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9749 {LLT::fixed_vector(VSize / 8, 8),
9750 LLT::fixed_vector(VSize / 8, 8)}}))) {
9751 // If bitreverse is legal for i8 vector of the same size, then cast
9752 // to i8 vector type.
9753 // e.g. v4s32 -> v16s8
9754 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9755 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9756 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9757 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9758 MIRBuilder.buildBitcast(Dst, RBIT);
9759 } else {
9760 MachineInstrBuilder BSWAP =
9761 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9762
9763 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9764 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9765 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9766 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9767 APInt::getSplat(Size, APInt(8, 0xF0)));
9768
9769 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9770 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9771 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9772 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9773 APInt::getSplat(Size, APInt(8, 0xCC)));
9774
9775 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9776 // 6|7
9777 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9778 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
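// For example, for the byte 0xB4 = 0b10110100: Swap4 gives 0b01001011,
// Swap2 gives 0b00011110, and this final stage gives 0b00101101 = 0x2D,
// the bit-reversed value.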
9779 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9780 }
9781 } else {
9782 // Expand bitreverse for types smaller than 8 bits.
9783 MachineInstrBuilder Tmp;
9784 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9785 MachineInstrBuilder Tmp2;
9786 if (I < J) {
9787 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9788 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9789 } else {
9790 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9791 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9792 }
9793
9794 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9795 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9796 if (I == 0)
9797 Tmp = Tmp2;
9798 else
9799 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9800 }
9801 MIRBuilder.buildCopy(Dst, Tmp);
9802 }
9803
9804 MI.eraseFromParent();
9805 return Legalized;
9806}
9807
9808 LegalizerHelper::LegalizeResult
9809 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9810 MachineFunction &MF = MIRBuilder.getMF();
9811
9812 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9813 int NameOpIdx = IsRead ? 1 : 0;
9814 int ValRegIndex = IsRead ? 0 : 1;
9815
9816 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9817 const LLT Ty = MRI.getType(ValReg);
9818 const MDString *RegStr = cast<MDString>(
9819 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9820
9821 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9822 if (!PhysReg) {
9823 const Function &Fn = MF.getFunction();
9825 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9826 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9827 Fn, MI.getDebugLoc()));
9828 if (IsRead)
9829 MIRBuilder.buildUndef(ValReg);
9830
9831 MI.eraseFromParent();
9832 return Legalized;
9833 }
9834
9835 if (IsRead)
9836 MIRBuilder.buildCopy(ValReg, PhysReg);
9837 else
9838 MIRBuilder.buildCopy(PhysReg, ValReg);
9839
9840 MI.eraseFromParent();
9841 return Legalized;
9842}
9843
9844 LegalizerHelper::LegalizeResult
9845 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9846 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9847 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9848 Register Result = MI.getOperand(0).getReg();
9849 LLT OrigTy = MRI.getType(Result);
9850 auto SizeInBits = OrigTy.getScalarSizeInBits();
9851 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9852
9853 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9854 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9855 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9856 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9857
9858 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9859 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9860 MIRBuilder.buildTrunc(Result, Shifted);
9861
9862 MI.eraseFromParent();
9863 return Legalized;
9864}
9865
9866 LegalizerHelper::LegalizeResult
9867 LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9868 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9869 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9870
9871 if (Mask == fcNone) {
9872 MIRBuilder.buildConstant(DstReg, 0);
9873 MI.eraseFromParent();
9874 return Legalized;
9875 }
9876 if (Mask == fcAllFlags) {
9877 MIRBuilder.buildConstant(DstReg, 1);
9878 MI.eraseFromParent();
9879 return Legalized;
9880 }
9881
9882 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9883 // version
9884
9885 unsigned BitSize = SrcTy.getScalarSizeInBits();
9886 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9887
9888 LLT IntTy = LLT::scalar(BitSize);
9889 if (SrcTy.isVector())
9890 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9891 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9892
9893 // Various masks.
9894 APInt SignBit = APInt::getSignMask(BitSize);
9895 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9896 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9897 APInt ExpMask = Inf;
9898 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9899 APInt QNaNBitMask =
9900 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9901 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9902
9903 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9904 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9905 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9906 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9907 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9908
9909 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9910 auto Sign =
9911 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9912
9913 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9914 // Clang doesn't support capture of structured bindings:
9915 LLT DstTyCopy = DstTy;
9916 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9917 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9918 };
9919
9920 // Tests that involve more than one class should be processed first.
9921 if ((Mask & fcFinite) == fcFinite) {
9922 // finite(V) ==> abs(V) u< exp_mask
9923 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9924 ExpMaskC));
9925 Mask &= ~fcFinite;
9926 } else if ((Mask & fcFinite) == fcPosFinite) {
9927 // finite(V) && V > 0 ==> V u< exp_mask
9928 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9929 ExpMaskC));
9930 Mask &= ~fcPosFinite;
9931 } else if ((Mask & fcFinite) == fcNegFinite) {
9932 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9933 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9934 ExpMaskC);
9935 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9936 appendToRes(And);
9937 Mask &= ~fcNegFinite;
9938 }
9939
9940 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9941 // fcZero | fcSubnormal => test all exponent bits are 0
9942 // TODO: Handle sign bit specific cases
9943 // TODO: Handle inverted case
9944 if (PartialCheck == (fcZero | fcSubnormal)) {
9945 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9946 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9947 ExpBits, ZeroC));
9948 Mask &= ~PartialCheck;
9949 }
9950 }
9951
9952 // Check for individual classes.
9953 if (FPClassTest PartialCheck = Mask & fcZero) {
9954 if (PartialCheck == fcPosZero)
9955 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9956 AsInt, ZeroC));
9957 else if (PartialCheck == fcZero)
9958 appendToRes(
9959 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9960 else // fcNegZero
9961 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9962 AsInt, SignBitC));
9963 }
9964
9965 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9966 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9967 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
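// For example, for IEEE single precision AllOneMantissa is 0x007FFFFF;
// subnormal bit patterns are 0x000001..0x7FFFFF, so abs(V) - 1 is
// u< 0x7FFFFF, while zero wraps to 0xFFFFFFFF and normals/inf/NaN stay
// u>= 0x7FFFFF.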
9968 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9969 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9970 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9971 auto SubnormalRes =
9972 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9973 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9974 if (PartialCheck == fcNegSubnormal)
9975 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9976 appendToRes(SubnormalRes);
9977 }
9978
9979 if (FPClassTest PartialCheck = Mask & fcInf) {
9980 if (PartialCheck == fcPosInf)
9981 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9982 AsInt, InfC));
9983 else if (PartialCheck == fcInf)
9984 appendToRes(
9985 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9986 else { // fcNegInf
9987 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9988 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9989 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9990 AsInt, NegInfC));
9991 }
9992 }
9993
9994 if (FPClassTest PartialCheck = Mask & fcNan) {
9995 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9996 if (PartialCheck == fcNan) {
9997 // isnan(V) ==> abs(V) u> int(inf)
9998 appendToRes(
9999 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10000 } else if (PartialCheck == fcQNan) {
10001 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10002 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10003 InfWithQnanBitC));
10004 } else { // fcSNan
10005 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10006 // abs(V) u< (unsigned(Inf) | quiet_bit)
10007 auto IsNan =
10008 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10009 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10010 Abs, InfWithQnanBitC);
10011 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10012 }
10013 }
10014
10015 if (FPClassTest PartialCheck = Mask & fcNormal) {
10016 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10017 // (max_exp-1))
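// For example, for IEEE single precision ExpLSB = 0x00800000 and
// MaxExpMinusOne = 0x7F000000: normal values have abs bits in
// [0x00800000, 0x7F7FFFFF], so abs - ExpLSB lands in [0, 0x7EFFFFFF] and
// compares u< 0x7F000000, while zero/subnormals wrap around and inf/NaN
// compare u>=.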
10018 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10019 auto ExpMinusOne = MIRBuilder.buildSub(
10020 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10021 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10022 auto NormalRes =
10023 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10024 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10025 if (PartialCheck == fcNegNormal)
10026 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10027 else if (PartialCheck == fcPosNormal) {
10028 auto PosSign = MIRBuilder.buildXor(
10029 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10030 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10031 }
10032 appendToRes(NormalRes);
10033 }
10034
10035 MIRBuilder.buildCopy(DstReg, Res);
10036 MI.eraseFromParent();
10037 return Legalized;
10038}
10039
10040 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
10041 // Implement G_SELECT in terms of XOR, AND, OR.
10042 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10043 MI.getFirst4RegLLTs();
10044
10045 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10046 if (IsEltPtr) {
10047 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10048 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10049 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10050 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10051 DstTy = NewTy;
10052 }
10053
10054 if (MaskTy.isScalar()) {
10055 // Turn the scalar condition into a vector condition mask if needed.
10056
10057 Register MaskElt = MaskReg;
10058
10059 // The condition was potentially zero extended before, but we want a sign
10060 // extended boolean.
10061 if (MaskTy != LLT::scalar(1))
10062 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10063
10064 // Continue the sign extension (or truncate) to match the data type.
10065 MaskElt =
10066 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10067
10068 if (DstTy.isVector()) {
10069 // Generate a vector splat idiom.
10070 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10071 MaskReg = ShufSplat.getReg(0);
10072 } else {
10073 MaskReg = MaskElt;
10074 }
10075 MaskTy = DstTy;
10076 } else if (!DstTy.isVector()) {
10077 // Cannot handle the case that mask is a vector and dst is a scalar.
10078 return UnableToLegalize;
10079 }
10080
10081 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10082 return UnableToLegalize;
10083 }
10084
10085 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10086 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10087 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10088 if (IsEltPtr) {
10089 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10090 MIRBuilder.buildIntToPtr(DstReg, Or);
10091 } else {
10092 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10093 }
10094 MI.eraseFromParent();
10095 return Legalized;
10096}
10097
10098 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
10099 // Split DIVREM into individual instructions.
10100 unsigned Opcode = MI.getOpcode();
10101
10102 MIRBuilder.buildInstr(
10103 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10104 : TargetOpcode::G_UDIV,
10105 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10106 MIRBuilder.buildInstr(
10107 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10108 : TargetOpcode::G_UREM,
10109 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10110 MI.eraseFromParent();
10111 return Legalized;
10112}
10113
10114 LegalizerHelper::LegalizeResult
10115 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10116 // Expand %res = G_ABS %a into:
10117 // %v1 = G_ASHR %a, scalar_size-1
10118 // %v2 = G_ADD %a, %v1
10119 // %res = G_XOR %v2, %v1
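// For example, for a 32-bit %a = -5: %v1 = 0xFFFFFFFF, %v2 = -6, and
// %res = 0xFFFFFFFA ^ 0xFFFFFFFF = 5. For a non-negative %a, %v1 = 0 and
// the add and xor are no-ops.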
10120 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10121 Register OpReg = MI.getOperand(1).getReg();
10122 auto ShiftAmt =
10123 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10124 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10125 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10126 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10127 MI.eraseFromParent();
10128 return Legalized;
10129}
10130
10131 LegalizerHelper::LegalizeResult
10132 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
10133 // Expand %res = G_ABS %a into:
10134 // %v1 = G_CONSTANT 0
10135 // %v2 = G_SUB %v1, %a
10136 // %res = G_SMAX %a, %v2
10137 Register SrcReg = MI.getOperand(1).getReg();
10138 LLT Ty = MRI.getType(SrcReg);
10139 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10140 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10141 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10142 MI.eraseFromParent();
10143 return Legalized;
10144}
10145
10146 LegalizerHelper::LegalizeResult
10147 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
10148 Register SrcReg = MI.getOperand(1).getReg();
10149 Register DestReg = MI.getOperand(0).getReg();
10150 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10151 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10152 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10153 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10154 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10155 MI.eraseFromParent();
10156 return Legalized;
10157}
10158
10161 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10162 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10163 "Expected G_ABDS or G_ABDU instruction");
10164
10165 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10166 LLT Ty = MRI.getType(LHS);
10167
10168 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10169 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
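// For example, abdu(3, 10): ugt(3, 10) is false, so the select returns
// sub(10, 3) = 7 and the wrapped sub(3, 10) value is discarded.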
10170 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10171 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10172 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10173 ? CmpInst::ICMP_SGT
10174 : CmpInst::ICMP_UGT;
10175 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10176 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10177
10178 MI.eraseFromParent();
10179 return Legalized;
10180}
10181
10184 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10185 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10186 "Expected G_ABDS or G_ABDU instruction");
10187
10188 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10189 LLT Ty = MRI.getType(LHS);
10190
10191 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10192 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
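// For example, abds(-3, 5) = smax(-3, 5) - smin(-3, 5) = 5 - (-3) = 8.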
10193 Register MaxReg, MinReg;
10194 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10195 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10196 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10197 } else {
10198 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10199 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10200 }
10201 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10202
10203 MI.eraseFromParent();
10204 return Legalized;
10205}
10206
10207 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10208 Register SrcReg = MI.getOperand(1).getReg();
10209 Register DstReg = MI.getOperand(0).getReg();
10210
10211 LLT Ty = MRI.getType(DstReg);
10212
10213 // Reset sign bit
10214 MIRBuilder.buildAnd(
10215 DstReg, SrcReg,
10216 MIRBuilder.buildConstant(
10217 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10218
10219 MI.eraseFromParent();
10220 return Legalized;
10221}
10222
10223 LegalizerHelper::LegalizeResult
10224 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10225 Register SrcReg = MI.getOperand(1).getReg();
10226 LLT SrcTy = MRI.getType(SrcReg);
10227 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10228
10229 // The source could be a scalar if the IR type was <1 x sN>.
10230 if (SrcTy.isScalar()) {
10231 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10232 return UnableToLegalize; // FIXME: handle extension.
10233 // This can be just a plain copy.
10234 Observer.changingInstr(MI);
10235 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10236 Observer.changedInstr(MI);
10237 return Legalized;
10238 }
10239 return UnableToLegalize;
10240}
10241
10242 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10243 MachineFunction &MF = *MI.getMF();
10244 const DataLayout &DL = MIRBuilder.getDataLayout();
10245 LLVMContext &Ctx = MF.getFunction().getContext();
10246 Register ListPtr = MI.getOperand(1).getReg();
10247 LLT PtrTy = MRI.getType(ListPtr);
10248
10249 // ListPtr is a pointer to the head of the list. Get the address
10250 // of the head of the list.
10251 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10252 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10253 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10254 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10255
10256 const Align A(MI.getOperand(2).getImm());
10257 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10258 if (A > TLI.getMinStackArgumentAlignment()) {
10259 Register AlignAmt =
10260 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10261 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10262 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10263 VAList = AndDst.getReg(0);
10264 }
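// For example, with A = 16 and an incoming VAList of 0x1008, the add/mask
// above rounds the pointer up to 0x1010.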
10265
10266 // Increment the pointer, VAList, to the next vaarg.
10267 // The list should be bumped by the size of the element in the current head
10268 // of the list.
10269 Register Dst = MI.getOperand(0).getReg();
10270 LLT LLTTy = MRI.getType(Dst);
10271 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10272 auto IncAmt =
10273 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10274 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10275
10276 // Store the incremented VAList to the legalized pointer.
10277 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10278 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10279 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10280 // Load the actual argument out of the pointer VAList
10281 Align EltAlignment = DL.getABITypeAlign(Ty);
10282 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10283 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10284 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10285
10286 MI.eraseFromParent();
10287 return Legalized;
10288}
10289
10290 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10291 // On Darwin, -Os means optimize for size without hurting performance, so
10292 // only really optimize for size when -Oz (MinSize) is used.
10293 if (MF.getTarget().getTargetTriple().isOSDarwin())
10294 return MF.getFunction().hasMinSize();
10295 return MF.getFunction().hasOptSize();
10296}
10297
10298// Returns a list of types to use for memory op lowering in MemOps. A partial
10299// port of findOptimalMemOpLowering in TargetLowering.
10300static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10301 unsigned Limit, const MemOp &Op,
10302 unsigned DstAS, unsigned SrcAS,
10303 const AttributeList &FuncAttributes,
10304 const TargetLowering &TLI) {
10305 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10306 return false;
10307
10308 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10309
10310 if (Ty == LLT()) {
10311 // Use the largest scalar type whose alignment constraints are satisfied.
10312 // We only need to check DstAlign here as SrcAlign is always greater or
10313 // equal to DstAlign (or zero).
10314 Ty = LLT::scalar(64);
10315 if (Op.isFixedDstAlign())
10316 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10317 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10318 Ty = LLT::scalar(Ty.getSizeInBytes());
10319 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10320 // FIXME: check for the largest legal type we can load/store to.
10321 }
10322
10323 unsigned NumMemOps = 0;
10324 uint64_t Size = Op.size();
10325 while (Size) {
10326 unsigned TySize = Ty.getSizeInBytes();
10327 while (TySize > Size) {
10328 // For now, only use non-vector loads / stores for the left-over pieces.
10329 LLT NewTy = Ty;
10330 // FIXME: check for mem op safety and legality of the types. Not all of
10331 // SDAGisms map cleanly to GISel concepts.
10332 if (NewTy.isVector())
10333 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10334 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10335 unsigned NewTySize = NewTy.getSizeInBytes();
10336 assert(NewTySize > 0 && "Could not find appropriate type");
10337
10338 // If the new LLT cannot cover all of the remaining bits, then consider
10339 // issuing a (or a pair of) unaligned and overlapping load / store.
10340 unsigned Fast;
10341 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10342 MVT VT = getMVTForLLT(Ty);
10343 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10344 TLI.allowsMisalignedMemoryAccesses(
10345 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10346 MachineMemOperand::MONone, &Fast) &&
10347 Fast)
10348 TySize = Size;
10349 else {
10350 Ty = NewTy;
10351 TySize = NewTySize;
10352 }
10353 }
10354
10355 if (++NumMemOps > Limit)
10356 return false;
10357
10358 MemOps.push_back(Ty);
10359 Size -= TySize;
10360 }
10361
10362 return true;
10363}
10364
10365// Get a vectorized representation of the memset value operand, GISel edition.
10366 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10367 MachineRegisterInfo &MRI = *MIB.getMRI();
10368 unsigned NumBits = Ty.getScalarSizeInBits();
10369 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10370 if (!Ty.isVector() && ValVRegAndVal) {
10371 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10372 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10373 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10374 }
10375
10376 // Extend the byte value to the larger type, and then multiply by a magic
10377 // value 0x010101... in order to replicate it across every byte.
10378 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
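// For example, extending the byte 0xAB to 32 bits and multiplying by
// 0x01010101 produces 0xABABABAB.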
10379 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10380 return MIB.buildConstant(Ty, 0).getReg(0);
10381 }
10382
10383 LLT ExtType = Ty.getScalarType();
10384 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10385 if (NumBits > 8) {
10386 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10387 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10388 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10389 }
10390
10391 // For vector types create a G_BUILD_VECTOR.
10392 if (Ty.isVector())
10393 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10394
10395 return Val;
10396}
10397
10398 LegalizerHelper::LegalizeResult
10399 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10400 uint64_t KnownLen, Align Alignment,
10401 bool IsVolatile) {
10402 auto &MF = *MI.getParent()->getParent();
10403 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10404 auto &DL = MF.getDataLayout();
10405 LLVMContext &C = MF.getFunction().getContext();
10406
10407 assert(KnownLen != 0 && "Have a zero length memset length!");
10408
10409 bool DstAlignCanChange = false;
10410 MachineFrameInfo &MFI = MF.getFrameInfo();
10411 bool OptSize = shouldLowerMemFuncForSize(MF);
10412
10413 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10414 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10415 DstAlignCanChange = true;
10416
10417 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10418 std::vector<LLT> MemOps;
10419
10420 const auto &DstMMO = **MI.memoperands_begin();
10421 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10422
10423 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10424 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10425
10426 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10427 MemOp::Set(KnownLen, DstAlignCanChange,
10428 Alignment,
10429 /*IsZeroMemset=*/IsZeroVal,
10430 /*IsVolatile=*/IsVolatile),
10431 DstPtrInfo.getAddrSpace(), ~0u,
10432 MF.getFunction().getAttributes(), TLI))
10433 return UnableToLegalize;
10434
10435 if (DstAlignCanChange) {
10436 // Get an estimate of the type from the LLT.
10437 Type *IRTy = getTypeForLLT(MemOps[0], C);
10438 Align NewAlign = DL.getABITypeAlign(IRTy);
10439 if (NewAlign > Alignment) {
10440 Alignment = NewAlign;
10441 unsigned FI = FIDef->getOperand(1).getIndex();
10442 // Give the stack frame object a larger alignment if needed.
10443 if (MFI.getObjectAlign(FI) < Alignment)
10444 MFI.setObjectAlignment(FI, Alignment);
10445 }
10446 }
10447
10448 MachineIRBuilder MIB(MI);
10449 // Find the largest store and generate the bit pattern for it.
10450 LLT LargestTy = MemOps[0];
10451 for (unsigned i = 1; i < MemOps.size(); i++)
10452 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10453 LargestTy = MemOps[i];
10454
10455 // The memset stored value is always defined as an s8, so in order to make it
10456 // work with larger store types we need to repeat the bit pattern across the
10457 // wider type.
10458 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10459
10460 if (!MemSetValue)
10461 return UnableToLegalize;
10462
10463 // Generate the stores. For each store type in the list, we generate the
10464 // matching store of that type to the destination address.
10465 LLT PtrTy = MRI.getType(Dst);
10466 unsigned DstOff = 0;
10467 unsigned Size = KnownLen;
10468 for (unsigned I = 0; I < MemOps.size(); I++) {
10469 LLT Ty = MemOps[I];
10470 unsigned TySize = Ty.getSizeInBytes();
10471 if (TySize > Size) {
10472 // Issuing an unaligned load / store pair that overlaps with the previous
10473 // pair. Adjust the offset accordingly.
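// For example, a 7-byte memset lowered as two s32 stores reaches this point
// with TySize = 4 but Size = 3, so DstOff becomes 4 - 1 = 3 and the two
// stores overlap by one byte.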
10474 assert(I == MemOps.size() - 1 && I != 0);
10475 DstOff -= TySize - Size;
10476 }
10477
10478 // If this store is smaller than the largest store see whether we can get
10479 // the smaller value for free with a truncate.
10480 Register Value = MemSetValue;
10481 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10482 MVT VT = getMVTForLLT(Ty);
10483 MVT LargestVT = getMVTForLLT(LargestTy);
10484 if (!LargestTy.isVector() && !Ty.isVector() &&
10485 TLI.isTruncateFree(LargestVT, VT))
10486 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10487 else
10488 Value = getMemsetValue(Val, Ty, MIB);
10489 if (!Value)
10490 return UnableToLegalize;
10491 }
10492
10493 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10494
10495 Register Ptr = Dst;
10496 if (DstOff != 0) {
10497 auto Offset =
10498 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10499 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10500 }
10501
10502 MIB.buildStore(Value, Ptr, *StoreMMO);
10503 DstOff += Ty.getSizeInBytes();
10504 Size -= TySize;
10505 }
10506
10507 MI.eraseFromParent();
10508 return Legalized;
10509}
10510
10511 LegalizerHelper::LegalizeResult
10512 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10513 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10514
10515 auto [Dst, Src, Len] = MI.getFirst3Regs();
10516
10517 const auto *MMOIt = MI.memoperands_begin();
10518 const MachineMemOperand *MemOp = *MMOIt;
10519 bool IsVolatile = MemOp->isVolatile();
10520
10521 // See if this is a constant length copy
10522 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10523 // FIXME: support dynamically sized G_MEMCPY_INLINE
10524 assert(LenVRegAndVal &&
10525 "inline memcpy with dynamic size is not yet supported");
10526 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10527 if (KnownLen == 0) {
10528 MI.eraseFromParent();
10529 return Legalized;
10530 }
10531
10532 const auto &DstMMO = **MI.memoperands_begin();
10533 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10534 Align DstAlign = DstMMO.getBaseAlign();
10535 Align SrcAlign = SrcMMO.getBaseAlign();
10536
10537 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10538 IsVolatile);
10539}
10540
10541 LegalizerHelper::LegalizeResult
10542 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10543 uint64_t KnownLen, Align DstAlign,
10544 Align SrcAlign, bool IsVolatile) {
10545 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10546 return lowerMemcpy(MI, Dst, Src, KnownLen,
10547 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10548 IsVolatile);
10549}
10550
10551 LegalizerHelper::LegalizeResult
10552 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10553 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10554 Align SrcAlign, bool IsVolatile) {
10555 auto &MF = *MI.getParent()->getParent();
10556 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10557 auto &DL = MF.getDataLayout();
10558 LLVMContext &C = MF.getFunction().getContext();
10559 
10560 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10561
10562 bool DstAlignCanChange = false;
10563 MachineFrameInfo &MFI = MF.getFrameInfo();
10564 Align Alignment = std::min(DstAlign, SrcAlign);
10565
10566 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10567 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10568 DstAlignCanChange = true;
10569
10570 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10571 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10572 // if the memcpy is in a tail call position.
10573
10574 std::vector<LLT> MemOps;
10575
10576 const auto &DstMMO = **MI.memoperands_begin();
10577 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10578 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10579 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10580
10581 if (!findGISelOptimalMemOpLowering(
10582 MemOps, Limit,
10583 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10584 IsVolatile),
10585 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10586 MF.getFunction().getAttributes(), TLI))
10587 return UnableToLegalize;
10588
10589 if (DstAlignCanChange) {
10590 // Get an estimate of the type from the LLT.
10591 Type *IRTy = getTypeForLLT(MemOps[0], C);
10592 Align NewAlign = DL.getABITypeAlign(IRTy);
10593
10594 // Don't promote to an alignment that would require dynamic stack
10595 // realignment.
10596 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10597 if (!TRI->hasStackRealignment(MF))
10598 if (MaybeAlign StackAlign = DL.getStackAlignment())
10599 NewAlign = std::min(NewAlign, *StackAlign);
10600
10601 if (NewAlign > Alignment) {
10602 Alignment = NewAlign;
10603 unsigned FI = FIDef->getOperand(1).getIndex();
10604 // Give the stack frame object a larger alignment if needed.
10605 if (MFI.getObjectAlign(FI) < Alignment)
10606 MFI.setObjectAlignment(FI, Alignment);
10607 }
10608 }
10609
10610 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10611
10612 MachineIRBuilder MIB(MI);
10613 // Now we need to emit a pair of loads and stores for each of the types we've
10614 // collected. I.e. for each type, generate a load from the source pointer of
10615 // that type width, and then generate a corresponding store to the dest buffer
10616 // of that value loaded. This can result in a sequence of loads and stores of
10617 // mixed types, depending on what the target specifies as good types to use.
10618 unsigned CurrOffset = 0;
10619 unsigned Size = KnownLen;
10620 for (auto CopyTy : MemOps) {
10621 // Issuing an unaligned load / store pair that overlaps with the previous
10622 // pair. Adjust the offset accordingly.
10623 if (CopyTy.getSizeInBytes() > Size)
10624 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10625
10626 // Construct MMOs for the accesses.
10627 auto *LoadMMO =
10628 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10629 auto *StoreMMO =
10630 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10631
10632 // Create the load.
10633 Register LoadPtr = Src;
10634 Register Offset;
10635 if (CurrOffset != 0) {
10636 LLT SrcTy = MRI.getType(Src);
10637 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10638 .getReg(0);
10639 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10640 }
10641 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10642
10643 // Create the store.
10644 Register StorePtr = Dst;
10645 if (CurrOffset != 0) {
10646 LLT DstTy = MRI.getType(Dst);
10647 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10648 }
10649 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10650 CurrOffset += CopyTy.getSizeInBytes();
10651 Size -= CopyTy.getSizeInBytes();
10652 }
10653
10654 MI.eraseFromParent();
10655 return Legalized;
10656}
10657
10658 LegalizerHelper::LegalizeResult
10659 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10660 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10661 bool IsVolatile) {
10662 auto &MF = *MI.getParent()->getParent();
10663 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10664 auto &DL = MF.getDataLayout();
10665 LLVMContext &C = MF.getFunction().getContext();
10666
10667 assert(KnownLen != 0 && "Have a zero length memmove length!");
10668
10669 bool DstAlignCanChange = false;
10670 MachineFrameInfo &MFI = MF.getFrameInfo();
10671 bool OptSize = shouldLowerMemFuncForSize(MF);
10672 Align Alignment = std::min(DstAlign, SrcAlign);
10673
10674 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10675 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10676 DstAlignCanChange = true;
10677
10678 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10679 std::vector<LLT> MemOps;
10680
10681 const auto &DstMMO = **MI.memoperands_begin();
10682 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10683 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10684 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10685
10686 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10687 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10688 // same thing here.
10689 if (!findGISelOptimalMemOpLowering(
10690 MemOps, Limit,
10691 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10692 /*IsVolatile*/ true),
10693 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10694 MF.getFunction().getAttributes(), TLI))
10695 return UnableToLegalize;
10696
10697 if (DstAlignCanChange) {
10698 // Get an estimate of the type from the LLT.
10699 Type *IRTy = getTypeForLLT(MemOps[0], C);
10700 Align NewAlign = DL.getABITypeAlign(IRTy);
10701
10702 // Don't promote to an alignment that would require dynamic stack
10703 // realignment.
10704 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10705 if (!TRI->hasStackRealignment(MF))
10706 if (MaybeAlign StackAlign = DL.getStackAlignment())
10707 NewAlign = std::min(NewAlign, *StackAlign);
10708
10709 if (NewAlign > Alignment) {
10710 Alignment = NewAlign;
10711 unsigned FI = FIDef->getOperand(1).getIndex();
10712 // Give the stack frame object a larger alignment if needed.
10713 if (MFI.getObjectAlign(FI) < Alignment)
10714 MFI.setObjectAlignment(FI, Alignment);
10715 }
10716 }
10717
10718 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10719
10720 MachineIRBuilder MIB(MI);
10721 // Memmove requires that we perform the loads first before issuing the stores.
10722 // Apart from that, this loop is pretty much doing the same thing as the
10723 // memcpy codegen function.
10724 unsigned CurrOffset = 0;
10725 SmallVector<Register, 16> LoadVals;
10726 for (auto CopyTy : MemOps) {
10727 // Construct MMO for the load.
10728 auto *LoadMMO =
10729 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10730
10731 // Create the load.
10732 Register LoadPtr = Src;
10733 if (CurrOffset != 0) {
10734 LLT SrcTy = MRI.getType(Src);
10735 auto Offset =
10736 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10737 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10738 }
10739 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10740 CurrOffset += CopyTy.getSizeInBytes();
10741 }
10742
10743 CurrOffset = 0;
10744 for (unsigned I = 0; I < MemOps.size(); ++I) {
10745 LLT CopyTy = MemOps[I];
10746 // Now store the values loaded.
10747 auto *StoreMMO =
10748 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10749
10750 Register StorePtr = Dst;
10751 if (CurrOffset != 0) {
10752 LLT DstTy = MRI.getType(Dst);
10753 auto Offset =
10754 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10755 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10756 }
10757 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10758 CurrOffset += CopyTy.getSizeInBytes();
10759 }
10760 MI.eraseFromParent();
10761 return Legalized;
10762}
10763
10764 LegalizerHelper::LegalizeResult
10765 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10766 const unsigned Opc = MI.getOpcode();
10767 // This combine is fairly complex so it's not written with a separate
10768 // matcher function.
10769 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10770 Opc == TargetOpcode::G_MEMSET) &&
10771 "Expected memcpy like instruction");
10772
10773 auto MMOIt = MI.memoperands_begin();
10774 const MachineMemOperand *MemOp = *MMOIt;
10775
10776 Align DstAlign = MemOp->getBaseAlign();
10777 Align SrcAlign;
10778 auto [Dst, Src, Len] = MI.getFirst3Regs();
10779
10780 if (Opc != TargetOpcode::G_MEMSET) {
10781 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10782 MemOp = *(++MMOIt);
10783 SrcAlign = MemOp->getBaseAlign();
10784 }
10785
10786 // See if this is a constant length copy
10787 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10788 if (!LenVRegAndVal)
10789 return UnableToLegalize;
10790 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10791
10792 if (KnownLen == 0) {
10793 MI.eraseFromParent();
10794 return Legalized;
10795 }
10796
10797 if (MaxLen && KnownLen > MaxLen)
10798 return UnableToLegalize;
10799
10800 bool IsVolatile = MemOp->isVolatile();
10801 if (Opc == TargetOpcode::G_MEMCPY) {
10802 auto &MF = *MI.getParent()->getParent();
10803 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10804 bool OptSize = shouldLowerMemFuncForSize(MF);
10805 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10806 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10807 IsVolatile);
10808 }
10809 if (Opc == TargetOpcode::G_MEMMOVE)
10810 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10811 if (Opc == TargetOpcode::G_MEMSET)
10812 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10813 return UnableToLegalize;
10814}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
APInt bitcastToAPInt() const
Definition APFloat.h:1335
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1469
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
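The LLT constructors and queries listed above are the type vocabulary used throughout the legalizer. A minimal, self-contained sketch of building and inspecting such types (header path per recent LLVM trees; all values are arbitrary):

// Sketch only: exercises the LLT construction/query API summarized above.
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;

static void lltSketch() {
  LLT S32 = LLT::scalar(32);             // 32-bit "bag of bits"
  LLT V4S32 = LLT::fixed_vector(4, 32);  // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);          // 64-bit pointer in address space 0

  (void)S32.getSizeInBits();             // TypeSize of 32 bits
  (void)V4S32.getElementType();          // s32
  (void)V4S32.getNumElements();          // 4
  LLT V2S32 = V4S32.changeElementCount(ElementCount::getFixed(2)); // <2 x s32>
  LLT V2S64 = V2S32.changeElementSize(64);                         // <2 x s64>
  (void)P0.isPointer();                  // true
  (void)V2S64.getScalarType();           // s64
}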
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
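widenScalar and the widenScalarSrc/widenScalarDst helpers above are usually combined in a fixed mutate-in-place pattern. A hedged sketch of that pattern for a simple two-source operation; the function name is hypothetical and G_ANYEXT is only one of the possible extension opcodes:

// Sketch: the common widen-scalar mutation sequence. The helper methods and
// the public Observer member are real; this wrapper function is not.
static LegalizerHelper::LegalizeResult
widenBinOpSketch(LegalizerHelper &Helper, MachineInstr &MI, LLT WideTy) {
  Helper.Observer.changingInstr(MI);
  // Extend both source operands to the wider type...
  Helper.widenScalarSrc(MI, WideTy, /*OpIdx=*/1, TargetOpcode::G_ANYEXT);
  Helper.widenScalarSrc(MI, WideTy, /*OpIdx=*/2, TargetOpcode::G_ANYEXT);
  // ...and narrow the definition back to its original type via G_TRUNC.
  Helper.widenScalarDst(MI, WideTy);
  Helper.Observer.changedInstr(MI);
  return LegalizerHelper::Legalized;
}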
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
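legalizeInstrStep is the per-instruction entry point the Legalizer pass drives. A simplified, hedged sketch of that driving step (worklist management and diagnostics in the real pass are more involved; the function name is illustrative):

// Sketch: legalize one instruction and map the result to a success flag.
static bool legalizeOneInstr(MachineFunction &MF, GISelChangeObserver &Observer,
                             MachineIRBuilder &MIRBuilder,
                             LostDebugLocObserver &LocObserver,
                             MachineInstr &MI) {
  LegalizerHelper Helper(MF, Observer, MIRBuilder);
  switch (Helper.legalizeInstrStep(MI, LocObserver)) {
  case LegalizerHelper::AlreadyLegal:
  case LegalizerHelper::Legalized:
    return true;
  case LegalizerHelper::UnableToLegalize:
    return false; // the real pass reports an error here
  }
  llvm_unreachable("covered switch");
}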
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
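The build* helpers above all follow the same DstOp/SrcOp pattern: pass an LLT (or an existing register) as the destination and registers or other builders as sources. A small hedged sketch computing an arbitrary expression:

// Sketch: build (X << 3) | 1 as s32 generic MIR; the expression is arbitrary.
static Register buildShiftOrOne(MachineIRBuilder &MIRBuilder, Register X) {
  LLT S32 = LLT::scalar(32);
  auto Three = MIRBuilder.buildConstant(S32, 3); // G_CONSTANT i32 3
  auto One = MIRBuilder.buildConstant(S32, 1);   // G_CONSTANT i32 1
  auto Shl = MIRBuilder.buildShl(S32, X, Three); // G_SHL
  auto Or = MIRBuilder.buildOr(S32, Shl, One);   // G_OR
  return Or.getReg(0);                           // def of the G_OR
}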
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:627
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
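These legalize actions are what a target's LegalizerInfo rules ultimately resolve to for a given opcode/type combination. A hedged excerpt of how a backend typically requests them inside its LegalizerInfo constructor (the opcodes and type bounds are illustrative, not taken from any particular target):

// Illustrative rules; the resulting actions (Legal, WidenScalar,
// NarrowScalar, Libcall, ...) are what LegalizerHelper acts on.
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
getActionDefinitionsBuilder({TargetOpcode::G_ADD, TargetOpcode::G_SUB})
    .legalFor({S32, S64})        // Legal for s32/s64
    .widenScalarToNextPow2(0)    // WidenScalar for odd bit widths
    .clampScalar(0, S32, S64);   // WidenScalar / NarrowScalar outside [s32, s64]
getActionDefinitionsBuilder(TargetOpcode::G_FREM)
    .libcallFor({S32, S64});     // Libcall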
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
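A small sketch of the MIPatternMatch combinators named above; the helper name is hypothetical:

// Sketch: check whether a virtual register is defined by a G_CONSTANT and,
// if so, capture its value.
static bool isConstantVReg(Register Reg, const MachineRegisterInfo &MRI,
                           APInt &CstOut) {
  using namespace MIPatternMatch;
  return mi_match(Reg, MRI, m_ICst(CstOut));
}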
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
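These RTLIB helpers map an operand/result value-type pair to a concrete runtime routine, with UNKNOWN_LIBCALL signalling that none exists. A hedged fragment of the lookup pattern used when turning a conversion into a libcall (the wrapper function is illustrative):

// Sketch: look up the runtime routine for a signed f64 -> i32 conversion.
static bool haveFPToSIntLibcall(RTLIB::Libcall &LC) {
  LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
  return LC != RTLIB::UNKNOWN_LIBCALL; // UNKNOWN_LIBCALL: nothing suitable
}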
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2034
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction whose opcode is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
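A short sketch of the constant-lookup helper above, as it is typically used to pick a cheaper expansion when an operand is known at compile time (function name hypothetical):

// Sketch: recover a constant shift amount, if the operand is a G_CONSTANT.
static bool getConstantShiftAmt(Register AmtReg, const MachineRegisterInfo &MRI,
                                uint64_t &AmtOut) {
  if (std::optional<APInt> Cst = getIConstantVRegVal(AmtReg, MRI)) {
    AmtOut = Cst->getZExtValue();
    return true;
  }
  return false;
}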
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1564
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1621
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1188
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
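A hedged sketch of invoking the createLibcall helper declared above; the routine name is hypothetical and the way CallLowering::ArgInfo is filled in is illustrative only:

// Sketch: emit a call to a hypothetical double -> double helper routine.
static LegalizerHelper::LegalizeResult
emitExampleLibcall(MachineIRBuilder &MIRBuilder, Register DstReg, Register SrcReg,
                   MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
  Type *F64 = Type::getDoubleTy(Ctx);
  CallLowering::ArgInfo Result(DstReg, F64, /*OrigIndex=*/0); // illustrative
  CallLowering::ArgInfo Arg(SrcReg, F64, /*OrigIndex=*/0);    // illustrative
  return createLibcall(MIRBuilder, "__example_helper", Result, {Arg},
                       CallingConv::C, LocObserver, &MI);
}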
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
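A small sketch of the extractParts helper above, splitting a 64-bit register into two 32-bit pieces (names are placeholders):

// Sketch: break a 64-bit vreg into two s32 pieces.
static void splitInto32BitParts(Register WideReg, MachineIRBuilder &MIRBuilder,
                                MachineRegisterInfo &MRI,
                                SmallVectorImpl<Register> &Parts) {
  extractParts(WideReg, LLT::scalar(32), /*NumParts=*/2, Parts, MIRBuilder, MRI);
}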
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1835
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1276
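getGCDType and the getLCMType helper listed earlier perform the size arithmetic used when values are split into parts and reassembled. A small hedged sketch for the scalar case (the expected sizes in the comments assume the documented size semantics):

// Sketch: common-multiple / common-divisor type arithmetic for scalars.
static void typeArithmeticSketch() {
  LLT S64 = LLT::scalar(64);
  LLT S48 = LLT::scalar(48);
  LLT Lcm = getLCMType(S64, S48); // total size lcm(64, 48) = 192 bits
  LLT Gcd = getGCDType(S64, S48); // total size gcd(64, 48) = 16 bits
  (void)Lcm;
  (void)Gcd;
}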
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)