1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
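/// For example (illustrative): OrigTy = s96 with NarrowTy = s32 yields {3, 0},
/// while OrigTy = v5s16 with NarrowTy = v2s16 yields {2, 1} with
/// LeftoverTy = s16.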
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
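// Map a scalar LLT to the IR floating-point type of the same bit width
// (s16 -> half, s32 -> float, s64 -> double, s80 -> x86_fp80, s128 -> fp128);
// any other type yields nullptr.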
84Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
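// Split the vector in \p Reg into its scalar elements and append them to
// \p Elts.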
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
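// Merge the GCDTy-typed pieces in \p VRegs into NarrowTy-sized pieces that
// together cover the least common multiple type of DstTy and NarrowTy. If the
// sources do not cover the LCM type exactly, the tail is padded with zero,
// undef, or sign bits according to \p PadStrategy (G_ZEXT, G_ANYEXT, G_SEXT).
// On return, \p VRegs holds the NarrowTy pieces of the LCM-typed value.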
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
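// Merge \p RemergeRegs into a single LCMTy value, then narrow it back down to
// \p DstReg: a plain merge if the types already match, a truncate for scalar
// results, or an unmerge whose first result is DstReg for vector LCM types.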
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
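 // For example, RTLIBCASE(SIN_F) with Size == 32 expands to
 // `return RTLIB::SIN_F32;`, so a G_FSIN on an s32 value maps to the 32-bit
 // sine libcall.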
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FMODF:
475 RTLIBCASE(MODF_F);
476 case TargetOpcode::G_FLOG10:
477 RTLIBCASE(LOG10_F);
478 case TargetOpcode::G_FLOG:
479 RTLIBCASE(LOG_F);
480 case TargetOpcode::G_FLOG2:
481 RTLIBCASE(LOG2_F);
482 case TargetOpcode::G_FLDEXP:
483 RTLIBCASE(LDEXP_F);
484 case TargetOpcode::G_FCEIL:
485 RTLIBCASE(CEIL_F);
486 case TargetOpcode::G_FFLOOR:
487 RTLIBCASE(FLOOR_F);
488 case TargetOpcode::G_FMINNUM:
489 RTLIBCASE(FMIN_F);
490 case TargetOpcode::G_FMAXNUM:
491 RTLIBCASE(FMAX_F);
492 case TargetOpcode::G_FMINIMUMNUM:
493 RTLIBCASE(FMINIMUM_NUM_F);
494 case TargetOpcode::G_FMAXIMUMNUM:
495 RTLIBCASE(FMAXIMUM_NUM_F);
496 case TargetOpcode::G_FSQRT:
497 RTLIBCASE(SQRT_F);
498 case TargetOpcode::G_FRINT:
499 RTLIBCASE(RINT_F);
500 case TargetOpcode::G_FNEARBYINT:
501 RTLIBCASE(NEARBYINT_F);
502 case TargetOpcode::G_INTRINSIC_TRUNC:
503 RTLIBCASE(TRUNC_F);
504 case TargetOpcode::G_INTRINSIC_ROUND:
505 RTLIBCASE(ROUND_F);
506 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
507 RTLIBCASE(ROUNDEVEN_F);
508 case TargetOpcode::G_INTRINSIC_LRINT:
509 RTLIBCASE(LRINT_F);
510 case TargetOpcode::G_INTRINSIC_LLRINT:
511 RTLIBCASE(LLRINT_F);
512 }
513 llvm_unreachable("Unknown libcall function");
514#undef RTLIBCASE_INT
515#undef RTLIBCASE
516}
517
518/// True if an instruction is in tail position in its caller. Intended for
519/// legalizing libcalls as tail calls when possible.
520static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
521 MachineInstr &MI,
522 const TargetInstrInfo &TII,
523 MachineRegisterInfo &MRI) {
524 MachineBasicBlock &MBB = *MI.getParent();
525 const Function &F = MBB.getParent()->getFunction();
526
527 // Conservatively require the attributes of the call to match those of
528 // the return. Ignore NoAlias and NonNull because they don't affect the
529 // call sequence.
530 AttributeList CallerAttrs = F.getAttributes();
531 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
532 .removeAttribute(Attribute::NoAlias)
533 .removeAttribute(Attribute::NonNull)
534 .hasAttributes())
535 return false;
536
537 // It's not safe to eliminate the sign / zero extension of the return value.
538 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
539 CallerAttrs.hasRetAttr(Attribute::SExt))
540 return false;
541
542 // Only tail call if the following instruction is a standard return or if we
543 // have a `thisreturn` callee, and a sequence like:
544 //
545 // G_MEMCPY %0, %1, %2
546 // $x0 = COPY %0
547 // RET_ReallyLR implicit $x0
548 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
549 if (Next != MBB.instr_end() && Next->isCopy()) {
550 if (MI.getOpcode() == TargetOpcode::G_BZERO)
551 return false;
552
553 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
554 // memcpy/etc. routines return the same parameter. For others it will be the
555 // returned value.
556 Register VReg = MI.getOperand(0).getReg();
557 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
558 return false;
559
560 Register PReg = Next->getOperand(0).getReg();
561 if (!PReg.isPhysical())
562 return false;
563
564 auto Ret = next_nodbg(Next, MBB.instr_end());
565 if (Ret == MBB.instr_end() || !Ret->isReturn())
566 return false;
567
568 if (Ret->getNumImplicitOperands() != 1)
569 return false;
570
571 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
572 return false;
573
574 // Skip over the COPY that we just validated.
575 Next = Ret;
576 }
577
578 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
579 return false;
580
581 return true;
582}
583
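// Emit a call to the runtime function \p Name with calling convention \p CC.
// When \p MI is provided and sits in tail position, the call is emitted as a
// tail call and the now-redundant return is removed.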
584LegalizerHelper::LegalizeResult
585llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
586 const CallLowering::ArgInfo &Result,
587 ArrayRef<CallLowering::ArgInfo> Args,
588 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
589 MachineInstr *MI) {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
592 CallLowering::CallLoweringInfo Info;
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605 return LegalizerHelper::UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628 return LegalizerHelper::Legalized;
629}
630
631LegalizerHelper::LegalizeResult
632llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
633 const CallLowering::ArgInfo &Result,
634 ArrayRef<CallLowering::ArgInfo> Args,
635 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
636 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
637 const char *Name = TLI.getLibcallName(Libcall);
638 if (!Name)
639 return LegalizerHelper::UnableToLegalize;
640 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
641 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
642}
643
644// Useful for libcalls where all operands have the same type.
647 Type *OpType, LostDebugLocObserver &LocObserver) {
648 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
649
650 // FIXME: What does the original arg index mean here?
651 SmallVector<CallLowering::ArgInfo, 3> Args;
652 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
653 Args.push_back({MO.getReg(), OpType, 0});
654 return createLibcall(MIRBuilder, Libcall,
655 {MI.getOperand(0).getReg(), OpType, 0}, Args,
656 LocObserver, &MI);
657}
658
659LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
660 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
661 LostDebugLocObserver &LocObserver) {
662 MachineFunction &MF = *MI.getMF();
663 MachineRegisterInfo &MRI = MF.getRegInfo();
664
665 Register DstSin = MI.getOperand(0).getReg();
666 Register DstCos = MI.getOperand(1).getReg();
667 Register Src = MI.getOperand(2).getReg();
668 LLT DstTy = MRI.getType(DstSin);
669
670 int MemSize = DstTy.getSizeInBytes();
671 Align Alignment = getStackTemporaryAlignment(DstTy);
672 const DataLayout &DL = MIRBuilder.getDataLayout();
673 unsigned AddrSpace = DL.getAllocaAddrSpace();
674 MachinePointerInfo PtrInfo;
675
676 Register StackPtrSin =
677 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
678 .getReg(0);
679 Register StackPtrCos =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682
683 auto &Ctx = MF.getFunction().getContext();
684 auto LibcallResult =
685 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
686 {{0}, Type::getVoidTy(Ctx), 0},
687 {{Src, OpType, 0},
688 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
689 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
690 LocObserver, &MI);
691
692 if (LibcallResult != LegalizeResult::Legalized)
693 return LibcallResult;
694
695 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
699
700 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
701 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
702 MI.eraseFromParent();
703
704 return LegalizeResult::Legalized;
705}
706
707LegalizerHelper::LegalizeResult
708LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
709 unsigned Size, Type *OpType,
710 LostDebugLocObserver &LocObserver) {
711 MachineFunction &MF = MIRBuilder.getMF();
712 MachineRegisterInfo &MRI = MF.getRegInfo();
713
714 Register DstFrac = MI.getOperand(0).getReg();
715 Register DstInt = MI.getOperand(1).getReg();
716 Register Src = MI.getOperand(2).getReg();
717 LLT DstTy = MRI.getType(DstFrac);
718
719 int MemSize = DstTy.getSizeInBytes();
720 Align Alignment = getStackTemporaryAlignment(DstTy);
721 const DataLayout &DL = MIRBuilder.getDataLayout();
722 unsigned AddrSpace = DL.getAllocaAddrSpace();
723 MachinePointerInfo PtrInfo;
724
725 Register StackPtrInt =
726 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
727 .getReg(0);
728
729 auto &Ctx = MF.getFunction().getContext();
730 auto LibcallResult = createLibcall(
731 MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
732 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
733 LocObserver, &MI);
734
735 if (LibcallResult != LegalizeResult::Legalized)
736 return LibcallResult;
737
738 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
739 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
740
741 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
742 MI.eraseFromParent();
743
744 return LegalizeResult::Legalized;
745}
746
747LegalizerHelper::LegalizeResult
748llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
749 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
750 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
751
752 SmallVector<CallLowering::ArgInfo, 3> Args;
753 // Add all the args, except for the last which is an imm denoting 'tail'.
754 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
755 Register Reg = MI.getOperand(i).getReg();
756
757 // Need to derive an IR type for call lowering.
758 LLT OpLLT = MRI.getType(Reg);
759 Type *OpTy = nullptr;
760 if (OpLLT.isPointer())
761 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
762 else
763 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
764 Args.push_back({Reg, OpTy, 0});
765 }
766
767 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
768 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
769 RTLIB::Libcall RTLibcall;
770 unsigned Opc = MI.getOpcode();
771 const char *Name;
772 switch (Opc) {
773 case TargetOpcode::G_BZERO:
774 RTLibcall = RTLIB::BZERO;
775 Name = TLI.getLibcallName(RTLibcall);
776 break;
777 case TargetOpcode::G_MEMCPY:
778 RTLibcall = RTLIB::MEMCPY;
779 Name = TLI.getMemcpyName();
780 Args[0].Flags[0].setReturned();
781 break;
782 case TargetOpcode::G_MEMMOVE:
783 RTLibcall = RTLIB::MEMMOVE;
784 Name = TLI.getLibcallName(RTLibcall);
785 Args[0].Flags[0].setReturned();
786 break;
787 case TargetOpcode::G_MEMSET:
788 RTLibcall = RTLIB::MEMSET;
789 Name = TLI.getLibcallName(RTLibcall);
790 Args[0].Flags[0].setReturned();
791 break;
792 default:
793 llvm_unreachable("unsupported opcode");
794 }
795
796 // Unsupported libcall on the target.
797 if (!Name) {
798 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
799 << MIRBuilder.getTII().getName(Opc) << "\n");
800 return LegalizerHelper::UnableToLegalize;
801 }
802
803 CallLowering::CallLoweringInfo Info;
804 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
805 Info.Callee = MachineOperand::CreateES(Name);
806 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
807 Info.IsTailCall =
808 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
809 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
810
811 llvm::append_range(Info.OrigArgs, Args);
812 if (!CLI.lowerCall(MIRBuilder, Info))
813 return LegalizerHelper::UnableToLegalize;
814
815 if (Info.LoweredTailCall) {
816 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
817
818 // Check debug locations before removing the return.
819 LocObserver.checkpoint(true);
820
821 // We must have a return following the call (or debug insts) to get past
822 // isLibCallInTailPosition.
823 do {
824 MachineInstr *Next = MI.getNextNode();
825 assert(Next &&
826 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
827 "Expected instr following MI to be return or debug inst?");
828 // We lowered a tail call, so the call is now the return from the block.
829 // Delete the old return.
830 Next->eraseFromParent();
831 } while (MI.getNextNode());
832
833 // We expect to lose the debug location from the return.
834 LocObserver.checkpoint(false);
835 }
836
837 return LegalizerHelper::Legalized;
838}
839
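// Pick the outline-atomic runtime helper (e.g. the AArch64 __aarch64_cas* /
// __aarch64_ldadd* family) matching the atomic opcode, memory ordering and
// access size; returns UNKNOWN_LIBCALL for unsupported cases such as vector
// memory types.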
840static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
841 unsigned Opc = MI.getOpcode();
842 auto &AtomicMI = cast<GMemOperation>(MI);
843 auto &MMO = AtomicMI.getMMO();
844 auto Ordering = MMO.getMergedOrdering();
845 LLT MemType = MMO.getMemoryType();
846 uint64_t MemSize = MemType.getSizeInBytes();
847 if (MemType.isVector())
848 return RTLIB::UNKNOWN_LIBCALL;
849
850#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
851#define LCALL5(A) \
852 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
857 return getOutlineAtomicHelper(LC, Ordering, MemSize);
858 }
859 case TargetOpcode::G_ATOMICRMW_XCHG: {
860 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
861 return getOutlineAtomicHelper(LC, Ordering, MemSize);
862 }
863 case TargetOpcode::G_ATOMICRMW_ADD:
864 case TargetOpcode::G_ATOMICRMW_SUB: {
865 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
866 return getOutlineAtomicHelper(LC, Ordering, MemSize);
867 }
868 case TargetOpcode::G_ATOMICRMW_AND: {
869 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
870 return getOutlineAtomicHelper(LC, Ordering, MemSize);
871 }
872 case TargetOpcode::G_ATOMICRMW_OR: {
873 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
874 return getOutlineAtomicHelper(LC, Ordering, MemSize);
875 }
876 case TargetOpcode::G_ATOMICRMW_XOR: {
877 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
878 return getOutlineAtomicHelper(LC, Ordering, MemSize);
879 }
880 default:
881 return RTLIB::UNKNOWN_LIBCALL;
882 }
883#undef LCALLS
884#undef LCALL5
885}
886
887static LegalizerHelper::LegalizeResult
888createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
889 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
890
891 Type *RetTy;
892 SmallVector<Register> RetRegs;
893 SmallVector<CallLowering::ArgInfo, 3> Args;
894 unsigned Opc = MI.getOpcode();
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 Register Success;
899 LLT SuccessLLT;
900 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
901 MI.getFirst4RegLLTs();
902 RetRegs.push_back(Ret);
903 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
904 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
905 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
906 NewLLT) = MI.getFirst5RegLLTs();
907 RetRegs.push_back(Success);
908 RetTy = StructType::get(
909 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
910 }
911 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
912 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
913 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
914 break;
915 }
916 case TargetOpcode::G_ATOMICRMW_XCHG:
917 case TargetOpcode::G_ATOMICRMW_ADD:
918 case TargetOpcode::G_ATOMICRMW_SUB:
919 case TargetOpcode::G_ATOMICRMW_AND:
920 case TargetOpcode::G_ATOMICRMW_OR:
921 case TargetOpcode::G_ATOMICRMW_XOR: {
922 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
923 RetRegs.push_back(Ret);
924 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
925 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
926 Val =
927 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
928 .getReg(0);
929 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
930 Val =
931 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
932 .getReg(0);
933 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
934 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
935 break;
936 }
937 default:
938 llvm_unreachable("unsupported opcode");
939 }
940
941 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
942 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
943 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
944 const char *Name = TLI.getLibcallName(RTLibcall);
945
946 // Unsupported libcall on the target.
947 if (!Name) {
948 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
949 << MIRBuilder.getTII().getName(Opc) << "\n");
950 return LegalizerHelper::UnableToLegalize;
951 }
952
953 CallLowering::CallLoweringInfo Info;
954 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
955 Info.Callee = MachineOperand::CreateES(Name);
956 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
957
958 llvm::append_range(Info.OrigArgs, Args);
959 if (!CLI.lowerCall(MIRBuilder, Info))
960 return LegalizerHelper::UnableToLegalize;
961
962 return LegalizerHelper::Legalized;
963}
964
965static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
966 Type *FromType) {
967 auto ToMVT = MVT::getVT(ToType);
968 auto FromMVT = MVT::getVT(FromType);
969
970 switch (Opcode) {
971 case TargetOpcode::G_FPEXT:
972 return RTLIB::getFPEXT(FromMVT, ToMVT);
973 case TargetOpcode::G_FPTRUNC:
974 return RTLIB::getFPROUND(FromMVT, ToMVT);
975 case TargetOpcode::G_FPTOSI:
976 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
977 case TargetOpcode::G_FPTOUI:
978 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
979 case TargetOpcode::G_SITOFP:
980 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
981 case TargetOpcode::G_UITOFP:
982 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
983 }
984 llvm_unreachable("Unsupported libcall function");
985}
986
987static LegalizerHelper::LegalizeResult
988conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
989 Type *FromType, LostDebugLocObserver &LocObserver,
990 const TargetLowering &TLI, bool IsSigned = false) {
991 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
992 if (FromType->isIntegerTy()) {
993 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
994 Arg.Flags[0].setSExt();
995 else
996 Arg.Flags[0].setZExt();
997 }
998
999 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
1000 return createLibcall(MIRBuilder, Libcall,
1001 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
1002 &MI);
1003}
1004
1005static RTLIB::Libcall
1006getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1007 RTLIB::Libcall RTLibcall;
1008 switch (MI.getOpcode()) {
1009 case TargetOpcode::G_GET_FPENV:
1010 RTLibcall = RTLIB::FEGETENV;
1011 break;
1012 case TargetOpcode::G_SET_FPENV:
1013 case TargetOpcode::G_RESET_FPENV:
1014 RTLibcall = RTLIB::FESETENV;
1015 break;
1016 case TargetOpcode::G_GET_FPMODE:
1017 RTLibcall = RTLIB::FEGETMODE;
1018 break;
1019 case TargetOpcode::G_SET_FPMODE:
1020 case TargetOpcode::G_RESET_FPMODE:
1021 RTLibcall = RTLIB::FESETMODE;
1022 break;
1023 default:
1024 llvm_unreachable("Unexpected opcode");
1025 }
1026 return RTLibcall;
1027}
1028
1029// Some library functions that read FP state (fegetmode, fegetenv) write the
1030// state into a region in memory. IR intrinsics that do the same operations
1031// (get_fpmode, get_fpenv) return the state as an integer value. To implement
1032// these intrinsics via the library functions, we need to use a temporary
1033// variable, for example:
1034//
1035// %0:_(s32) = G_GET_FPMODE
1036//
1037// is transformed to:
1038//
1039// %1:_(p0) = G_FRAME_INDEX %stack.0
1040// BL &fegetmode
1041// %0:_(s32) = G_LOAD %1
1042//
1043LegalizerHelper::LegalizeResult
1044LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1045 MachineInstr &MI,
1046 LostDebugLocObserver &LocObserver) {
1047 const DataLayout &DL = MIRBuilder.getDataLayout();
1048 auto &MF = MIRBuilder.getMF();
1049 auto &MRI = *MIRBuilder.getMRI();
1050 auto &Ctx = MF.getFunction().getContext();
1051
1052 // Create temporary, where library function will put the read state.
1053 Register Dst = MI.getOperand(0).getReg();
1054 LLT StateTy = MRI.getType(Dst);
1055 TypeSize StateSize = StateTy.getSizeInBytes();
1056 Align TempAlign = getStackTemporaryAlignment(StateTy);
1057 MachinePointerInfo TempPtrInfo;
1058 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1059
1060 // Create a call to library function, with the temporary as an argument.
1061 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1062 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1063 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1064 auto Res =
1065 createLibcall(MIRBuilder, RTLibcall,
1066 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1067 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1068 LocObserver, nullptr);
1069 if (Res != LegalizerHelper::Legalized)
1070 return Res;
1071
1072 // Create a load from the temporary.
1073 MachineMemOperand *MMO = MF.getMachineMemOperand(
1074 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1075 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1076
1077 return LegalizerHelper::Legalized;
1078}
1079
1080// Similar to `createGetStateLibcall` the function calls a library function
1081// using transient space in stack. In this case the library function reads
1082// content of memory region.
1083LegalizerHelper::LegalizeResult
1084LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1085 MachineInstr &MI,
1086 LostDebugLocObserver &LocObserver) {
1087 const DataLayout &DL = MIRBuilder.getDataLayout();
1088 auto &MF = MIRBuilder.getMF();
1089 auto &MRI = *MIRBuilder.getMRI();
1090 auto &Ctx = MF.getFunction().getContext();
1091
1092 // Create temporary, where library function will get the new state.
1093 Register Src = MI.getOperand(0).getReg();
1094 LLT StateTy = MRI.getType(Src);
1095 TypeSize StateSize = StateTy.getSizeInBytes();
1096 Align TempAlign = getStackTemporaryAlignment(StateTy);
1097 MachinePointerInfo TempPtrInfo;
1098 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1099
1100 // Put the new state into the temporary.
1101 MachineMemOperand *MMO = MF.getMachineMemOperand(
1102 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1103 MIRBuilder.buildStore(Src, Temp, *MMO);
1104
1105 // Create a call to library function, with the temporary as an argument.
1106 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1107 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1108 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1109 return createLibcall(MIRBuilder, RTLibcall,
1110 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1111 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1112 LocObserver, nullptr);
1113}
1114
1115/// Returns the corresponding libcall for the given Pred and
1116/// the ICMP predicate that should be generated to compare with #0
1117/// after the libcall.
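/// For example, FCMP_OLT at 32 bits maps to {RTLIB::OLT_F32, ICMP_SLT}: the
/// soft-float comparison routine is called and its integer result is then
/// compared signed-less-than against zero.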
1118static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1119getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1120#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1121 do { \
1122 switch (Size) { \
1123 case 32: \
1124 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1125 case 64: \
1126 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1127 case 128: \
1128 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1129 default: \
1130 llvm_unreachable("unexpected size"); \
1131 } \
1132 } while (0)
1133
1134 switch (Pred) {
1135 case CmpInst::FCMP_OEQ:
1136 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1137 case CmpInst::FCMP_UNE:
1138 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1139 case CmpInst::FCMP_OGE:
1140 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1141 case CmpInst::FCMP_OLT:
1142 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1143 case CmpInst::FCMP_OLE:
1144 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1145 case CmpInst::FCMP_OGT:
1146 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1147 case CmpInst::FCMP_UNO:
1148 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1149 default:
1150 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1151 }
1152}
1153
1154LegalizerHelper::LegalizeResult
1155LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1156 MachineInstr &MI,
1157 LostDebugLocObserver &LocObserver) {
1158 auto &MF = MIRBuilder.getMF();
1159 auto &Ctx = MF.getFunction().getContext();
1160 const GFCmp *Cmp = cast<GFCmp>(&MI);
1161
1162 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1163 unsigned Size = OpLLT.getSizeInBits();
1164 if ((Size != 32 && Size != 64 && Size != 128) ||
1165 OpLLT != MRI.getType(Cmp->getRHSReg()))
1166 return UnableToLegalize;
1167
1168 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1169
1170 // DstReg type is s32
1171 const Register DstReg = Cmp->getReg(0);
1172 LLT DstTy = MRI.getType(DstReg);
1173 const auto Cond = Cmp->getCond();
1174
1175 // Reference:
1176 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1177 // Generates a libcall followed by ICMP.
1178 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1179 const CmpInst::Predicate ICmpPred,
1180 const DstOp &Res) -> Register {
1181 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1182 constexpr LLT TempLLT = LLT::scalar(32);
1183 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1184 // Generate libcall, holding result in Temp
1185 const auto Status = createLibcall(
1186 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1187 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1188 LocObserver, &MI);
1189 if (!Status)
1190 return {};
1191
1192 // Compare temp with #0 to get the final result.
1193 return MIRBuilder
1194 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1195 .getReg(0);
1196 };
1197
1198 // Simple case if we have a direct mapping from predicate to libcall
1199 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1200 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1201 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1202 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1203 return Legalized;
1204 }
1205 return UnableToLegalize;
1206 }
1207
1208 // No direct mapping found, should be generated as combination of libcalls.
1209
1210 switch (Cond) {
1211 case CmpInst::FCMP_UEQ: {
1212 // FCMP_UEQ: unordered or equal
1213 // Convert into (FCMP_OEQ || FCMP_UNO).
1214
1215 const auto [OeqLibcall, OeqPred] =
1216 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1217 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1218
1219 const auto [UnoLibcall, UnoPred] =
1220 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1221 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1222 if (Oeq && Uno)
1223 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1224 else
1225 return UnableToLegalize;
1226
1227 break;
1228 }
1229 case CmpInst::FCMP_ONE: {
1230 // FCMP_ONE: ordered and operands are unequal
1231 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1232
1233 // We invert the predicate instead of generating a NOT
1234 // to save one instruction.
1235 // On AArch64, isel can even select two cmps into a single ccmp.
1236 const auto [OeqLibcall, OeqPred] =
1237 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1238 const auto NotOeq =
1239 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1240
1241 const auto [UnoLibcall, UnoPred] =
1242 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1243 const auto NotUno =
1244 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1245
1246 if (NotOeq && NotUno)
1247 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1248 else
1249 return UnableToLegalize;
1250
1251 break;
1252 }
1253 case CmpInst::FCMP_ULT:
1254 case CmpInst::FCMP_UGE:
1255 case CmpInst::FCMP_UGT:
1256 case CmpInst::FCMP_ULE:
1257 case CmpInst::FCMP_ORD: {
1258 // Convert into: !(inverse(Pred))
1259 // E.g. FCMP_ULT becomes !FCMP_OGE
1260 // This is equivalent to the following, but saves some instructions.
1261 // MIRBuilder.buildNot(
1262 // PredTy,
1263 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1264 // Op1, Op2));
1265 const auto [InversedLibcall, InversedPred] =
1266 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1267 if (!BuildLibcall(InversedLibcall,
1268 CmpInst::getInversePredicate(InversedPred), DstReg))
1269 return UnableToLegalize;
1270 break;
1271 }
1272 default:
1273 return UnableToLegalize;
1274 }
1275
1276 return Legalized;
1277}
1278
1279// The function is used to legalize operations that set default environment
1280// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1281// On most targets supported in glibc FE_DFL_MODE is defined as
1282// `((const femode_t *) -1)`. Such assumption is used here. If for some target
1283// it is not true, the target must provide custom lowering.
1284LegalizerHelper::LegalizeResult
1285LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1286 MachineInstr &MI,
1287 LostDebugLocObserver &LocObserver) {
1288 const DataLayout &DL = MIRBuilder.getDataLayout();
1289 auto &MF = MIRBuilder.getMF();
1290 auto &Ctx = MF.getFunction().getContext();
1291
1292 // Create an argument for the library function.
1293 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1294 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1295 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1296 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1297 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1298 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1299 MIRBuilder.buildIntToPtr(Dest, DefValue);
1300
1301 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1302 return createLibcall(MIRBuilder, RTLibcall,
1303 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1304 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1305 LocObserver, &MI);
1306}
1307
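// Entry point for the Libcall legalization action: pick the runtime routine
// matching MI's opcode and type, emit the call (plus any loads or stores of
// stack temporaries it needs), and erase MI on success.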
1308LegalizerHelper::LegalizeResult
1309LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1310 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1311
1312 switch (MI.getOpcode()) {
1313 default:
1314 return UnableToLegalize;
1315 case TargetOpcode::G_MUL:
1316 case TargetOpcode::G_SDIV:
1317 case TargetOpcode::G_UDIV:
1318 case TargetOpcode::G_SREM:
1319 case TargetOpcode::G_UREM:
1320 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1321 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1322 unsigned Size = LLTy.getSizeInBits();
1323 Type *HLTy = IntegerType::get(Ctx, Size);
1324 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1325 if (Status != Legalized)
1326 return Status;
1327 break;
1328 }
1329 case TargetOpcode::G_FADD:
1330 case TargetOpcode::G_FSUB:
1331 case TargetOpcode::G_FMUL:
1332 case TargetOpcode::G_FDIV:
1333 case TargetOpcode::G_FMA:
1334 case TargetOpcode::G_FPOW:
1335 case TargetOpcode::G_FREM:
1336 case TargetOpcode::G_FCOS:
1337 case TargetOpcode::G_FSIN:
1338 case TargetOpcode::G_FTAN:
1339 case TargetOpcode::G_FACOS:
1340 case TargetOpcode::G_FASIN:
1341 case TargetOpcode::G_FATAN:
1342 case TargetOpcode::G_FATAN2:
1343 case TargetOpcode::G_FCOSH:
1344 case TargetOpcode::G_FSINH:
1345 case TargetOpcode::G_FTANH:
1346 case TargetOpcode::G_FLOG10:
1347 case TargetOpcode::G_FLOG:
1348 case TargetOpcode::G_FLOG2:
1349 case TargetOpcode::G_FEXP:
1350 case TargetOpcode::G_FEXP2:
1351 case TargetOpcode::G_FEXP10:
1352 case TargetOpcode::G_FCEIL:
1353 case TargetOpcode::G_FFLOOR:
1354 case TargetOpcode::G_FMINNUM:
1355 case TargetOpcode::G_FMAXNUM:
1356 case TargetOpcode::G_FMINIMUMNUM:
1357 case TargetOpcode::G_FMAXIMUMNUM:
1358 case TargetOpcode::G_FSQRT:
1359 case TargetOpcode::G_FRINT:
1360 case TargetOpcode::G_FNEARBYINT:
1361 case TargetOpcode::G_INTRINSIC_TRUNC:
1362 case TargetOpcode::G_INTRINSIC_ROUND:
1363 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1364 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1365 unsigned Size = LLTy.getSizeInBits();
1366 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1372 if (Status != Legalized)
1373 return Status;
1374 break;
1375 }
1376 case TargetOpcode::G_FSINCOS: {
1377 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1378 unsigned Size = LLTy.getSizeInBits();
1379 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1380 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1381 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1382 return UnableToLegalize;
1383 }
1384 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1385 }
1386 case TargetOpcode::G_FMODF: {
1387 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1388 unsigned Size = LLTy.getSizeInBits();
1389 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1390 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1391 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1392 return UnableToLegalize;
1393 }
1394 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1395 }
1396 case TargetOpcode::G_LROUND:
1397 case TargetOpcode::G_LLROUND:
1398 case TargetOpcode::G_INTRINSIC_LRINT:
1399 case TargetOpcode::G_INTRINSIC_LLRINT: {
1400 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1401 unsigned Size = LLTy.getSizeInBits();
1402 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1403 Type *ITy = IntegerType::get(
1404 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1405 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1406 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1407 return UnableToLegalize;
1408 }
1409 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1410 LegalizeResult Status =
1411 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1412 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1413 if (Status != Legalized)
1414 return Status;
1415 MI.eraseFromParent();
1416 return Legalized;
1417 }
1418 case TargetOpcode::G_FPOWI:
1419 case TargetOpcode::G_FLDEXP: {
1420 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1421 unsigned Size = LLTy.getSizeInBits();
1422 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1423 Type *ITy = IntegerType::get(
1424 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1425 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1426 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1427 return UnableToLegalize;
1428 }
1429 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1430 SmallVector<CallLowering::ArgInfo, 2> Args = {
1431 {MI.getOperand(1).getReg(), HLTy, 0},
1432 {MI.getOperand(2).getReg(), ITy, 1}};
1433 Args[1].Flags[0].setSExt();
1434 LegalizeResult Status =
1435 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1436 Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status =
1448 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1465 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1468 LegalizeResult Status = conversionLibcall(
1469 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 LegalizeResult Status =
1483 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1484 LocObserver, TLI, IsSigned);
1485 if (Status != Legalized)
1486 return Status;
1487 break;
1488 }
1489 case TargetOpcode::G_ATOMICRMW_XCHG:
1490 case TargetOpcode::G_ATOMICRMW_ADD:
1491 case TargetOpcode::G_ATOMICRMW_SUB:
1492 case TargetOpcode::G_ATOMICRMW_AND:
1493 case TargetOpcode::G_ATOMICRMW_OR:
1494 case TargetOpcode::G_ATOMICRMW_XOR:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG:
1496 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1497 auto Status = createAtomicLibcall(MIRBuilder, MI);
1498 if (Status != Legalized)
1499 return Status;
1500 break;
1501 }
1502 case TargetOpcode::G_BZERO:
1503 case TargetOpcode::G_MEMCPY:
1504 case TargetOpcode::G_MEMMOVE:
1505 case TargetOpcode::G_MEMSET: {
1506 LegalizeResult Result =
1507 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1508 if (Result != Legalized)
1509 return Result;
1510 MI.eraseFromParent();
1511 return Result;
1512 }
1513 case TargetOpcode::G_GET_FPENV:
1514 case TargetOpcode::G_GET_FPMODE: {
1515 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1516 if (Result != Legalized)
1517 return Result;
1518 break;
1519 }
1520 case TargetOpcode::G_SET_FPENV:
1521 case TargetOpcode::G_SET_FPMODE: {
1522 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1523 if (Result != Legalized)
1524 return Result;
1525 break;
1526 }
1527 case TargetOpcode::G_RESET_FPENV:
1528 case TargetOpcode::G_RESET_FPMODE: {
1529 LegalizeResult Result =
1530 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1531 if (Result != Legalized)
1532 return Result;
1533 break;
1534 }
1535 }
1536
1537 MI.eraseFromParent();
1538 return Legalized;
1539}
1540
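// Entry point for the NarrowScalar action: rewrite MI to operate on
// NarrowTy-sized pieces of its wide scalar operands, dispatching to the
// per-opcode narrowScalar* helpers.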
1541LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1542 unsigned TypeIdx,
1543 LLT NarrowTy) {
1544 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1545 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1546
1547 switch (MI.getOpcode()) {
1548 default:
1549 return UnableToLegalize;
1550 case TargetOpcode::G_IMPLICIT_DEF: {
1551 Register DstReg = MI.getOperand(0).getReg();
1552 LLT DstTy = MRI.getType(DstReg);
1553
1554 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1555 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1556 // FIXME: Although this would also be legal for the general case, it causes
1557 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1558 // combines not being hit). This seems to be a problem related to the
1559 // artifact combiner.
1560 if (SizeOp0 % NarrowSize != 0) {
1561 LLT ImplicitTy = NarrowTy;
1562 if (DstTy.isVector())
1563 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1564
1565 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1566 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1567
1568 MI.eraseFromParent();
1569 return Legalized;
1570 }
1571
1572 int NumParts = SizeOp0 / NarrowSize;
1573
1573
1574 SmallVector<Register, 8> DstRegs;
1575 for (int i = 0; i < NumParts; ++i)
1576 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1577
1578 if (DstTy.isVector())
1579 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1580 else
1581 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_CONSTANT: {
1586 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1587 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1588 unsigned TotalSize = Ty.getSizeInBits();
1589 unsigned NarrowSize = NarrowTy.getSizeInBits();
1590 int NumParts = TotalSize / NarrowSize;
1591
1592 SmallVector<Register, 4> PartRegs;
1593 for (int I = 0; I != NumParts; ++I) {
1594 unsigned Offset = I * NarrowSize;
1595 auto K = MIRBuilder.buildConstant(NarrowTy,
1596 Val.lshr(Offset).trunc(NarrowSize));
1597 PartRegs.push_back(K.getReg(0));
1598 }
1599
1600 LLT LeftoverTy;
1601 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1602 SmallVector<Register, 1> LeftoverRegs;
1603 if (LeftoverBits != 0) {
1604 LeftoverTy = LLT::scalar(LeftoverBits);
1605 auto K = MIRBuilder.buildConstant(
1606 LeftoverTy,
1607 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1608 LeftoverRegs.push_back(K.getReg(0));
1609 }
1610
1611 insertParts(MI.getOperand(0).getReg(),
1612 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1613
1614 MI.eraseFromParent();
1615 return Legalized;
1616 }
1617 case TargetOpcode::G_SEXT:
1618 case TargetOpcode::G_ZEXT:
1619 case TargetOpcode::G_ANYEXT:
1620 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1621 case TargetOpcode::G_TRUNC: {
1622 if (TypeIdx != 1)
1623 return UnableToLegalize;
1624
1625 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1626 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1627 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1628 return UnableToLegalize;
1629 }
1630
1631 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1632 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1633 MI.eraseFromParent();
1634 return Legalized;
1635 }
1636 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1637 case TargetOpcode::G_FREEZE: {
1638 if (TypeIdx != 0)
1639 return UnableToLegalize;
1640
1641 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1642 // Should widen scalar first
1643 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1644 return UnableToLegalize;
1645
1646 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1647 SmallVector<Register, 8> Parts;
1648 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1649 Parts.push_back(
1650 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1651 .getReg(0));
1652 }
1653
1654 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1655 MI.eraseFromParent();
1656 return Legalized;
1657 }
1658 case TargetOpcode::G_ADD:
1659 case TargetOpcode::G_SUB:
1660 case TargetOpcode::G_SADDO:
1661 case TargetOpcode::G_SSUBO:
1662 case TargetOpcode::G_SADDE:
1663 case TargetOpcode::G_SSUBE:
1664 case TargetOpcode::G_UADDO:
1665 case TargetOpcode::G_USUBO:
1666 case TargetOpcode::G_UADDE:
1667 case TargetOpcode::G_USUBE:
1668 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1669 case TargetOpcode::G_MUL:
1670 case TargetOpcode::G_UMULH:
1671 return narrowScalarMul(MI, NarrowTy);
1672 case TargetOpcode::G_EXTRACT:
1673 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1674 case TargetOpcode::G_INSERT:
1675 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1676 case TargetOpcode::G_LOAD: {
1677 auto &LoadMI = cast<GLoad>(MI);
1678 Register DstReg = LoadMI.getDstReg();
1679 LLT DstTy = MRI.getType(DstReg);
1680 if (DstTy.isVector())
1681 return UnableToLegalize;
1682
1683 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1684 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1685 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1686 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1687 LoadMI.eraseFromParent();
1688 return Legalized;
1689 }
1690
1691 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1692 }
1693 case TargetOpcode::G_ZEXTLOAD:
1694 case TargetOpcode::G_SEXTLOAD: {
1695 auto &LoadMI = cast<GExtLoad>(MI);
1696 Register DstReg = LoadMI.getDstReg();
1697 Register PtrReg = LoadMI.getPointerReg();
1698
1699 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1700 auto &MMO = LoadMI.getMMO();
1701 unsigned MemSize = MMO.getSizeInBits().getValue();
1702
1703 if (MemSize == NarrowSize) {
1704 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1705 } else if (MemSize < NarrowSize) {
1706 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1707 } else if (MemSize > NarrowSize) {
1708 // FIXME: Need to split the load.
1709 return UnableToLegalize;
1710 }
1711
1712 if (isa<GZExtLoad>(LoadMI))
1713 MIRBuilder.buildZExt(DstReg, TmpReg);
1714 else
1715 MIRBuilder.buildSExt(DstReg, TmpReg);
1716
1717 LoadMI.eraseFromParent();
1718 return Legalized;
1719 }
1720 case TargetOpcode::G_STORE: {
1721 auto &StoreMI = cast<GStore>(MI);
1722
1723 Register SrcReg = StoreMI.getValueReg();
1724 LLT SrcTy = MRI.getType(SrcReg);
1725 if (SrcTy.isVector())
1726 return UnableToLegalize;
1727
1728 int NumParts = SizeOp0 / NarrowSize;
1729 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1730 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1731 if (SrcTy.isVector() && LeftoverBits != 0)
1732 return UnableToLegalize;
1733
1734 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1735 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1736 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1737 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1738 StoreMI.eraseFromParent();
1739 return Legalized;
1740 }
1741
1742 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1743 }
1744 case TargetOpcode::G_SELECT:
1745 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1746 case TargetOpcode::G_AND:
1747 case TargetOpcode::G_OR:
1748 case TargetOpcode::G_XOR: {
1749 // Legalize bitwise operation:
1750 // A = BinOp<Ty> B, C
1751 // into:
1752 // B1, ..., BN = G_UNMERGE_VALUES B
1753 // C1, ..., CN = G_UNMERGE_VALUES C
1754 // A1 = BinOp<Ty/N> B1, C1
1755 // ...
1756 // AN = BinOp<Ty/N> BN, CN
1757 // A = G_MERGE_VALUES A1, ..., AN
1758 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1759 }
1760 case TargetOpcode::G_SHL:
1761 case TargetOpcode::G_LSHR:
1762 case TargetOpcode::G_ASHR:
1763 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1764 case TargetOpcode::G_CTLZ:
1765 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1766 case TargetOpcode::G_CTTZ:
1767 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1768 case TargetOpcode::G_CTPOP:
1769 if (TypeIdx == 1)
1770 switch (MI.getOpcode()) {
1771 case TargetOpcode::G_CTLZ:
1772 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1773 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTTZ:
1775 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1776 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1777 case TargetOpcode::G_CTPOP:
1778 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1779 default:
1780 return UnableToLegalize;
1781 }
1782
1783 Observer.changingInstr(MI);
1784 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_INTTOPTR:
1788 if (TypeIdx != 1)
1789 return UnableToLegalize;
1790
1791 Observer.changingInstr(MI);
1792 narrowScalarSrc(MI, NarrowTy, 1);
1793 Observer.changedInstr(MI);
1794 return Legalized;
1795 case TargetOpcode::G_PTRTOINT:
1796 if (TypeIdx != 0)
1797 return UnableToLegalize;
1798
1799 Observer.changingInstr(MI);
1800 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1801 Observer.changedInstr(MI);
1802 return Legalized;
1803 case TargetOpcode::G_PHI: {
1804 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1805 // NarrowSize.
1806 if (SizeOp0 % NarrowSize != 0)
1807 return UnableToLegalize;
1808
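// Split each incoming value into NarrowTy pieces in its predecessor block,
// build one narrow G_PHI per piece, and re-merge the pieces at the start of
// this block.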
1809 unsigned NumParts = SizeOp0 / NarrowSize;
1810 SmallVector<Register, 2> DstRegs(NumParts);
1811 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1812 Observer.changingInstr(MI);
1813 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1814 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1815 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1816 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1817 SrcRegs[i / 2], MIRBuilder, MRI);
1818 }
1819 MachineBasicBlock &MBB = *MI.getParent();
1820 MIRBuilder.setInsertPt(MBB, MI);
1821 for (unsigned i = 0; i < NumParts; ++i) {
1822 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1823 MachineInstrBuilder MIB =
1824 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1825 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1826 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1827 }
1828 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1829 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1830 Observer.changedInstr(MI);
1831 MI.eraseFromParent();
1832 return Legalized;
1833 }
1834 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1835 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 if (TypeIdx != 2)
1837 return UnableToLegalize;
1838
1839 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1840 Observer.changingInstr(MI);
1841 narrowScalarSrc(MI, NarrowTy, OpIdx);
1842 Observer.changedInstr(MI);
1843 return Legalized;
1844 }
1845 case TargetOpcode::G_ICMP: {
1846 Register LHS = MI.getOperand(2).getReg();
1847 LLT SrcTy = MRI.getType(LHS);
1848 CmpInst::Predicate Pred =
1849 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1850
1851 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1852 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1853 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1854 LHSLeftoverRegs, MIRBuilder, MRI))
1855 return UnableToLegalize;
1856
1857 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1858 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1859 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1860 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1861 return UnableToLegalize;
1862
1863 // We now have the LHS and RHS of the compare split into narrow-type
1864 // registers, plus potentially some leftover type.
1865 Register Dst = MI.getOperand(0).getReg();
1866 LLT ResTy = MRI.getType(Dst);
1867 if (ICmpInst::isEquality(Pred)) {
1868 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1869 // them together. For each equal part, the result should be all 0s. For
1870 // each non-equal part, we'll get at least one 1.
1871 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1872 SmallVector<Register, 4> Xors;
1873 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1874 auto LHS = std::get<0>(LHSAndRHS);
1875 auto RHS = std::get<1>(LHSAndRHS);
1876 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1877 Xors.push_back(Xor);
1878 }
1879
1880 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1881 // to the desired narrow type so that we can OR them together later.
1882 SmallVector<Register, 4> WidenedXors;
1883 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1884 auto LHS = std::get<0>(LHSAndRHS);
1885 auto RHS = std::get<1>(LHSAndRHS);
1886 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1887 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1888 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1889 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1890 llvm::append_range(Xors, WidenedXors);
1891 }
1892
1893 // Now, for each part we broke up, we know if they are equal/not equal
1894 // based off the G_XOR. We can OR these all together and compare against
1895 // 0 to get the result.
1896 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1897 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1898 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1899 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1900 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1901 } else {
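// Non-equality predicate: compare piece by piece from least to most
// significant. When the more significant pieces are equal, the running
// result from the lower pieces is kept via a select; otherwise the current
// piece's unsigned comparison wins. Only the most significant piece uses
// the original (possibly signed) predicate.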
1902 Register CmpIn;
1903 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1904 Register CmpOut;
1905 CmpInst::Predicate PartPred;
1906
1907 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1908 PartPred = Pred;
1909 CmpOut = Dst;
1910 } else {
1911 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1912 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 }
1914
1915 if (!CmpIn) {
1916 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 } else {
1919 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1920 RHSPartRegs[I]);
1921 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1922 LHSPartRegs[I], RHSPartRegs[I]);
1923 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1924 }
1925
1926 CmpIn = CmpOut;
1927 }
1928
1929 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1930 Register CmpOut;
1931 CmpInst::Predicate PartPred;
1932
1933 if (I == E - 1) {
1934 PartPred = Pred;
1935 CmpOut = Dst;
1936 } else {
1937 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1938 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 }
1940
1941 if (!CmpIn) {
1942 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 } else {
1945 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1946 RHSLeftoverRegs[I]);
1947 auto CmpEq =
1948 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1949 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1950 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1951 }
1952
1953 CmpIn = CmpOut;
1954 }
1955 }
1956 MI.eraseFromParent();
1957 return Legalized;
1958 }
1959 case TargetOpcode::G_FCMP:
1960 if (TypeIdx != 0)
1961 return UnableToLegalize;
1962
1963 Observer.changingInstr(MI);
1964 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1965 Observer.changedInstr(MI);
1966 return Legalized;
1967
1968 case TargetOpcode::G_SEXT_INREG: {
1969 if (TypeIdx != 0)
1970 return UnableToLegalize;
1971
1972 int64_t SizeInBits = MI.getOperand(2).getImm();
1973
1974 // So long as the new type has more bits than the bits we're extending, we
1975 // don't need to break it apart.
1976 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1977 Observer.changingInstr(MI);
1978 // We don't lose any non-extension bits by truncating the src and
1979 // sign-extending the dst.
1980 MachineOperand &MO1 = MI.getOperand(1);
1981 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1982 MO1.setReg(TruncMIB.getReg(0));
1983
1984 MachineOperand &MO2 = MI.getOperand(0);
1985 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1986 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1987 MIRBuilder.buildSExt(MO2, DstExt);
1988 MO2.setReg(DstExt);
1989 Observer.changedInstr(MI);
1990 return Legalized;
1991 }
1992
1993 // Break it apart. Components below the extension point are unmodified. The
1994 // component containing the extension point becomes a narrower SEXT_INREG.
1995 // Components above it are ashr'd from the component containing the
1996 // extension point.
1997 if (SizeOp0 % NarrowSize != 0)
1998 return UnableToLegalize;
1999 int NumParts = SizeOp0 / NarrowSize;
2000
2001 // List the registers where the destination will be scattered.
2002 SmallVector<Register, 2> DstRegs;
2003 // List the registers where the source will be split.
2004 SmallVector<Register, 2> SrcRegs;
2005
2006 // Create all the temporary registers.
2007 for (int i = 0; i < NumParts; ++i) {
2008 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2009
2010 SrcRegs.push_back(SrcReg);
2011 }
2012
2013 // Explode the big arguments into smaller chunks.
2014 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2015
2016 Register AshrCstReg =
2017 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2018 .getReg(0);
2019 Register FullExtensionReg;
2020 Register PartialExtensionReg;
2021
2022 // Do the operation on each small part.
2023 for (int i = 0; i < NumParts; ++i) {
2024 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2025 DstRegs.push_back(SrcRegs[i]);
2026 PartialExtensionReg = DstRegs.back();
2027 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2028 assert(PartialExtensionReg &&
2029 "Expected to visit partial extension before full");
2030 if (FullExtensionReg) {
2031 DstRegs.push_back(FullExtensionReg);
2032 continue;
2033 }
2034 DstRegs.push_back(
2035 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2036 .getReg(0));
2037 FullExtensionReg = DstRegs.back();
2038 } else {
2039 DstRegs.push_back(
2040 MIRBuilder
2041 .buildInstr(
2042 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2043 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2044 .getReg(0));
2045 PartialExtensionReg = DstRegs.back();
2046 }
2047 }
2048
2049 // Gather the destination registers into the final destination.
2050 Register DstReg = MI.getOperand(0).getReg();
2051 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2052 MI.eraseFromParent();
2053 return Legalized;
2054 }
2055 case TargetOpcode::G_BSWAP:
2056 case TargetOpcode::G_BITREVERSE: {
2057 if (SizeOp0 % NarrowSize != 0)
2058 return UnableToLegalize;
2059
2060 Observer.changingInstr(MI);
2061 SmallVector<Register, 2> SrcRegs, DstRegs;
2062 unsigned NumParts = SizeOp0 / NarrowSize;
2063 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2064 MIRBuilder, MRI);
2065
2066 for (unsigned i = 0; i < NumParts; ++i) {
2067 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2068 {SrcRegs[NumParts - 1 - i]});
2069 DstRegs.push_back(DstPart.getReg(0));
2070 }
2071
2072 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2073
2074 Observer.changedInstr(MI);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 case TargetOpcode::G_PTRMASK: {
2080 if (TypeIdx != 1)
2081 return UnableToLegalize;
2082 Observer.changingInstr(MI);
2083 narrowScalarSrc(MI, NarrowTy, 2);
2084 Observer.changedInstr(MI);
2085 return Legalized;
2086 }
2087 case TargetOpcode::G_FPTOUI:
2088 case TargetOpcode::G_FPTOSI:
2089 case TargetOpcode::G_FPTOUI_SAT:
2090 case TargetOpcode::G_FPTOSI_SAT:
2091 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2092 case TargetOpcode::G_FPEXT:
2093 if (TypeIdx != 0)
2094 return UnableToLegalize;
2095 Observer.changingInstr(MI);
2096 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2097 Observer.changedInstr(MI);
2098 return Legalized;
2099 case TargetOpcode::G_FLDEXP:
2100 case TargetOpcode::G_STRICT_FLDEXP:
2101 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2102 case TargetOpcode::G_VSCALE: {
2103 Register Dst = MI.getOperand(0).getReg();
2104 LLT Ty = MRI.getType(Dst);
2105
2106 // Assume VSCALE(1) fits into a legal integer
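// Materialize vscale(1) in the narrow type, zero-extend it to the original
// width, and do the multiply by the requested constant there.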
2107 const APInt One(NarrowTy.getSizeInBits(), 1);
2108 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2109 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2110 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2111 MIRBuilder.buildMul(Dst, ZExt, C);
2112
2113 MI.eraseFromParent();
2114 return Legalized;
2115 }
2116 }
2117}
2118
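// Coerce a pointer or vector value to a scalar integer of the same width.
// Returns an invalid register for pointers in non-integral address spaces,
// where a ptrtoint would not be meaningful.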
2119 Register LegalizerHelper::coerceToScalar(Register Val) {
2120 LLT Ty = MRI.getType(Val);
2121 if (Ty.isScalar())
2122 return Val;
2123
2124 const DataLayout &DL = MIRBuilder.getDataLayout();
2125 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2126 if (Ty.isPointer()) {
2127 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2128 return Register();
2129 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2130 }
2131
2132 Register NewVal = Val;
2133
2134 assert(Ty.isVector());
2135 if (Ty.isPointerVector())
2136 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2137 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138}
2139
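// Replace operand OpIdx of MI with a value extended to WideTy using
// ExtOpcode.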
2140 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2141 unsigned OpIdx, unsigned ExtOpcode) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2144 MO.setReg(ExtB.getReg(0));
2145}
2146
2147 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2148 unsigned OpIdx) {
2149 MachineOperand &MO = MI.getOperand(OpIdx);
2150 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2151 MO.setReg(ExtB.getReg(0));
2152}
2153
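// Retarget def OpIdx of MI to a new WideTy register and emit TruncOpcode
// right after MI to produce the original narrow value.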
2154 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2155 unsigned OpIdx, unsigned TruncOpcode) {
2156 MachineOperand &MO = MI.getOperand(OpIdx);
2157 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2158 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2159 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2160 MO.setReg(DstExt);
2161}
2162
2163 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2164 unsigned OpIdx, unsigned ExtOpcode) {
2165 MachineOperand &MO = MI.getOperand(OpIdx);
2166 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2167 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2168 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2169 MO.setReg(DstTrunc);
2170}
2171
2172 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2173 unsigned OpIdx) {
2174 MachineOperand &MO = MI.getOperand(OpIdx);
2175 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2176 Register Dst = MO.getReg();
2177 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2178 MO.setReg(DstExt);
2179 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2180}
2181
2182 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2183 unsigned OpIdx) {
2184 MachineOperand &MO = MI.getOperand(OpIdx);
2185 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2186}
2187
2188 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2189 MachineOperand &Op = MI.getOperand(OpIdx);
2190 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2191}
2192
2193 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2194 MachineOperand &MO = MI.getOperand(OpIdx);
2195 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2196 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2197 MIRBuilder.buildBitcast(MO, CastDst);
2198 MO.setReg(CastDst);
2199}
2200
2201 LegalizerHelper::LegalizeResult
2202 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2203 LLT WideTy) {
2204 if (TypeIdx != 1)
2205 return UnableToLegalize;
2206
2207 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2208 if (DstTy.isVector())
2209 return UnableToLegalize;
2210
2211 LLT SrcTy = MRI.getType(Src1Reg);
2212 const int DstSize = DstTy.getSizeInBits();
2213 const int SrcSize = SrcTy.getSizeInBits();
2214 const int WideSize = WideTy.getSizeInBits();
2215 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2216
2217 unsigned NumOps = MI.getNumOperands();
2218 unsigned NumSrc = MI.getNumOperands() - 1;
2219 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2220
2221 if (WideSize >= DstSize) {
2222 // Directly pack the bits in the target type.
2223 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2224
2225 for (unsigned I = 2; I != NumOps; ++I) {
2226 const unsigned Offset = (I - 1) * PartSize;
2227
2228 Register SrcReg = MI.getOperand(I).getReg();
2229 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2230
2231 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2232
2233 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2234 MRI.createGenericVirtualRegister(WideTy);
2235
2236 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2237 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2238 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2239 ResultReg = NextResult;
2240 }
2241
2242 if (WideSize > DstSize)
2243 MIRBuilder.buildTrunc(DstReg, ResultReg);
2244 else if (DstTy.isPointer())
2245 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2246
2247 MI.eraseFromParent();
2248 return Legalized;
2249 }
2250
2251 // Unmerge the original values to the GCD type, and recombine to the next
2252 // multiple greater than the original type.
2253 //
2254 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2255 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2256 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2257 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2258 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2259 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2260 // %12:_(s12) = G_MERGE_VALUES %10, %11
2261 //
2262 // Padding with undef if necessary:
2263 //
2264 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2265 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2266 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2267 // %7:_(s2) = G_IMPLICIT_DEF
2268 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2269 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2270 // %10:_(s12) = G_MERGE_VALUES %8, %9
2271
2272 const int GCD = std::gcd(SrcSize, WideSize);
2273 LLT GCDTy = LLT::scalar(GCD);
2274
2275 SmallVector<Register, 8> NewMergeRegs;
2276 SmallVector<Register, 8> Unmerges;
2277 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2278
2279 // Decompose the original operands if they don't evenly divide.
2280 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2281 Register SrcReg = MO.getReg();
2282 if (GCD == SrcSize) {
2283 Unmerges.push_back(SrcReg);
2284 } else {
2285 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2286 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2287 Unmerges.push_back(Unmerge.getReg(J));
2288 }
2289 }
2290
2291 // Pad with undef to the next size that is a multiple of the requested size.
2292 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2293 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2294 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2295 Unmerges.push_back(UndefReg);
2296 }
2297
2298 const int PartsPerGCD = WideSize / GCD;
2299
2300 // Build merges of each piece.
2301 ArrayRef<Register> Slicer(Unmerges);
2302 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2303 auto Merge =
2304 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2305 NewMergeRegs.push_back(Merge.getReg(0));
2306 }
2307
2308 // A truncate may be necessary if the requested type doesn't evenly divide the
2309 // original result type.
2310 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2311 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2312 } else {
2313 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2314 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2315 }
2316
2317 MI.eraseFromParent();
2318 return Legalized;
2319}
2320
2321 LegalizerHelper::LegalizeResult
2322 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2323 LLT WideTy) {
2324 if (TypeIdx != 0)
2325 return UnableToLegalize;
2326
2327 int NumDst = MI.getNumOperands() - 1;
2328 Register SrcReg = MI.getOperand(NumDst).getReg();
2329 LLT SrcTy = MRI.getType(SrcReg);
2330 if (SrcTy.isVector())
2331 return UnableToLegalize;
2332
2333 Register Dst0Reg = MI.getOperand(0).getReg();
2334 LLT DstTy = MRI.getType(Dst0Reg);
2335 if (!DstTy.isScalar())
2336 return UnableToLegalize;
2337
2338 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2339 if (SrcTy.isPointer()) {
2340 const DataLayout &DL = MIRBuilder.getDataLayout();
2341 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2342 LLVM_DEBUG(
2343 dbgs() << "Not casting non-integral address space integer\n");
2344 return UnableToLegalize;
2345 }
2346
2347 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2348 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2349 }
2350
2351 // Widen SrcTy to WideTy. This does not affect the result, but since the
2352 // user requested this size, it is probably better handled than SrcTy and
2353 // should reduce the total number of legalization artifacts.
2354 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2355 SrcTy = WideTy;
2356 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2357 }
2358
2359 // There's no unmerge type to target. Directly extract the bits from the
2360 // source type.
2361 unsigned DstSize = DstTy.getSizeInBits();
2362
2363 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2364 for (int I = 1; I != NumDst; ++I) {
2365 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2366 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2368 }
2369
2370 MI.eraseFromParent();
2371 return Legalized;
2372 }
2373
2374 // Extend the source to a wider type.
2375 LLT LCMTy = getLCMType(SrcTy, WideTy);
2376
2377 Register WideSrc = SrcReg;
2378 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2379 // TODO: If this is an integral address space, cast to integer and anyext.
2380 if (SrcTy.isPointer()) {
2381 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2382 return UnableToLegalize;
2383 }
2384
2385 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2386 }
2387
2388 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2389
2390 // Create a sequence of unmerges and merges to the original results. Since we
2391 // may have widened the source, we will need to pad the results with dead defs
2392 // to cover the source register.
2393 // e.g. widen s48 to s64:
2394 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2395 //
2396 // =>
2397 // %4:_(s192) = G_ANYEXT %0:_(s96)
2398 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2399 // ; unpack to GCD type, with extra dead defs
2400 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2401 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2402 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2403 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2404 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2405 const LLT GCDTy = getGCDType(WideTy, DstTy);
2406 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2407 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2408
2409 // Directly unmerge to the destination without going through a GCD type
2410 // if possible
2411 if (PartsPerRemerge == 1) {
2412 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2413
2414 for (int I = 0; I != NumUnmerge; ++I) {
2415 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2416
2417 for (int J = 0; J != PartsPerUnmerge; ++J) {
2418 int Idx = I * PartsPerUnmerge + J;
2419 if (Idx < NumDst)
2420 MIB.addDef(MI.getOperand(Idx).getReg());
2421 else {
2422 // Create dead def for excess components.
2423 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 }
2425 }
2426
2427 MIB.addUse(Unmerge.getReg(I));
2428 }
2429 } else {
2430 SmallVector<Register, 16> Parts;
2431 for (int J = 0; J != NumUnmerge; ++J)
2432 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2433
2434 SmallVector<Register, 8> RemergeParts;
2435 for (int I = 0; I != NumDst; ++I) {
2436 for (int J = 0; J < PartsPerRemerge; ++J) {
2437 const int Idx = I * PartsPerRemerge + J;
2438 RemergeParts.emplace_back(Parts[Idx]);
2439 }
2440
2441 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2442 RemergeParts.clear();
2443 }
2444 }
2445
2446 MI.eraseFromParent();
2447 return Legalized;
2448}
2449
2450 LegalizerHelper::LegalizeResult
2451 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2452 LLT WideTy) {
2453 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2454 unsigned Offset = MI.getOperand(2).getImm();
2455
2456 if (TypeIdx == 0) {
2457 if (SrcTy.isVector() || DstTy.isVector())
2458 return UnableToLegalize;
2459
2460 SrcOp Src(SrcReg);
2461 if (SrcTy.isPointer()) {
2462 // Extracts from pointers can be handled only if they are really just
2463 // simple integers.
2464 const DataLayout &DL = MIRBuilder.getDataLayout();
2465 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2466 return UnableToLegalize;
2467
2468 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2469 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 SrcTy = SrcAsIntTy;
2471 }
2472
2473 if (DstTy.isPointer())
2474 return UnableToLegalize;
2475
2476 if (Offset == 0) {
2477 // Avoid a shift in the degenerate case.
2478 MIRBuilder.buildTrunc(DstReg,
2479 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2480 MI.eraseFromParent();
2481 return Legalized;
2482 }
2483
2484 // Do a shift in the source type.
2485 LLT ShiftTy = SrcTy;
2486 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2487 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2488 ShiftTy = WideTy;
2489 }
2490
2491 auto LShr = MIRBuilder.buildLShr(
2492 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2493 MIRBuilder.buildTrunc(DstReg, LShr);
2494 MI.eraseFromParent();
2495 return Legalized;
2496 }
2497
2498 if (SrcTy.isScalar()) {
2499 Observer.changingInstr(MI);
2500 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2501 Observer.changedInstr(MI);
2502 return Legalized;
2503 }
2504
2505 if (!SrcTy.isVector())
2506 return UnableToLegalize;
2507
2508 if (DstTy != SrcTy.getElementType())
2509 return UnableToLegalize;
2510
2511 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2512 return UnableToLegalize;
2513
2514 Observer.changingInstr(MI);
2515 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2516
2517 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2518 Offset);
2519 widenScalarDst(MI, WideTy.getScalarType(), 0);
2520 Observer.changedInstr(MI);
2521 return Legalized;
2522}
2523
2524 LegalizerHelper::LegalizeResult
2525 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2526 LLT WideTy) {
2527 if (TypeIdx != 0 || WideTy.isVector())
2528 return UnableToLegalize;
2529 Observer.changingInstr(MI);
2530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2531 widenScalarDst(MI, WideTy);
2532 Observer.changedInstr(MI);
2533 return Legalized;
2534}
2535
2536 LegalizerHelper::LegalizeResult
2537 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2538 LLT WideTy) {
2539 unsigned Opcode;
2540 unsigned ExtOpcode;
2541 std::optional<Register> CarryIn;
2542 switch (MI.getOpcode()) {
2543 default:
2544 llvm_unreachable("Unexpected opcode!");
2545 case TargetOpcode::G_SADDO:
2546 Opcode = TargetOpcode::G_ADD;
2547 ExtOpcode = TargetOpcode::G_SEXT;
2548 break;
2549 case TargetOpcode::G_SSUBO:
2550 Opcode = TargetOpcode::G_SUB;
2551 ExtOpcode = TargetOpcode::G_SEXT;
2552 break;
2553 case TargetOpcode::G_UADDO:
2554 Opcode = TargetOpcode::G_ADD;
2555 ExtOpcode = TargetOpcode::G_ZEXT;
2556 break;
2557 case TargetOpcode::G_USUBO:
2558 Opcode = TargetOpcode::G_SUB;
2559 ExtOpcode = TargetOpcode::G_ZEXT;
2560 break;
2561 case TargetOpcode::G_SADDE:
2562 Opcode = TargetOpcode::G_UADDE;
2563 ExtOpcode = TargetOpcode::G_SEXT;
2564 CarryIn = MI.getOperand(4).getReg();
2565 break;
2566 case TargetOpcode::G_SSUBE:
2567 Opcode = TargetOpcode::G_USUBE;
2568 ExtOpcode = TargetOpcode::G_SEXT;
2569 CarryIn = MI.getOperand(4).getReg();
2570 break;
2571 case TargetOpcode::G_UADDE:
2572 Opcode = TargetOpcode::G_UADDE;
2573 ExtOpcode = TargetOpcode::G_ZEXT;
2574 CarryIn = MI.getOperand(4).getReg();
2575 break;
2576 case TargetOpcode::G_USUBE:
2577 Opcode = TargetOpcode::G_USUBE;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 CarryIn = MI.getOperand(4).getReg();
2580 break;
2581 }
2582
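// TypeIdx 1 refers to the carry/overflow booleans: only the carry-in (if
// present) and the carry-out need widening. For TypeIdx 0, widen the
// arithmetic itself and recompute the overflow flag by checking whether
// re-extending the truncated result changes the wide value.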
2583 if (TypeIdx == 1) {
2584 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2585
2586 Observer.changingInstr(MI);
2587 if (CarryIn)
2588 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2589 widenScalarDst(MI, WideTy, 1);
2590
2591 Observer.changedInstr(MI);
2592 return Legalized;
2593 }
2594
2595 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2596 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2597 // Do the arithmetic in the larger type.
2598 Register NewOp;
2599 if (CarryIn) {
2600 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2601 NewOp = MIRBuilder
2602 .buildInstr(Opcode, {WideTy, CarryOutTy},
2603 {LHSExt, RHSExt, *CarryIn})
2604 .getReg(0);
2605 } else {
2606 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2607 }
2608 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2609 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2610 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2611 // There is no overflow if the ExtOp is the same as NewOp.
2612 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2613 // Now trunc the NewOp to the original result.
2614 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2615 MI.eraseFromParent();
2616 return Legalized;
2617}
2618
2619 LegalizerHelper::LegalizeResult
2620 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2621 LLT WideTy) {
2622 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2623 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2624 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2625 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2626 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2627 // We can convert this to:
2628 // 1. Any extend iN to iM
2629 // 2. SHL by M-N
2630 // 3. [US][ADD|SUB|SHL]SAT
2631 // 4. L/ASHR by M-N
2632 //
2633 // It may be more efficient to lower this to a min and a max operation in
2634 // the higher precision arithmetic if the promoted operation isn't legal,
2635 // but this decision is up to the target's lowering request.
2636 Register DstReg = MI.getOperand(0).getReg();
2637
2638 unsigned NewBits = WideTy.getScalarSizeInBits();
2639 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2640
2641 // For shifts, the RHS is a shift amount: it must be zero-extended to
2642 // preserve its unsigned value and must not itself be shifted left.
2643 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2644 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2645 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2646 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2648 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2649
2650 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2651 {ShiftL, ShiftR}, MI.getFlags());
2652
2653 // Use a shift that will preserve the number of sign bits when the trunc is
2654 // folded away.
2655 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2656 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2657
2658 MIRBuilder.buildTrunc(DstReg, Result);
2659 MI.eraseFromParent();
2660 return Legalized;
2661}
2662
2663 LegalizerHelper::LegalizeResult
2664 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2665 LLT WideTy) {
2666 if (TypeIdx == 1) {
2667 Observer.changingInstr(MI);
2668 widenScalarDst(MI, WideTy, 1);
2669 Observer.changedInstr(MI);
2670 return Legalized;
2671 }
2672
2673 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2674 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2675 LLT SrcTy = MRI.getType(LHS);
2676 LLT OverflowTy = MRI.getType(OriginalOverflow);
2677 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2678
2679 // To determine if the result overflowed in the larger type, we extend the
2680 // input to the larger type, do the multiply (checking if it overflows),
2681 // then also check the high bits of the result to see if overflow happened
2682 // there.
2683 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2684 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2685 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2686
2687 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2688 // so we don't need to check the overflow result of larger type Mulo.
2689 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2690
2691 unsigned MulOpc =
2692 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2693
2694 MachineInstrBuilder Mulo;
2695 if (WideMulCanOverflow)
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2697 {LeftOperand, RightOperand});
2698 else
2699 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2700
2701 auto Mul = Mulo->getOperand(0);
2702 MIRBuilder.buildTrunc(Result, Mul);
2703
2704 MachineInstrBuilder ExtResult;
2705 // Overflow occurred if it occurred in the larger type, or if the high part
2706 // of the result does not zero/sign-extend the low part. Check this second
2707 // possibility first.
2708 if (IsSigned) {
2709 // For signed, overflow occurred when the high part does not sign-extend
2710 // the low part.
2711 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2712 } else {
2713 // Unsigned overflow occurred when the high part does not zero-extend the
2714 // low part.
2715 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2716 }
2717
2718 if (WideMulCanOverflow) {
2719 auto Overflow =
2720 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2721 // Finally check if the multiplication in the larger type itself overflowed.
2722 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2723 } else {
2724 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2725 }
2726 MI.eraseFromParent();
2727 return Legalized;
2728}
2729
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2732 unsigned Opcode = MI.getOpcode();
2733 switch (Opcode) {
2734 default:
2735 return UnableToLegalize;
2736 case TargetOpcode::G_ATOMICRMW_XCHG:
2737 case TargetOpcode::G_ATOMICRMW_ADD:
2738 case TargetOpcode::G_ATOMICRMW_SUB:
2739 case TargetOpcode::G_ATOMICRMW_AND:
2740 case TargetOpcode::G_ATOMICRMW_OR:
2741 case TargetOpcode::G_ATOMICRMW_XOR:
2742 case TargetOpcode::G_ATOMICRMW_MIN:
2743 case TargetOpcode::G_ATOMICRMW_MAX:
2744 case TargetOpcode::G_ATOMICRMW_UMIN:
2745 case TargetOpcode::G_ATOMICRMW_UMAX:
2746 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2747 Observer.changingInstr(MI);
2748 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2749 widenScalarDst(MI, WideTy, 0);
2750 Observer.changedInstr(MI);
2751 return Legalized;
2752 case TargetOpcode::G_ATOMIC_CMPXCHG:
2753 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2754 Observer.changingInstr(MI);
2755 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2756 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2761 if (TypeIdx == 0) {
2762 Observer.changingInstr(MI);
2763 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2764 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2765 widenScalarDst(MI, WideTy, 0);
2766 Observer.changedInstr(MI);
2767 return Legalized;
2768 }
2769 assert(TypeIdx == 1 &&
2770 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2771 Observer.changingInstr(MI);
2772 widenScalarDst(MI, WideTy, 1);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_EXTRACT:
2776 return widenScalarExtract(MI, TypeIdx, WideTy);
2777 case TargetOpcode::G_INSERT:
2778 return widenScalarInsert(MI, TypeIdx, WideTy);
2779 case TargetOpcode::G_MERGE_VALUES:
2780 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2781 case TargetOpcode::G_UNMERGE_VALUES:
2782 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2783 case TargetOpcode::G_SADDO:
2784 case TargetOpcode::G_SSUBO:
2785 case TargetOpcode::G_UADDO:
2786 case TargetOpcode::G_USUBO:
2787 case TargetOpcode::G_SADDE:
2788 case TargetOpcode::G_SSUBE:
2789 case TargetOpcode::G_UADDE:
2790 case TargetOpcode::G_USUBE:
2791 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_UMULO:
2793 case TargetOpcode::G_SMULO:
2794 return widenScalarMulo(MI, TypeIdx, WideTy);
2795 case TargetOpcode::G_SADDSAT:
2796 case TargetOpcode::G_SSUBSAT:
2797 case TargetOpcode::G_SSHLSAT:
2798 case TargetOpcode::G_UADDSAT:
2799 case TargetOpcode::G_USUBSAT:
2800 case TargetOpcode::G_USHLSAT:
2801 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_CTTZ:
2803 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2804 case TargetOpcode::G_CTLZ:
2805 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2806 case TargetOpcode::G_CTPOP: {
2807 if (TypeIdx == 0) {
2808 Observer.changingInstr(MI);
2809 widenScalarDst(MI, WideTy, 0);
2810 Observer.changedInstr(MI);
2811 return Legalized;
2812 }
2813
2814 Register SrcReg = MI.getOperand(1).getReg();
2815
2816 // First extend the input.
2817 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2818 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2819 ? TargetOpcode::G_ANYEXT
2820 : TargetOpcode::G_ZEXT;
2821 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2822 LLT CurTy = MRI.getType(SrcReg);
2823 unsigned NewOpc = Opcode;
2824 if (NewOpc == TargetOpcode::G_CTTZ) {
2825 // The count is the same in the larger type except if the original
2826 // value was zero. This can be handled by setting the bit just off
2827 // the top of the original type.
2828 auto TopBit =
2829 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2830 MIBSrc = MIRBuilder.buildOr(
2831 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2832 // Now we know the operand is non-zero, use the more relaxed opcode.
2833 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2834 }
2835
2836 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2837
2838 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2839 // An optimization where the result is the CTLZ after the left shift by
2840 // the width difference between WideTy and CurTy, that is,
2841 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2842 // Result = ctlz MIBSrc
2843 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2844 MIRBuilder.buildConstant(WideTy, SizeDiff));
2845 }
2846
2847 // Perform the operation at the larger size.
2848 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2849 // This is already the correct result for CTPOP and the CTTZ variants.
2850 if (Opcode == TargetOpcode::G_CTLZ) {
2851 // The correct result is NewOp - (width difference between WideTy and CurTy).
2852 MIBNewOp = MIRBuilder.buildSub(
2853 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2854 }
2855
2856 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2857 MI.eraseFromParent();
2858 return Legalized;
2859 }
2860 case TargetOpcode::G_BSWAP: {
2861 Observer.changingInstr(MI);
2862 Register DstReg = MI.getOperand(0).getReg();
2863
2864 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2865 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2866 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2867 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2868
2869 MI.getOperand(0).setReg(DstExt);
2870
2871 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2872
2873 LLT Ty = MRI.getType(DstReg);
2874 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
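// After byte-swapping in the wide type, the swapped bytes of the original
// value end up in the high part of the result; shift right by DiffBits to
// move them back down before truncating.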
2875 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2876 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2877
2878 MIRBuilder.buildTrunc(DstReg, ShrReg);
2879 Observer.changedInstr(MI);
2880 return Legalized;
2881 }
2882 case TargetOpcode::G_BITREVERSE: {
2883 Observer.changingInstr(MI);
2884
2885 Register DstReg = MI.getOperand(0).getReg();
2886 LLT Ty = MRI.getType(DstReg);
2887 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2888
2889 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2890 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2891 MI.getOperand(0).setReg(DstExt);
2892 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2893
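// As with G_BSWAP above, the reversed bits of the original value land in
// the high part of the wide result; shift them back down before truncating.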
2894 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2895 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2896 MIRBuilder.buildTrunc(DstReg, Shift);
2897 Observer.changedInstr(MI);
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_FREEZE:
2901 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2902 Observer.changingInstr(MI);
2903 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2904 widenScalarDst(MI, WideTy);
2905 Observer.changedInstr(MI);
2906 return Legalized;
2907
2908 case TargetOpcode::G_ABS:
2909 Observer.changingInstr(MI);
2910 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2911 widenScalarDst(MI, WideTy);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_ADD:
2916 case TargetOpcode::G_AND:
2917 case TargetOpcode::G_MUL:
2918 case TargetOpcode::G_OR:
2919 case TargetOpcode::G_XOR:
2920 case TargetOpcode::G_SUB:
2921 case TargetOpcode::G_SHUFFLE_VECTOR:
2922 // Perform operation at larger width (any extension is fine here, high bits
2923 // don't affect the result) and then truncate the result back to the
2924 // original type.
2925 Observer.changingInstr(MI);
2926 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarDst(MI, WideTy);
2929 Observer.changedInstr(MI);
2930 return Legalized;
2931
2932 case TargetOpcode::G_SBFX:
2933 case TargetOpcode::G_UBFX:
2934 Observer.changingInstr(MI);
2935
2936 if (TypeIdx == 0) {
2937 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2938 widenScalarDst(MI, WideTy);
2939 } else {
2940 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2941 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2942 }
2943
2944 Observer.changedInstr(MI);
2945 return Legalized;
2946
2947 case TargetOpcode::G_SHL:
2948 Observer.changingInstr(MI);
2949
2950 if (TypeIdx == 0) {
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 assert(TypeIdx == 1);
2955 // The "number of bits to shift" operand must preserve its value as an
2956 // unsigned integer:
2957 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2958 }
2959
2960 Observer.changedInstr(MI);
2961 return Legalized;
2962
2963 case TargetOpcode::G_ROTR:
2964 case TargetOpcode::G_ROTL:
2965 if (TypeIdx != 1)
2966 return UnableToLegalize;
2967
2968 Observer.changingInstr(MI);
2969 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2970 Observer.changedInstr(MI);
2971 return Legalized;
2972
2973 case TargetOpcode::G_SDIV:
2974 case TargetOpcode::G_SREM:
2975 case TargetOpcode::G_SMIN:
2976 case TargetOpcode::G_SMAX:
2977 case TargetOpcode::G_ABDS:
2978 Observer.changingInstr(MI);
2979 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2980 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2981 widenScalarDst(MI, WideTy);
2982 Observer.changedInstr(MI);
2983 return Legalized;
2984
2985 case TargetOpcode::G_SDIVREM:
2986 Observer.changingInstr(MI);
2987 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2988 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2989 widenScalarDst(MI, WideTy);
2990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2991 widenScalarDst(MI, WideTy, 1);
2992 Observer.changedInstr(MI);
2993 return Legalized;
2994
2995 case TargetOpcode::G_ASHR:
2996 case TargetOpcode::G_LSHR:
2997 Observer.changingInstr(MI);
2998
2999 if (TypeIdx == 0) {
3000 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3001 : TargetOpcode::G_ZEXT;
3002
3003 widenScalarSrc(MI, WideTy, 1, CvtOp);
3004 widenScalarDst(MI, WideTy);
3005 } else {
3006 assert(TypeIdx == 1);
3007 // The "number of bits to shift" operand must preserve its value as an
3008 // unsigned integer:
3009 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3010 }
3011
3012 Observer.changedInstr(MI);
3013 return Legalized;
3014 case TargetOpcode::G_UDIV:
3015 case TargetOpcode::G_UREM:
3016 case TargetOpcode::G_ABDU:
3017 Observer.changingInstr(MI);
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3019 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3020 widenScalarDst(MI, WideTy);
3021 Observer.changedInstr(MI);
3022 return Legalized;
3023 case TargetOpcode::G_UDIVREM:
3024 Observer.changingInstr(MI);
3025 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3026 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3027 widenScalarDst(MI, WideTy);
3028 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3029 widenScalarDst(MI, WideTy, 1);
3030 Observer.changedInstr(MI);
3031 return Legalized;
3032 case TargetOpcode::G_UMIN:
3033 case TargetOpcode::G_UMAX: {
3034 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3035
3036 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3037 unsigned ExtOpc =
3038 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3039 getApproximateEVTForLLT(WideTy, Ctx))
3040 ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 Observer.changingInstr(MI);
3044 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3045 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3046 widenScalarDst(MI, WideTy);
3047 Observer.changedInstr(MI);
3048 return Legalized;
3049 }
3050
3051 case TargetOpcode::G_SELECT:
3052 Observer.changingInstr(MI);
3053 if (TypeIdx == 0) {
3054 // Perform operation at larger width (any extension is fine here, high
3055 // bits don't affect the result) and then truncate the result back to the
3056 // original type.
3057 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3058 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3059 widenScalarDst(MI, WideTy);
3060 } else {
3061 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3062 // Explicit extension is required here since high bits affect the result.
3063 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3064 }
3065 Observer.changedInstr(MI);
3066 return Legalized;
3067
3068 case TargetOpcode::G_FPTOSI:
3069 case TargetOpcode::G_FPTOUI:
3070 case TargetOpcode::G_INTRINSIC_LRINT:
3071 case TargetOpcode::G_INTRINSIC_LLRINT:
3072 case TargetOpcode::G_IS_FPCLASS:
3073 Observer.changingInstr(MI);
3074
3075 if (TypeIdx == 0)
3076 widenScalarDst(MI, WideTy);
3077 else
3078 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3079
3080 Observer.changedInstr(MI);
3081 return Legalized;
3082 case TargetOpcode::G_SITOFP:
3083 Observer.changingInstr(MI);
3084
3085 if (TypeIdx == 0)
3086 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3087 else
3088 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3089
3090 Observer.changedInstr(MI);
3091 return Legalized;
3092 case TargetOpcode::G_UITOFP:
3093 Observer.changingInstr(MI);
3094
3095 if (TypeIdx == 0)
3096 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3097 else
3098 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3099
3100 Observer.changedInstr(MI);
3101 return Legalized;
3102 case TargetOpcode::G_FPTOSI_SAT:
3103 case TargetOpcode::G_FPTOUI_SAT:
3104 Observer.changingInstr(MI);
3105
3106 if (TypeIdx == 0) {
3107 Register OldDst = MI.getOperand(0).getReg();
3108 LLT Ty = MRI.getType(OldDst);
3109 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3110 Register NewDst;
3111 MI.getOperand(0).setReg(ExtReg);
3112 uint64_t ShortBits = Ty.getScalarSizeInBits();
3113 uint64_t WideBits = WideTy.getScalarSizeInBits();
3114 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3115 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3116 // z = i16 fptosi_sat(a)
3117 // ->
3118 // x = i32 fptosi_sat(a)
3119 // y = smin(x, 32767)
3120 // z = smax(y, -32768)
3121 auto MaxVal = MIRBuilder.buildConstant(
3122 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3123 auto MinVal = MIRBuilder.buildConstant(
3124 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3125 Register MidReg =
3126 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3127 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3128 } else {
3129 // z = i16 fptoui_sat(a)
3130 // ->
3131 // x = i32 fptoui_sat(a)
3132 // y = smin(x, 65535)
3133 auto MaxVal = MIRBuilder.buildConstant(
3134 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3135 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3136 }
3137 MIRBuilder.buildTrunc(OldDst, NewDst);
3138 } else
3139 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3140
3141 Observer.changedInstr(MI);
3142 return Legalized;
3143 case TargetOpcode::G_LOAD:
3144 case TargetOpcode::G_SEXTLOAD:
3145 case TargetOpcode::G_ZEXTLOAD:
3146 Observer.changingInstr(MI);
3147 widenScalarDst(MI, WideTy);
3148 Observer.changedInstr(MI);
3149 return Legalized;
3150
3151 case TargetOpcode::G_STORE: {
3152 if (TypeIdx != 0)
3153 return UnableToLegalize;
3154
3155 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3156 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3157 if (!Ty.isScalar()) {
3158 // We need to widen the vector element type.
3159 Observer.changingInstr(MI);
3160 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3161 // We also need to adjust the MMO to turn this into a truncating store.
3162 MachineMemOperand &MMO = **MI.memoperands_begin();
3163 MachineFunction &MF = MIRBuilder.getMF();
3164 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3165 MI.setMemRefs(MF, {NewMMO});
3166 Observer.changedInstr(MI);
3167 return Legalized;
3168 }
3169
3170 Observer.changingInstr(MI);
3171
3172 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3173 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3174 widenScalarSrc(MI, WideTy, 0, ExtType);
3175
3176 Observer.changedInstr(MI);
3177 return Legalized;
3178 }
3179 case TargetOpcode::G_CONSTANT: {
3180 MachineOperand &SrcMO = MI.getOperand(1);
3181 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
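// The target chooses which extension (zero, sign, or any) best preserves
// the constant when widened; re-encode the immediate accordingly and widen
// the def.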
3182 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3183 MRI.getType(MI.getOperand(0).getReg()));
3184 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3185 ExtOpc == TargetOpcode::G_ANYEXT) &&
3186 "Illegal Extend");
3187 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3188 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3189 ? SrcVal.sext(WideTy.getSizeInBits())
3190 : SrcVal.zext(WideTy.getSizeInBits());
3191 Observer.changingInstr(MI);
3192 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3193
3194 widenScalarDst(MI, WideTy);
3195 Observer.changedInstr(MI);
3196 return Legalized;
3197 }
3198 case TargetOpcode::G_FCONSTANT: {
3199 // To avoid changing the bits of the constant due to extension to a larger
3200 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3201 MachineOperand &SrcMO = MI.getOperand(1);
3202 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3203 MIRBuilder.setInstrAndDebugLoc(MI);
3204 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3205 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3206 MI.eraseFromParent();
3207 return Legalized;
3208 }
3209 case TargetOpcode::G_IMPLICIT_DEF: {
3210 Observer.changingInstr(MI);
3211 widenScalarDst(MI, WideTy);
3212 Observer.changedInstr(MI);
3213 return Legalized;
3214 }
3215 case TargetOpcode::G_BRCOND:
3216 Observer.changingInstr(MI);
3217 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3218 Observer.changedInstr(MI);
3219 return Legalized;
3220
3221 case TargetOpcode::G_FCMP:
3222 Observer.changingInstr(MI);
3223 if (TypeIdx == 0)
3224 widenScalarDst(MI, WideTy);
3225 else {
3226 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3227 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3228 }
3229 Observer.changedInstr(MI);
3230 return Legalized;
3231
3232 case TargetOpcode::G_ICMP:
3233 Observer.changingInstr(MI);
3234 if (TypeIdx == 0)
3235 widenScalarDst(MI, WideTy);
3236 else {
3237 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3238 CmpInst::Predicate Pred =
3239 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3240
3241 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3242 unsigned ExtOpcode =
3243 (CmpInst::isSigned(Pred) ||
3244 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3245 getApproximateEVTForLLT(WideTy, Ctx)))
3246 ? TargetOpcode::G_SEXT
3247 : TargetOpcode::G_ZEXT;
3248 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3249 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3250 }
3251 Observer.changedInstr(MI);
3252 return Legalized;
3253
3254 case TargetOpcode::G_PTR_ADD:
3255 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3256 Observer.changingInstr(MI);
3257 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3258 Observer.changedInstr(MI);
3259 return Legalized;
3260
3261 case TargetOpcode::G_PHI: {
3262 assert(TypeIdx == 0 && "Expecting only Idx 0");
3263
3264 Observer.changingInstr(MI);
3265 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3266 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3267 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3268 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3269 }
3270
3271 MachineBasicBlock &MBB = *MI.getParent();
3272 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3273 widenScalarDst(MI, WideTy);
3274 Observer.changedInstr(MI);
3275 return Legalized;
3276 }
3277 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3278 if (TypeIdx == 0) {
3279 Register VecReg = MI.getOperand(1).getReg();
3280 LLT VecTy = MRI.getType(VecReg);
3281 Observer.changingInstr(MI);
3282
3283 widenScalarSrc(
3284 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3285 TargetOpcode::G_ANYEXT);
3286
3287 widenScalarDst(MI, WideTy, 0);
3288 Observer.changedInstr(MI);
3289 return Legalized;
3290 }
3291
3292 if (TypeIdx != 2)
3293 return UnableToLegalize;
3294 Observer.changingInstr(MI);
3295 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3296 Observer.changedInstr(MI);
3297 return Legalized;
3298 }
3299 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3300 if (TypeIdx == 0) {
3301 Observer.changingInstr(MI);
3302 const LLT WideEltTy = WideTy.getElementType();
3303
3304 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3305 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3306 widenScalarDst(MI, WideTy, 0);
3307 Observer.changedInstr(MI);
3308 return Legalized;
3309 }
3310
3311 if (TypeIdx == 1) {
3312 Observer.changingInstr(MI);
3313
3314 Register VecReg = MI.getOperand(1).getReg();
3315 LLT VecTy = MRI.getType(VecReg);
3316 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3317
3318 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3319 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3320 widenScalarDst(MI, WideVecTy, 0);
3321 Observer.changedInstr(MI);
3322 return Legalized;
3323 }
3324
3325 if (TypeIdx == 2) {
3326 Observer.changingInstr(MI);
3327 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3328 Observer.changedInstr(MI);
3329 return Legalized;
3330 }
3331
3332 return UnableToLegalize;
3333 }
3334 case TargetOpcode::G_FADD:
3335 case TargetOpcode::G_FMUL:
3336 case TargetOpcode::G_FSUB:
3337 case TargetOpcode::G_FMA:
3338 case TargetOpcode::G_FMAD:
3339 case TargetOpcode::G_FNEG:
3340 case TargetOpcode::G_FABS:
3341 case TargetOpcode::G_FCANONICALIZE:
3342 case TargetOpcode::G_FMINNUM:
3343 case TargetOpcode::G_FMAXNUM:
3344 case TargetOpcode::G_FMINNUM_IEEE:
3345 case TargetOpcode::G_FMAXNUM_IEEE:
3346 case TargetOpcode::G_FMINIMUM:
3347 case TargetOpcode::G_FMAXIMUM:
3348 case TargetOpcode::G_FMINIMUMNUM:
3349 case TargetOpcode::G_FMAXIMUMNUM:
3350 case TargetOpcode::G_FDIV:
3351 case TargetOpcode::G_FREM:
3352 case TargetOpcode::G_FCEIL:
3353 case TargetOpcode::G_FFLOOR:
3354 case TargetOpcode::G_FCOS:
3355 case TargetOpcode::G_FSIN:
3356 case TargetOpcode::G_FTAN:
3357 case TargetOpcode::G_FACOS:
3358 case TargetOpcode::G_FASIN:
3359 case TargetOpcode::G_FATAN:
3360 case TargetOpcode::G_FATAN2:
3361 case TargetOpcode::G_FCOSH:
3362 case TargetOpcode::G_FSINH:
3363 case TargetOpcode::G_FTANH:
3364 case TargetOpcode::G_FLOG10:
3365 case TargetOpcode::G_FLOG:
3366 case TargetOpcode::G_FLOG2:
3367 case TargetOpcode::G_FRINT:
3368 case TargetOpcode::G_FNEARBYINT:
3369 case TargetOpcode::G_FSQRT:
3370 case TargetOpcode::G_FEXP:
3371 case TargetOpcode::G_FEXP2:
3372 case TargetOpcode::G_FEXP10:
3373 case TargetOpcode::G_FPOW:
3374 case TargetOpcode::G_INTRINSIC_TRUNC:
3375 case TargetOpcode::G_INTRINSIC_ROUND:
3376 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3377 assert(TypeIdx == 0);
3378 Observer.changingInstr(MI);
3379
3380 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3381 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3382
3383 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3384 Observer.changedInstr(MI);
3385 return Legalized;
3386 case TargetOpcode::G_FMODF: {
3387 Observer.changingInstr(MI);
3388 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3389
3390 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3391 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3392 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3393 Observer.changedInstr(MI);
3394 return Legalized;
3395 }
3396 case TargetOpcode::G_FPOWI:
3397 case TargetOpcode::G_FLDEXP:
3398 case TargetOpcode::G_STRICT_FLDEXP: {
3399 if (TypeIdx == 0) {
3400 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3401 return UnableToLegalize;
3402
3403 Observer.changingInstr(MI);
3404 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3405 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3406 Observer.changedInstr(MI);
3407 return Legalized;
3408 }
3409
3410 if (TypeIdx == 1) {
3411 // For some reason SelectionDAG tries to promote to a libcall without
3412 // actually changing the integer type for promotion.
3413 Observer.changingInstr(MI);
3414 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3415 Observer.changedInstr(MI);
3416 return Legalized;
3417 }
3418
3419 return UnableToLegalize;
3420 }
3421 case TargetOpcode::G_FFREXP: {
3422 Observer.changingInstr(MI);
3423
3424 if (TypeIdx == 0) {
3425 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3426 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3427 } else {
3428 widenScalarDst(MI, WideTy, 1);
3429 }
3430
3431 Observer.changedInstr(MI);
3432 return Legalized;
3433 }
3434 case TargetOpcode::G_INTTOPTR:
3435 if (TypeIdx != 1)
3436 return UnableToLegalize;
3437
3438 Observer.changingInstr(MI);
3439 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3440 Observer.changedInstr(MI);
3441 return Legalized;
3442 case TargetOpcode::G_PTRTOINT:
3443 if (TypeIdx != 0)
3444 return UnableToLegalize;
3445
3446 Observer.changingInstr(MI);
3447 widenScalarDst(MI, WideTy, 0);
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 case TargetOpcode::G_BUILD_VECTOR: {
3451 Observer.changingInstr(MI);
3452
3453 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3454 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3455 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3456
3457 // Avoid changing the result vector type if the source element type was
3458 // requested.
3459 if (TypeIdx == 1) {
3460 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3461 } else {
3462 widenScalarDst(MI, WideTy, 0);
3463 }
3464
3465 Observer.changedInstr(MI);
3466 return Legalized;
3467 }
3468 case TargetOpcode::G_SEXT_INREG:
3469 if (TypeIdx != 0)
3470 return UnableToLegalize;
3471
3472 Observer.changingInstr(MI);
3473 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3474 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3475 Observer.changedInstr(MI);
3476 return Legalized;
3477 case TargetOpcode::G_PTRMASK: {
3478 if (TypeIdx != 1)
3479 return UnableToLegalize;
3480 Observer.changingInstr(MI);
3481 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3482 Observer.changedInstr(MI);
3483 return Legalized;
3484 }
3485 case TargetOpcode::G_VECREDUCE_ADD: {
3486 if (TypeIdx != 1)
3487 return UnableToLegalize;
3488 Observer.changingInstr(MI);
3489 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3490 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3491 Observer.changedInstr(MI);
3492 return Legalized;
3493 }
3494 case TargetOpcode::G_VECREDUCE_FADD:
3495 case TargetOpcode::G_VECREDUCE_FMUL:
3496 case TargetOpcode::G_VECREDUCE_FMIN:
3497 case TargetOpcode::G_VECREDUCE_FMAX:
3498 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3499 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3500 if (TypeIdx != 0)
3501 return UnableToLegalize;
3502 Observer.changingInstr(MI);
3503 Register VecReg = MI.getOperand(1).getReg();
3504 LLT VecTy = MRI.getType(VecReg);
3505 LLT WideVecTy = VecTy.isVector()
3506 ? LLT::vector(VecTy.getElementCount(), WideTy)
3507 : WideTy;
3508 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3509 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3510 Observer.changedInstr(MI);
3511 return Legalized;
3512 }
3513 case TargetOpcode::G_VSCALE: {
3514 MachineOperand &SrcMO = MI.getOperand(1);
3515 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3516 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3517 // The CImm is always a signed value
3518 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3519 Observer.changingInstr(MI);
3520 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3521 widenScalarDst(MI, WideTy);
3522 Observer.changedInstr(MI);
3523 return Legalized;
3524 }
3525 case TargetOpcode::G_SPLAT_VECTOR: {
3526 if (TypeIdx != 1)
3527 return UnableToLegalize;
3528
3529 Observer.changingInstr(MI);
3530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3531 Observer.changedInstr(MI);
3532 return Legalized;
3533 }
3534 case TargetOpcode::G_INSERT_SUBVECTOR: {
3535 if (TypeIdx != 0)
3536 return UnableToLegalize;
3537
3538 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3539 Register BigVec = IS.getBigVec();
3540 Register SubVec = IS.getSubVec();
3541
3542 LLT SubVecTy = MRI.getType(SubVec);
3543 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3544
3545 // Widen the G_INSERT_SUBVECTOR
3546 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3547 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3548 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3549 IS.getIndexImm());
3550
3551 // Truncate back down
3552 auto SplatZero = MIRBuilder.buildSplatVector(
3553 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3554 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3555 SplatZero);
3556
3557 MI.eraseFromParent();
3558
3559 return Legalized;
3560 }
3561 }
3562}
3563
3564static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3565 MachineIRBuilder &B, Register Src, LLT Ty) {
3566 auto Unmerge = B.buildUnmerge(Ty, Src);
3567 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3568 Pieces.push_back(Unmerge.getReg(I));
3569}
3570
3571static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3572 MachineIRBuilder &MIRBuilder) {
3573 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3574 MachineFunction &MF = MIRBuilder.getMF();
3575 const DataLayout &DL = MIRBuilder.getDataLayout();
3576 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3577 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3578 LLT DstLLT = MRI.getType(DstReg);
3579
3580 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3581
3582 auto Addr = MIRBuilder.buildConstantPool(
3583 AddrPtrTy,
3584 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3585
3586 MachineMemOperand *MMO =
3587 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3588 MachineMemOperand::MOLoad, DstLLT, Alignment);
3589
3590 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3591}
3592
3593LegalizerHelper::LegalizeResult
3594LegalizerHelper::lowerConstant(MachineInstr &MI) {
3595 const MachineOperand &ConstOperand = MI.getOperand(1);
3596 const Constant *ConstantVal = ConstOperand.getCImm();
3597
3598 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3599 MI.eraseFromParent();
3600
3601 return Legalized;
3602}
3603
3604LegalizerHelper::LegalizeResult
3605LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3606 const MachineOperand &ConstOperand = MI.getOperand(1);
3607 const Constant *ConstantVal = ConstOperand.getFPImm();
3608
3609 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3610 MI.eraseFromParent();
3611
3612 return Legalized;
3613}
3614
3615LegalizerHelper::LegalizeResult
3616LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3617 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3618 if (SrcTy.isVector()) {
3619 LLT SrcEltTy = SrcTy.getElementType();
3620 SmallVector<Register, 8> SrcRegs;
3621
3622 if (DstTy.isVector()) {
3623 int NumDstElt = DstTy.getNumElements();
3624 int NumSrcElt = SrcTy.getNumElements();
3625
3626 LLT DstEltTy = DstTy.getElementType();
3627 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3628 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3629
3630 // If there's an element size mismatch, insert intermediate casts to match
3631 // the result element type.
3632 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3633 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3634 //
3635 // =>
3636 //
3637 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3638 // %4:_(<2 x s8>) = G_BITCAST %2
3639 // %5:_(<2 x s8>) = G_BITCAST %3
3640 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3641 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3642 SrcPartTy = SrcEltTy;
3643 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3644 //
3645 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3646 //
3647 // =>
3648 //
3649 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3650 // %4:_(s16) = G_BITCAST %2
3651 // %5:_(s16) = G_BITCAST %3
3652 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3653 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3654 DstCastTy = DstEltTy;
3655 }
3656
3657 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3658 for (Register &SrcReg : SrcRegs)
3659 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3660 } else
3661 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3662
3663 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3664 MI.eraseFromParent();
3665 return Legalized;
3666 }
3667
3668 if (DstTy.isVector()) {
3669 SmallVector<Register, 8> SrcRegs;
3670 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3671 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3672 MI.eraseFromParent();
3673 return Legalized;
3674 }
3675
3676 return UnableToLegalize;
3677}
3678
3679/// Figure out the bit offset into a register when coercing a vector index for
3680/// the wide element type. This is only for the case of promoting a vector to
3681/// one with larger elements.
3682//
3683///
3684/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3685/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
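/// A worked instance (illustrative, not part of the original source): with
/// OldEltSize = 8 and NewEltSize = 32, Log2(NewEltSize / OldEltSize) = 2 and
/// Log2(OldEltSize) = 3, so for %idx = 5:
///   %offset_idx  = G_AND 5, 3  ; = 1
///   %offset_bits = G_SHL 1, 3  ; = 8, the bit position of s8 element 5 inside
///                              ; its containing s32 element (element 5 >> 2 = 1)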
3686static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3687 Register Idx,
3688 unsigned NewEltSize,
3689 unsigned OldEltSize) {
3690 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3691 LLT IdxTy = B.getMRI()->getType(Idx);
3692
3693 // Now figure out the amount we need to shift to get the target bits.
3694 auto OffsetMask = B.buildConstant(
3695 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3696 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3697 return B.buildShl(IdxTy, OffsetIdx,
3698 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3699}
3700
3701/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3702/// is casting to a vector with a smaller element size, perform multiple element
3703/// extracts and merge the results. If this is coercing to a vector with larger
3704/// elements, index the bitcasted vector and extract the target element with bit
3705/// operations. This is intended to force the indexing in the native register
3706/// size for architectures that can dynamically index the register file.
3707LegalizerHelper::LegalizeResult
3708LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3709 LLT CastTy) {
3710 if (TypeIdx != 1)
3711 return UnableToLegalize;
3712
3713 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3714
3715 LLT SrcEltTy = SrcVecTy.getElementType();
3716 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3717 unsigned OldNumElts = SrcVecTy.getNumElements();
3718
3719 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3720 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3721
3722 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3723 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3724 if (NewNumElts > OldNumElts) {
3725 // Decreasing the vector element size
3726 //
3727 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3728 // =>
3729 // v4i32:castx = bitcast x:v2i64
3730 //
3731 // i64 = bitcast
3732 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3733 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3734 //
3735 if (NewNumElts % OldNumElts != 0)
3736 return UnableToLegalize;
3737
3738 // Type of the intermediate result vector.
3739 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3740 LLT MidTy =
3741 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3742
3743 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3744
3745 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3746 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3747
3748 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3749 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3750 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3751 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3752 NewOps[I] = Elt.getReg(0);
3753 }
3754
3755 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3756 MIRBuilder.buildBitcast(Dst, NewVec);
3757 MI.eraseFromParent();
3758 return Legalized;
3759 }
3760
3761 if (NewNumElts < OldNumElts) {
3762 if (NewEltSize % OldEltSize != 0)
3763 return UnableToLegalize;
3764
3765 // This only depends on powers of 2 because we use bit tricks to figure out
3766 // the bit offset we need to shift to get the target element. A general
3767 // expansion could emit division/multiply.
3768 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3769 return UnableToLegalize;
3770
3771 // Increasing the vector element size.
3772 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3773 //
3774 // =>
3775 //
3776 // %cast = G_BITCAST %vec
3777 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3778 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3779 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3780 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3781 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3782 // %elt = G_TRUNC %elt_bits
3783
3784 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3785 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3786
3787 // Divide to get the index in the wider element type.
3788 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3789
3790 Register WideElt = CastVec;
3791 if (CastTy.isVector()) {
3792 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3793 ScaledIdx).getReg(0);
3794 }
3795
3796 // Compute the bit offset into the register of the target element.
3797 Register OffsetBits = getBitcastWiderVectorElementOffset(
3798 MIRBuilder, Idx, NewEltSize, OldEltSize);
3799
3800 // Shift the wide element to get the target element.
3801 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3802 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3803 MI.eraseFromParent();
3804 return Legalized;
3805 }
3806
3807 return UnableToLegalize;
3808}
3809
3810/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
3811/// OffsetBits, while preserving the other bits in \p TargetReg.
3812///
3813/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
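/// A worked instance (illustrative, not part of the original source): inserting
/// an s8 value into an s32 target at Offset = 8:
///   mask   = ((1 << 8) - 1) << 8                         ; 0x0000FF00
///   result = (TargetReg & ~mask) | (ZExt(InsertReg) << 8)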
3814static Register buildBitFieldInsert(MachineIRBuilder &B,
3815 Register TargetReg, Register InsertReg,
3816 Register OffsetBits) {
3817 LLT TargetTy = B.getMRI()->getType(TargetReg);
3818 LLT InsertTy = B.getMRI()->getType(InsertReg);
3819 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3820 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3821
3822 // Produce a bitmask of the value to insert
3823 auto EltMask = B.buildConstant(
3824 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3825 InsertTy.getSizeInBits()));
3826 // Shift it into position
3827 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3828 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3829
3830 // Clear out the bits in the wide element
3831 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3832
3833 // The value to insert has all zeros already, so stick it into the masked
3834 // wide element.
3835 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3836}
3837
3838/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3839/// is increasing the element size, perform the indexing in the target element
3840/// type, and use bit operations to insert at the element position. This is
3841/// intended for architectures that can dynamically index the register file and
3842/// want to force indexing in the native register size.
3843LegalizerHelper::LegalizeResult
3844LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3845 LLT CastTy) {
3846 if (TypeIdx != 0)
3847 return UnableToLegalize;
3848
3849 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3850 MI.getFirst4RegLLTs();
3851 LLT VecTy = DstTy;
3852
3853 LLT VecEltTy = VecTy.getElementType();
3854 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3855 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3856 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3857
3858 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3859 unsigned OldNumElts = VecTy.getNumElements();
3860
3861 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3862 if (NewNumElts < OldNumElts) {
3863 if (NewEltSize % OldEltSize != 0)
3864 return UnableToLegalize;
3865
3866 // This only depends on powers of 2 because we use bit tricks to figure out
3867 // the bit offset we need to shift to get the target element. A general
3868 // expansion could emit division/multiply.
3869 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3870 return UnableToLegalize;
3871
3872 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3873 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3874
3875 // Divide to get the index in the wider element type.
3876 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3877
3878 Register ExtractedElt = CastVec;
3879 if (CastTy.isVector()) {
3880 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3881 ScaledIdx).getReg(0);
3882 }
3883
3884 // Compute the bit offset into the register of the target element.
3885 Register OffsetBits = getBitcastWiderVectorElementOffset(
3886 MIRBuilder, Idx, NewEltSize, OldEltSize);
3887
3888 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3889 Val, OffsetBits);
3890 if (CastTy.isVector()) {
3891 InsertedElt = MIRBuilder.buildInsertVectorElement(
3892 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3893 }
3894
3895 MIRBuilder.buildBitcast(Dst, InsertedElt);
3896 MI.eraseFromParent();
3897 return Legalized;
3898 }
3899
3900 return UnableToLegalize;
3901}
3902
3903// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3904// those with smaller-than-legal operands.
3905//
3906// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3907//
3908// ===>
3909//
3910// s32 = G_BITCAST <4 x s8>
3911// s32 = G_BITCAST <4 x s8>
3912// s32 = G_BITCAST <4 x s8>
3913// s32 = G_BITCAST <4 x s8>
3914// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3915// <16 x s8> = G_BITCAST <4 x s32>
3916LegalizerHelper::LegalizeResult
3917LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3918 LLT CastTy) {
3919 // Check that this is a G_CONCAT_VECTORS instruction.
3920 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3921 if (!ConcatMI) {
3922 return UnableToLegalize;
3923 }
3924
3925 // Check if bitcast is Legal
3926 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3927 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3928
3929 // Check if the build vector is Legal
3930 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3931 return UnableToLegalize;
3932 }
3933
3934 // Bitcast the sources
3935 SmallVector<Register> BitcastRegs;
3936 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3937 BitcastRegs.push_back(
3938 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3939 .getReg(0));
3940 }
3941
3942 // Build the scalar values into a vector
3943 Register BuildReg =
3944 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3945 MIRBuilder.buildBitcast(DstReg, BuildReg);
3946
3947 MI.eraseFromParent();
3948 return Legalized;
3949}
3950
3951// This bitcasts a shuffle vector to a different type currently of the same
3952// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3953// will be used instead.
3954//
3955// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3956// ===>
3957// <4 x s64> = G_PTRTOINT <4 x p0>
3958// <4 x s64> = G_PTRTOINT <4 x p0>
3959// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3960// <16 x p0> = G_INTTOPTR <16 x s64>
3961LegalizerHelper::LegalizeResult
3962LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3963 LLT CastTy) {
3964 auto ShuffleMI = cast<GShuffleVector>(&MI);
3965 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3966 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3967
3968 // We currently only handle vectors of the same size.
3969 if (TypeIdx != 0 ||
3970 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3971 CastTy.getElementCount() != DstTy.getElementCount())
3972 return UnableToLegalize;
3973
3974 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3975
3976 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3977 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3978 auto Shuf =
3979 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3980 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3981
3982 MI.eraseFromParent();
3983 return Legalized;
3984}
3985
3986/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3987///
3988/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3989///
3990/// ===>
3991///
3992/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3993/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3994/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3995LegalizerHelper::LegalizeResult
3996LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3997 LLT CastTy) {
3998 auto ES = cast<GExtractSubvector>(&MI);
3999
4000 if (!CastTy.isVector())
4001 return UnableToLegalize;
4002
4003 if (TypeIdx != 0)
4004 return UnableToLegalize;
4005
4006 Register Dst = ES->getReg(0);
4007 Register Src = ES->getSrcVec();
4008 uint64_t Idx = ES->getIndexImm();
4009
4010 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4011
4012 LLT DstTy = MRI.getType(Dst);
4013 LLT SrcTy = MRI.getType(Src);
4014 ElementCount DstTyEC = DstTy.getElementCount();
4015 ElementCount SrcTyEC = SrcTy.getElementCount();
4016 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4017 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4018
4019 if (DstTy == CastTy)
4020 return Legalized;
4021
4022 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4023 return UnableToLegalize;
4024
4025 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4026 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4027 if (CastEltSize < DstEltSize)
4028 return UnableToLegalize;
4029
4030 auto AdjustAmt = CastEltSize / DstEltSize;
4031 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4032 SrcTyMinElts % AdjustAmt != 0)
4033 return UnableToLegalize;
4034
4035 Idx /= AdjustAmt;
4036 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4037 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4038 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4039 MIRBuilder.buildBitcast(Dst, PromotedES);
4040
4041 ES->eraseFromParent();
4042 return Legalized;
4043}
4044
4045/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4046///
4047/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4048/// <vscale x 8 x i1>,
4049/// N
4050///
4051/// ===>
4052///
4053/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4054/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4055/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4056/// <vscale x 1 x i8>, N / 8
4057/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4058LegalizerHelper::LegalizeResult
4059LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4060 LLT CastTy) {
4061 auto ES = cast<GInsertSubvector>(&MI);
4062
4063 if (!CastTy.isVector())
4064 return UnableToLegalize;
4065
4066 if (TypeIdx != 0)
4067 return UnableToLegalize;
4068
4069 Register Dst = ES->getReg(0);
4070 Register BigVec = ES->getBigVec();
4071 Register SubVec = ES->getSubVec();
4072 uint64_t Idx = ES->getIndexImm();
4073
4074 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4075
4076 LLT DstTy = MRI.getType(Dst);
4077 LLT BigVecTy = MRI.getType(BigVec);
4078 LLT SubVecTy = MRI.getType(SubVec);
4079
4080 if (DstTy == CastTy)
4081 return Legalized;
4082
4083 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4084 return UnableToLegalize;
4085
4086 ElementCount DstTyEC = DstTy.getElementCount();
4087 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4088 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4089 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4090 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4091 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4092
4093 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4094 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4095 if (CastEltSize < DstEltSize)
4096 return UnableToLegalize;
4097
4098 auto AdjustAmt = CastEltSize / DstEltSize;
4099 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4100 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4101 return UnableToLegalize;
4102
4103 Idx /= AdjustAmt;
4104 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4105 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4106 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4107 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4108 auto PromotedIS =
4109 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4110 MIRBuilder.buildBitcast(Dst, PromotedIS);
4111
4112 ES->eraseFromParent();
4113 return Legalized;
4114}
4115
4116LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4117 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4118 Register DstReg = LoadMI.getDstReg();
4119 Register PtrReg = LoadMI.getPointerReg();
4120 LLT DstTy = MRI.getType(DstReg);
4121 MachineMemOperand &MMO = LoadMI.getMMO();
4122 LLT MemTy = MMO.getMemoryType();
4123 MachineFunction &MF = MIRBuilder.getMF();
4124
4125 unsigned MemSizeInBits = MemTy.getSizeInBits();
4126 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4127
4128 if (MemSizeInBits != MemStoreSizeInBits) {
4129 if (MemTy.isVector())
4130 return UnableToLegalize;
4131
4132 // Promote to a byte-sized load if not loading an integral number of
4133 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4134 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4135 MachineMemOperand *NewMMO =
4136 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4137
4138 Register LoadReg = DstReg;
4139 LLT LoadTy = DstTy;
4140
4141 // If this wasn't already an extending load, we need to widen the result
4142 // register to avoid creating a load with a narrower result than the source.
4143 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4144 LoadTy = WideMemTy;
4145 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4146 }
4147
4148 if (isa<GSExtLoad>(LoadMI)) {
4149 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4150 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4151 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4152 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4153 // The extra bits are guaranteed to be zero, since we stored them that
4154 // way. A zext load from Wide thus automatically gives zext from MemVT.
4155 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4156 } else {
4157 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4158 }
4159
4160 if (DstTy != LoadTy)
4161 MIRBuilder.buildTrunc(DstReg, LoadReg);
4162
4163 LoadMI.eraseFromParent();
4164 return Legalized;
4165 }
4166
4167 // Big endian lowering not implemented.
4168 if (MIRBuilder.getDataLayout().isBigEndian())
4169 return UnableToLegalize;
4170
4171 // This load needs splitting into power of 2 sized loads.
4172 //
4173 // Our strategy here is to generate anyextending loads for the smaller
4174 // types up to the next power-of-2 result type, and then combine the two
4175 // results together, before truncating back down to the non-pow-2
4176 // type.
4177 // E.g. v1 = i24 load =>
4178 // v2 = i32 zextload (2 byte)
4179 // v3 = i32 load (1 byte)
4180 // v4 = i32 shl v3, 16
4181 // v5 = i32 or v4, v2
4182 // v1 = i24 trunc v5
4183 // By doing this we generate the correct truncate which should get
4184 // combined away as an artifact with a matching extend.
4185
4186 uint64_t LargeSplitSize, SmallSplitSize;
4187
4188 if (!isPowerOf2_32(MemSizeInBits)) {
4189 // This load needs splitting into power of 2 sized loads.
4190 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4191 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4192 } else {
4193 // This is already a power of 2, but we still need to split this in half.
4194 //
4195 // Assume we're being asked to decompose an unaligned load.
4196 // TODO: If this requires multiple splits, handle them all at once.
4197 auto &Ctx = MF.getFunction().getContext();
4198 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4199 return UnableToLegalize;
4200
4201 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4202 }
4203
4204 if (MemTy.isVector()) {
4205 // TODO: Handle vector extloads
4206 if (MemTy != DstTy)
4207 return UnableToLegalize;
4208
4209 Align Alignment = LoadMI.getAlign();
4210 // Given an alignment larger than the size of the memory, we can increase
4211 // the size of the load without needing to scalarize it.
4212 if (Alignment.value() * 8 > MemSizeInBits &&
4215 DstTy.getElementType());
4216 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4217 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4218 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4219 NewLoad.getReg(0));
4220 LoadMI.eraseFromParent();
4221 return Legalized;
4222 }
4223
4224 // TODO: We can do better than scalarizing the vector and at least split it
4225 // in half.
4226 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4227 }
4228
4229 MachineMemOperand *LargeMMO =
4230 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4231 MachineMemOperand *SmallMMO =
4232 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4233
4234 LLT PtrTy = MRI.getType(PtrReg);
4235 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4236 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4237 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4238 PtrReg, *LargeMMO);
4239
4240 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4241 LargeSplitSize / 8);
4242 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4243 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4244 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4245 SmallPtr, *SmallMMO);
4246
4247 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4248 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4249
4250 if (AnyExtTy == DstTy)
4251 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4252 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4253 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4254 MIRBuilder.buildTrunc(DstReg, {Or});
4255 } else {
4256 assert(DstTy.isPointer() && "expected pointer");
4257 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4258
4259 // FIXME: We currently consider this to be illegal for non-integral address
4260 // spaces, but we still need a way to reinterpret the bits.
4261 MIRBuilder.buildIntToPtr(DstReg, Or);
4262 }
4263
4264 LoadMI.eraseFromParent();
4265 return Legalized;
4266}
4267
4268LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4269 // Lower a non-power of 2 store into multiple pow-2 stores.
4270 // E.g. split an i24 store into an i16 store + i8 store.
4271 // We do this by first extending the stored value to the next largest power
4272 // of 2 type, and then using truncating stores to store the components.
4273 // By doing this, likewise with G_LOAD, generate an extend that can be
4274 // artifact-combined away instead of leaving behind extracts.
4275 Register SrcReg = StoreMI.getValueReg();
4276 Register PtrReg = StoreMI.getPointerReg();
4277 LLT SrcTy = MRI.getType(SrcReg);
4278 MachineFunction &MF = MIRBuilder.getMF();
4279 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4280 LLT MemTy = MMO.getMemoryType();
4281
4282 unsigned StoreWidth = MemTy.getSizeInBits();
4283 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4284
4285 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4286 // Promote to a byte-sized store with upper bits zero if not
4287 // storing an integral number of bytes. For example, promote
4288 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4289 LLT WideTy = LLT::scalar(StoreSizeInBits);
4290
4291 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4292 // Avoid creating a store with a narrower source than result.
4293 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4294 SrcTy = WideTy;
4295 }
4296
4297 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4298
4299 MachineMemOperand *NewMMO =
4300 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4301 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4302 StoreMI.eraseFromParent();
4303 return Legalized;
4304 }
4305
4306 if (MemTy.isVector()) {
4307 if (MemTy != SrcTy)
4308 return scalarizeVectorBooleanStore(StoreMI);
4309
4310 // TODO: We can do better than scalarizing the vector and at least split it
4311 // in half.
4312 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4313 }
4314
4315 unsigned MemSizeInBits = MemTy.getSizeInBits();
4316 uint64_t LargeSplitSize, SmallSplitSize;
4317
4318 if (!isPowerOf2_32(MemSizeInBits)) {
4319 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4320 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4321 } else {
4322 auto &Ctx = MF.getFunction().getContext();
4323 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4324 return UnableToLegalize; // Don't know what we're being asked to do.
4325
4326 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4327 }
4328
4329 // Extend to the next pow-2. If this store was itself the result of lowering,
4330 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4331 // that's wider than the stored size.
4332 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4333 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4334
4335 if (SrcTy.isPointer()) {
4336 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4337 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4338 }
4339
4340 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4341
4342 // Obtain the smaller value by shifting away the larger value.
4343 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4344 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4345
4346 // Generate the PtrAdd and truncating stores.
4347 LLT PtrTy = MRI.getType(PtrReg);
4348 auto OffsetCst = MIRBuilder.buildConstant(
4349 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4350 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4351
4352 MachineMemOperand *LargeMMO =
4353 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4354 MachineMemOperand *SmallMMO =
4355 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4356 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4357 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4358 StoreMI.eraseFromParent();
4359 return Legalized;
4360}
4361
4362LegalizerHelper::LegalizeResult
4363LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4364 Register SrcReg = StoreMI.getValueReg();
4365 Register PtrReg = StoreMI.getPointerReg();
4366 LLT SrcTy = MRI.getType(SrcReg);
4367 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4368 LLT MemTy = MMO.getMemoryType();
4369 LLT MemScalarTy = MemTy.getElementType();
4370 MachineFunction &MF = MIRBuilder.getMF();
4371
4372 assert(SrcTy.isVector() && "Expect a vector store type");
4373
4374 if (!MemScalarTy.isByteSized()) {
4375 // We need to build an integer scalar of the vector bit pattern.
4376 // It's not legal for us to add padding when storing a vector.
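 // Illustrative example (not part of the original source): for a <4 x s1>
 // memory type the loop below packs the four booleans into one s4 value,
 //   CurrVal |= zext(trunc(Elt[I])) << I
 // using the mirrored lane index (3 - I) for the shift on big-endian targets.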
4377 unsigned NumBits = MemTy.getSizeInBits();
4378 LLT IntTy = LLT::scalar(NumBits);
4379 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4380 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4381
4382 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4383 auto Elt = MIRBuilder.buildExtractVectorElement(
4384 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4385 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4386 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4387 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4388 ? (MemTy.getNumElements() - 1) - I
4389 : I;
4390 auto ShiftAmt = MIRBuilder.buildConstant(
4391 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4392 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4393 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4394 }
4395 auto PtrInfo = MMO.getPointerInfo();
4396 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4397 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4398 StoreMI.eraseFromParent();
4399 return Legalized;
4400 }
4401
4402 // TODO: implement simple scalarization.
4403 return UnableToLegalize;
4404}
4405
4406LegalizerHelper::LegalizeResult
4407LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4408 switch (MI.getOpcode()) {
4409 case TargetOpcode::G_LOAD: {
4410 if (TypeIdx != 0)
4411 return UnableToLegalize;
4412 MachineMemOperand &MMO = **MI.memoperands_begin();
4413
4414 // Not sure how to interpret a bitcast of an extending load.
4415 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4416 return UnableToLegalize;
4417
4418 Observer.changingInstr(MI);
4419 bitcastDst(MI, CastTy, 0);
4420 MMO.setType(CastTy);
4421 // The range metadata is no longer valid when reinterpreted as a different
4422 // type.
4423 MMO.clearRanges();
4424 Observer.changedInstr(MI);
4425 return Legalized;
4426 }
4427 case TargetOpcode::G_STORE: {
4428 if (TypeIdx != 0)
4429 return UnableToLegalize;
4430
4431 MachineMemOperand &MMO = **MI.memoperands_begin();
4432
4433 // Not sure how to interpret a bitcast of a truncating store.
4434 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4435 return UnableToLegalize;
4436
4437 Observer.changingInstr(MI);
4438 bitcastSrc(MI, CastTy, 0);
4439 MMO.setType(CastTy);
4440 Observer.changedInstr(MI);
4441 return Legalized;
4442 }
4443 case TargetOpcode::G_SELECT: {
4444 if (TypeIdx != 0)
4445 return UnableToLegalize;
4446
4447 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4448 LLVM_DEBUG(
4449 dbgs() << "bitcast action not implemented for vector select\n");
4450 return UnableToLegalize;
4451 }
4452
4453 Observer.changingInstr(MI);
4454 bitcastSrc(MI, CastTy, 2);
4455 bitcastSrc(MI, CastTy, 3);
4456 bitcastDst(MI, CastTy, 0);
4457 Observer.changedInstr(MI);
4458 return Legalized;
4459 }
4460 case TargetOpcode::G_AND:
4461 case TargetOpcode::G_OR:
4462 case TargetOpcode::G_XOR: {
4463 Observer.changingInstr(MI);
4464 bitcastSrc(MI, CastTy, 1);
4465 bitcastSrc(MI, CastTy, 2);
4466 bitcastDst(MI, CastTy, 0);
4467 Observer.changedInstr(MI);
4468 return Legalized;
4469 }
4470 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4471 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4472 case TargetOpcode::G_INSERT_VECTOR_ELT:
4473 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4474 case TargetOpcode::G_CONCAT_VECTORS:
4475 return bitcastConcatVector(MI, TypeIdx, CastTy);
4476 case TargetOpcode::G_SHUFFLE_VECTOR:
4477 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4478 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4479 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4480 case TargetOpcode::G_INSERT_SUBVECTOR:
4481 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4482 default:
4483 return UnableToLegalize;
4484 }
4485}
4486
4487// Legalize an instruction by changing the opcode in place.
4488void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4489 Observer.changingInstr(MI);
4490 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4491 Observer.changedInstr(MI);
4492}
4493
4494LegalizerHelper::LegalizeResult
4495LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4496 using namespace TargetOpcode;
4497
4498 switch(MI.getOpcode()) {
4499 default:
4500 return UnableToLegalize;
4501 case TargetOpcode::G_FCONSTANT:
4502 return lowerFConstant(MI);
4503 case TargetOpcode::G_BITCAST:
4504 return lowerBitcast(MI);
4505 case TargetOpcode::G_SREM:
4506 case TargetOpcode::G_UREM: {
4507 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4508 auto Quot =
4509 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4510 {MI.getOperand(1), MI.getOperand(2)});
4511
4512 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4513 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4514 MI.eraseFromParent();
4515 return Legalized;
4516 }
4517 case TargetOpcode::G_SADDO:
4518 case TargetOpcode::G_SSUBO:
4519 return lowerSADDO_SSUBO(MI);
4520 case TargetOpcode::G_SADDE:
4521 return lowerSADDE(MI);
4522 case TargetOpcode::G_SSUBE:
4523 return lowerSSUBE(MI);
4524 case TargetOpcode::G_UMULH:
4525 case TargetOpcode::G_SMULH:
4526 return lowerSMULH_UMULH(MI);
4527 case TargetOpcode::G_SMULO:
4528 case TargetOpcode::G_UMULO: {
4529 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4530 // result.
4531 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4532 LLT Ty = MRI.getType(Res);
4533
4534 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4535 ? TargetOpcode::G_SMULH
4536 : TargetOpcode::G_UMULH;
4537
4538 Observer.changingInstr(MI);
4539 const auto &TII = MIRBuilder.getTII();
4540 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4541 MI.removeOperand(1);
4542 Observer.changedInstr(MI);
4543
4544 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4545 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4546
4547 // Move insert point forward so we can use the Res register if needed.
4548 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4549
4550 // For *signed* multiply, overflow is detected by checking:
4551 // (hi != (lo >> bitwidth-1))
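 // Illustrative example (not part of the original source): for s8 operands,
 // 100 * 100 = 10000 = 0x2710, so lo = 0x10 and hi = 0x27. The sign-fill of lo
 // is (0x10 >> 7) = 0, which differs from hi, so overflow is reported.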
4552 if (Opcode == TargetOpcode::G_SMULH) {
4553 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4554 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4555 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4556 } else {
4557 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4558 }
4559 return Legalized;
4560 }
4561 case TargetOpcode::G_FNEG: {
4562 auto [Res, SubByReg] = MI.getFirst2Regs();
4563 LLT Ty = MRI.getType(Res);
4564
4565 auto SignMask = MIRBuilder.buildConstant(
4566 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4567 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4568 MI.eraseFromParent();
4569 return Legalized;
4570 }
4571 case TargetOpcode::G_FSUB:
4572 case TargetOpcode::G_STRICT_FSUB: {
4573 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4574 LLT Ty = MRI.getType(Res);
4575
4576 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4577 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4578
4579 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4580 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4581 else
4582 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4583
4584 MI.eraseFromParent();
4585 return Legalized;
4586 }
4587 case TargetOpcode::G_FMAD:
4588 return lowerFMad(MI);
4589 case TargetOpcode::G_FFLOOR:
4590 return lowerFFloor(MI);
4591 case TargetOpcode::G_LROUND:
4592 case TargetOpcode::G_LLROUND: {
4593 Register DstReg = MI.getOperand(0).getReg();
4594 Register SrcReg = MI.getOperand(1).getReg();
4595 LLT SrcTy = MRI.getType(SrcReg);
4596 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4597 {SrcReg});
4598 MIRBuilder.buildFPTOSI(DstReg, Round);
4599 MI.eraseFromParent();
4600 return Legalized;
4601 }
4602 case TargetOpcode::G_INTRINSIC_ROUND:
4603 return lowerIntrinsicRound(MI);
4604 case TargetOpcode::G_FRINT: {
4605 // Since round even is the assumed rounding mode for unconstrained FP
4606 // operations, rint and roundeven are the same operation.
4607 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4608 return Legalized;
4609 }
4610 case TargetOpcode::G_INTRINSIC_LRINT:
4611 case TargetOpcode::G_INTRINSIC_LLRINT: {
4612 Register DstReg = MI.getOperand(0).getReg();
4613 Register SrcReg = MI.getOperand(1).getReg();
4614 LLT SrcTy = MRI.getType(SrcReg);
4615 auto Round =
4616 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4617 MIRBuilder.buildFPTOSI(DstReg, Round);
4618 MI.eraseFromParent();
4619 return Legalized;
4620 }
4621 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4622 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4623 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4624 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4625 **MI.memoperands_begin());
4626 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4627 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4628 MI.eraseFromParent();
4629 return Legalized;
4630 }
4631 case TargetOpcode::G_LOAD:
4632 case TargetOpcode::G_SEXTLOAD:
4633 case TargetOpcode::G_ZEXTLOAD:
4634 return lowerLoad(cast<GAnyLoad>(MI));
4635 case TargetOpcode::G_STORE:
4636 return lowerStore(cast<GStore>(MI));
4637 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4638 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4639 case TargetOpcode::G_CTLZ:
4640 case TargetOpcode::G_CTTZ:
4641 case TargetOpcode::G_CTPOP:
4642 return lowerBitCount(MI);
4643 case G_UADDO: {
4644 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4645
4646 Register NewRes = MRI.cloneVirtualRegister(Res);
4647
4648 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4649 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4650
4651 MIRBuilder.buildCopy(Res, NewRes);
4652
4653 MI.eraseFromParent();
4654 return Legalized;
4655 }
4656 case G_UADDE: {
4657 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4658 const LLT CondTy = MRI.getType(CarryOut);
4659 const LLT Ty = MRI.getType(Res);
4660
4661 Register NewRes = MRI.cloneVirtualRegister(Res);
4662
4663 // Initial add of the two operands.
4664 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4665
4666 // Initial check for carry.
4667 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4668
4669 // Add the sum and the carry.
4670 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4671 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4672
4673 // Second check for carry. We can only carry if the initial sum is all 1s
4674 // and the carry is set, resulting in a new sum of 0.
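 // Illustrative example (not part of the original source): for s8, LHS = 0xFF,
 // RHS = 0x00, CarryIn = 1 gives TmpRes = 0xFF with no carry yet; NewRes wraps
 // to 0x00, so the (NewRes == 0) && CarryIn check sets the carry out.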
4675 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4676 auto ResEqZero =
4677 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4678 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4679 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4680
4681 MIRBuilder.buildCopy(Res, NewRes);
4682
4683 MI.eraseFromParent();
4684 return Legalized;
4685 }
4686 case G_USUBO: {
4687 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4688
4689 MIRBuilder.buildSub(Res, LHS, RHS);
4690 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4691
4692 MI.eraseFromParent();
4693 return Legalized;
4694 }
4695 case G_USUBE: {
4696 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4697 const LLT CondTy = MRI.getType(BorrowOut);
4698 const LLT Ty = MRI.getType(Res);
4699
4700 // Initial subtract of the two operands.
4701 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4702
4703 // Initial check for borrow.
4704 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4705
4706 // Subtract the borrow from the first subtract.
4707 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4708 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4709
4710 // Second check for borrow. We can only borrow if the initial difference is
4711 // 0 and the borrow is set, resulting in a new difference of all 1s.
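 // Illustrative example (not part of the original source): for s8, LHS = RHS =
 // 0x42 with BorrowIn = 1 gives TmpRes = 0x00 with no borrow yet; Res wraps to
 // 0xFF, so the (TmpRes == 0) && BorrowIn check sets the borrow out.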
4712 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4713 auto TmpResEqZero =
4714 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4715 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4716 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4717
4718 MI.eraseFromParent();
4719 return Legalized;
4720 }
4721 case G_UITOFP:
4722 return lowerUITOFP(MI);
4723 case G_SITOFP:
4724 return lowerSITOFP(MI);
4725 case G_FPTOUI:
4726 return lowerFPTOUI(MI);
4727 case G_FPTOSI:
4728 return lowerFPTOSI(MI);
4729 case G_FPTOUI_SAT:
4730 case G_FPTOSI_SAT:
4731 return lowerFPTOINT_SAT(MI);
4732 case G_FPTRUNC:
4733 return lowerFPTRUNC(MI);
4734 case G_FPOWI:
4735 return lowerFPOWI(MI);
4736 case G_SMIN:
4737 case G_SMAX:
4738 case G_UMIN:
4739 case G_UMAX:
4740 return lowerMinMax(MI);
4741 case G_SCMP:
4742 case G_UCMP:
4743 return lowerThreewayCompare(MI);
4744 case G_FCOPYSIGN:
4745 return lowerFCopySign(MI);
4746 case G_FMINNUM:
4747 case G_FMAXNUM:
4748 case G_FMINIMUMNUM:
4749 case G_FMAXIMUMNUM:
4750 return lowerFMinNumMaxNum(MI);
4751 case G_FMINIMUM:
4752 case G_FMAXIMUM:
4753 return lowerFMinimumMaximum(MI);
4754 case G_MERGE_VALUES:
4755 return lowerMergeValues(MI);
4756 case G_UNMERGE_VALUES:
4757 return lowerUnmergeValues(MI);
4758 case TargetOpcode::G_SEXT_INREG: {
4759 assert(MI.getOperand(2).isImm() && "Expected immediate");
4760 int64_t SizeInBits = MI.getOperand(2).getImm();
4761
4762 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4763 LLT DstTy = MRI.getType(DstReg);
4764 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4765
4766 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4767 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4768 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4769 MI.eraseFromParent();
4770 return Legalized;
4771 }
4772 case G_EXTRACT_VECTOR_ELT:
4773 case G_INSERT_VECTOR_ELT:
4774 return lowerExtractInsertVectorElt(MI);
4775 case G_SHUFFLE_VECTOR:
4776 return lowerShuffleVector(MI);
4777 case G_VECTOR_COMPRESS:
4778 return lowerVECTOR_COMPRESS(MI);
4779 case G_DYN_STACKALLOC:
4780 return lowerDynStackAlloc(MI);
4781 case G_STACKSAVE:
4782 return lowerStackSave(MI);
4783 case G_STACKRESTORE:
4784 return lowerStackRestore(MI);
4785 case G_EXTRACT:
4786 return lowerExtract(MI);
4787 case G_INSERT:
4788 return lowerInsert(MI);
4789 case G_BSWAP:
4790 return lowerBswap(MI);
4791 case G_BITREVERSE:
4792 return lowerBitreverse(MI);
4793 case G_READ_REGISTER:
4794 case G_WRITE_REGISTER:
4795 return lowerReadWriteRegister(MI);
4796 case G_UADDSAT:
4797 case G_USUBSAT: {
4798 // Try to make a reasonable guess about which lowering strategy to use. The
4799 // target can override this with custom lowering and calling the
4800 // implementation functions.
4801 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4802 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4803 return lowerAddSubSatToMinMax(MI);
4804 return lowerAddSubSatToAddoSubo(MI);
4805 }
4806 case G_SADDSAT:
4807 case G_SSUBSAT: {
4808 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4809
4810 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4811 // since it's a shorter expansion. However, we would need to figure out the
4812 // preferred boolean type for the carry out for the query.
4813 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4814 return lowerAddSubSatToMinMax(MI);
4815 return lowerAddSubSatToAddoSubo(MI);
4816 }
4817 case G_SSHLSAT:
4818 case G_USHLSAT:
4819 return lowerShlSat(MI);
4820 case G_ABS:
4821 return lowerAbsToAddXor(MI);
4822 case G_ABDS:
4823 case G_ABDU: {
4824 bool IsSigned = MI.getOpcode() == G_ABDS;
4825 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4826 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4827 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4828 return lowerAbsDiffToMinMax(MI);
4829 }
4830 return lowerAbsDiffToSelect(MI);
4831 }
4832 case G_FABS:
4833 return lowerFAbs(MI);
4834 case G_SELECT:
4835 return lowerSelect(MI);
4836 case G_IS_FPCLASS:
4837 return lowerISFPCLASS(MI);
4838 case G_SDIVREM:
4839 case G_UDIVREM:
4840 return lowerDIVREM(MI);
4841 case G_FSHL:
4842 case G_FSHR:
4843 return lowerFunnelShift(MI);
4844 case G_ROTL:
4845 case G_ROTR:
4846 return lowerRotate(MI);
4847 case G_MEMSET:
4848 case G_MEMCPY:
4849 case G_MEMMOVE:
4850 return lowerMemCpyFamily(MI);
4851 case G_MEMCPY_INLINE:
4852 return lowerMemcpyInline(MI);
4853 case G_ZEXT:
4854 case G_SEXT:
4855 case G_ANYEXT:
4856 return lowerEXT(MI);
4857 case G_TRUNC:
4858 return lowerTRUNC(MI);
4859 GISEL_VECREDUCE_CASES_NONSEQ
4860 return lowerVectorReduction(MI);
4861 case G_VAARG:
4862 return lowerVAArg(MI);
4863 case G_ATOMICRMW_SUB: {
4864 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4865 const LLT ValTy = MRI.getType(Val);
4866 MachineMemOperand *MMO = *MI.memoperands_begin();
4867
4868 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4869 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4870 MI.eraseFromParent();
4871 return Legalized;
4872 }
4873 }
4874}
4875
4876Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4877 Align MinAlign) const {
4878 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4879 // datalayout for the preferred alignment. Also there should be a target hook
4880 // for this to allow targets to reduce the alignment and ignore the
4881 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4882 // the type.
4883 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4884}
4885
4886MachineInstrBuilder
4887LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4888 MachinePointerInfo &PtrInfo) {
4889 MachineFunction &MF = MIRBuilder.getMF();
4890 const DataLayout &DL = MIRBuilder.getDataLayout();
4891 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4892
4893 unsigned AddrSpace = DL.getAllocaAddrSpace();
4894 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4895
4896 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4897 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4898}
4899
4900MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4901 const SrcOp &Val) {
4902 LLT SrcTy = Val.getLLTTy(MRI);
4903 Align StackTypeAlign =
4904 std::max(getStackTemporaryAlignment(SrcTy),
4905 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4906 MachinePointerInfo PtrInfo;
4907 auto StackTemp =
4908 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4909
4910 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4911 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4912}
4913
4914static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4915 LLT VecTy) {
4916 LLT IdxTy = B.getMRI()->getType(IdxReg);
4917 unsigned NElts = VecTy.getNumElements();
4918
4919 int64_t IdxVal;
4920 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4921 if (IdxVal < VecTy.getNumElements())
4922 return IdxReg;
4923 // If a constant index would be out of bounds, clamp it as well.
4924 }
4925
4926 if (isPowerOf2_32(NElts)) {
4927 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4928 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4929 }
4930
4931 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4932 .getReg(0);
4933}
4934
4935Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4936 Register Index) {
4937 LLT EltTy = VecTy.getElementType();
4938
4939 // Calculate the element offset and add it to the pointer.
4940 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4941 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4942 "Converting bits to bytes lost precision");
4943
4944 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4945
4946 // Convert index to the correct size for the address space.
4947 const DataLayout &DL = MIRBuilder.getDataLayout();
4948 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4949 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4950 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4951 if (IdxTy != MRI.getType(Index))
4952 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4953
4954 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4955 MIRBuilder.buildConstant(IdxTy, EltSize));
4956
4957 LLT PtrTy = MRI.getType(VecPtr);
4958 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4959}
4960
4961#ifndef NDEBUG
4962/// Check that all vector operands have the same number of elements. Other
4963/// operands should be listed in \p NonVecOpIndices.
4964static bool hasSameNumEltsOnAllVectorOperands(
4965 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4966 std::initializer_list<unsigned> NonVecOpIndices) {
4967 if (MI.getNumMemOperands() != 0)
4968 return false;
4969
4970 LLT VecTy = MRI.getType(MI.getReg(0));
4971 if (!VecTy.isVector())
4972 return false;
4973 unsigned NumElts = VecTy.getNumElements();
4974
4975 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4976 MachineOperand &Op = MI.getOperand(OpIdx);
4977 if (!Op.isReg()) {
4978 if (!is_contained(NonVecOpIndices, OpIdx))
4979 return false;
4980 continue;
4981 }
4982
4983 LLT Ty = MRI.getType(Op.getReg());
4984 if (!Ty.isVector()) {
4985 if (!is_contained(NonVecOpIndices, OpIdx))
4986 return false;
4987 continue;
4988 }
4989
4990 if (Ty.getNumElements() != NumElts)
4991 return false;
4992 }
4993
4994 return true;
4995}
4996#endif
4997
4998/// Fill \p DstOps with DstOps that combined have the same number of elements
4999/// as \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
5000/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
5001/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
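/// e.g. for Ty = <7 x s16> and NumElts = 4 this produces
/// { <4 x s16>, <3 x s16> }.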
5002static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5003 unsigned NumElts) {
5004 LLT LeftoverTy;
5005 assert(Ty.isVector() && "Expected vector type");
5006 LLT EltTy = Ty.getElementType();
5007 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
5008 int NumParts, NumLeftover;
5009 std::tie(NumParts, NumLeftover) =
5010 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5011
5012 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5013 for (int i = 0; i < NumParts; ++i) {
5014 DstOps.push_back(NarrowTy);
5015 }
5016
5017 if (LeftoverTy.isValid()) {
5018 assert(NumLeftover == 1 && "expected exactly one leftover");
5019 DstOps.push_back(LeftoverTy);
5020 }
5021}
5022
5023/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5024/// made from \p Op depending on the operand type.
5025static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5026 MachineOperand &Op) {
5027 for (unsigned i = 0; i < N; ++i) {
5028 if (Op.isReg())
5029 Ops.push_back(Op.getReg());
5030 else if (Op.isImm())
5031 Ops.push_back(Op.getImm());
5032 else if (Op.isPredicate())
5033 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5034 else
5035 llvm_unreachable("Unsupported type");
5036 }
5037}
5038
5039// Handle splitting vector operations which need to have the same number of
5040// elements in each type index, but each type index may have a different element
5041// type.
5042//
5043// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5044// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5045// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5046//
5047// Also handles some irregular breakdown cases, e.g.
5048// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5049// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5050// s64 = G_SHL s64, s32
5051LegalizerHelper::LegalizeResult
5052LegalizerHelper::fewerElementsVectorMultiEltType(
5053 GenericMachineInstr &MI, unsigned NumElts,
5054 std::initializer_list<unsigned> NonVecOpIndices) {
5055 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5056 "Non-compatible opcode or not specified non-vector operands");
5057 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5058
5059 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5060 unsigned NumDefs = MI.getNumDefs();
5061
5062 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5063 // Build instructions with DstOps so that an instruction found by CSE is used
5064 // directly; CSE copies the found instruction into the given vreg destination.
5065 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5066 // Output registers will be taken from created instructions.
5067 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5068 for (unsigned i = 0; i < NumDefs; ++i) {
5069 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5070 }
5071
5072 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5073 // Operands listed in NonVecOpIndices will be used as is without splitting;
5074 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5075 // scalar condition (op 1), immediate in sext_inreg (op 2).
5076 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5077 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5078 ++UseIdx, ++UseNo) {
5079 if (is_contained(NonVecOpIndices, UseIdx)) {
5080 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5081 MI.getOperand(UseIdx));
5082 } else {
5083 SmallVector<Register, 8> SplitPieces;
5084 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5085 MRI);
5086 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5087 }
5088 }
5089
5090 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5091
5092 // Take i-th piece of each input operand split and build sub-vector/scalar
5093 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5094 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5095 SmallVector<DstOp, 2> Defs;
5096 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5097 Defs.push_back(OutputOpsPieces[DstNo][i]);
5098
5099 SmallVector<SrcOp, 3> Uses;
5100 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5101 Uses.push_back(InputOpsPieces[InputNo][i]);
5102
5103 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5104 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5105 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5106 }
5107
5108 // Merge small outputs into MI's output for each def operand.
5109 if (NumLeftovers) {
5110 for (unsigned i = 0; i < NumDefs; ++i)
5111 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5112 } else {
5113 for (unsigned i = 0; i < NumDefs; ++i)
5114 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5115 }
5116
5117 MI.eraseFromParent();
5118 return Legalized;
5119}
5120
5121LegalizerHelper::LegalizeResult
5122LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5123 unsigned NumElts) {
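// Split a vector G_PHI into PHIs with fewer elements: each incoming value is
// split into pieces in its predecessor block, narrower PHIs are built here,
// and their results are re-merged after the existing PHIs.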
5124 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5125
5126 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5127 unsigned NumDefs = MI.getNumDefs();
5128
5129 SmallVector<DstOp, 8> OutputOpsPieces;
5130 SmallVector<Register, 8> OutputRegs;
5131 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5132
5133 // Instructions that perform the register split will be inserted in the basic
5134 // block where the register is defined (the basic block is in the next operand).
5135 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5136 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5137 UseIdx += 2, ++UseNo) {
5138 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5139 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5140 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5141 MIRBuilder, MRI);
5142 }
5143
5144 // Build PHIs with fewer elements.
5145 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5146 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5147 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5148 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5149 Phi.addDef(
5150 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5151 OutputRegs.push_back(Phi.getReg(0));
5152
5153 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5154 Phi.addUse(InputOpsPieces[j][i]);
5155 Phi.add(MI.getOperand(1 + j * 2 + 1));
5156 }
5157 }
5158
5159 // Set the insert point after the existing PHIs
5160 MachineBasicBlock &MBB = *MI.getParent();
5161 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5162
5163 // Merge small outputs into MI's def.
5164 if (NumLeftovers) {
5165 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5166 } else {
5167 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5168 }
5169
5170 MI.eraseFromParent();
5171 return Legalized;
5172}
5173
5174LegalizerHelper::LegalizeResult
5175LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5176 unsigned TypeIdx,
5177 LLT NarrowTy) {
5178 const int NumDst = MI.getNumOperands() - 1;
5179 const Register SrcReg = MI.getOperand(NumDst).getReg();
5180 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5181 LLT SrcTy = MRI.getType(SrcReg);
5182
5183 if (TypeIdx != 1 || NarrowTy == DstTy)
5184 return UnableToLegalize;
5185
5186 // Requires compatible types. Otherwise SrcReg should have been defined by a
5187 // merge-like instruction that would get artifact-combined. Most likely the
5188 // instruction that defines SrcReg has to perform more/fewer-elements
5189 // legalization compatible with NarrowTy.
5190 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5191 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5192
5193 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5194 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5195 return UnableToLegalize;
5196
5197 // This is most likely DstTy (smaller than register size) packed in SrcTy
5198 // (larger than register size), and since the unmerge was not combined it will
5199 // be lowered to bit sequence extracts from a register. Unpack SrcTy to NarrowTy
5200 // (register size) pieces first. Then unpack each of the NarrowTy pieces to DstTy.
5201
5202 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5203 //
5204 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5205 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5206 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5207 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5208 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5209 const int PartsPerUnmerge = NumDst / NumUnmerge;
5210
5211 for (int I = 0; I != NumUnmerge; ++I) {
5212 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5213
5214 for (int J = 0; J != PartsPerUnmerge; ++J)
5215 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5216 MIB.addUse(Unmerge.getReg(I));
5217 }
5218
5219 MI.eraseFromParent();
5220 return Legalized;
5221}
5222
5223LegalizerHelper::LegalizeResult
5224LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5225 LLT NarrowTy) {
5226 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5227 // Requires compatible types. Otherwise the user of DstReg did not perform an
5228 // unmerge that should have been artifact-combined. Most likely the instruction
5229 // using DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5230 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5231 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5232 if (NarrowTy == SrcTy)
5233 return UnableToLegalize;
5234
5235 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
5236 // use is for old MIR tests. After the changes to more/fewer-elements handling it
5237 // should no longer be possible to generate MIR like this when starting from
5238 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5239 if (TypeIdx == 1) {
5240 assert(SrcTy.isVector() && "Expected vector types");
5241 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5242 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5243 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5244 return UnableToLegalize;
5245 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5246 //
5247 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5248 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5249 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5250 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5251 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5252 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5253
5254 SmallVector<Register, 8> Elts;
5255 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5256 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5257 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5258 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5259 Elts.push_back(Unmerge.getReg(j));
5260 }
5261
5262 SmallVector<Register, 8> NarrowTyElts;
5263 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5264 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5265 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5266 ++i, Offset += NumNarrowTyElts) {
5267 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5268 NarrowTyElts.push_back(
5269 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5270 }
5271
5272 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5273 MI.eraseFromParent();
5274 return Legalized;
5275 }
5276
5277 assert(TypeIdx == 0 && "Bad type index");
5278 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5279 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5280 return UnableToLegalize;
5281
5282 // This is most likely SrcTy (smaller than register size) packed in DstTy
5283 // (larger than register size), and since the merge was not combined it will be
5284 // lowered to bit sequence packing into a register. Merge SrcTy to NarrowTy
5285 // (register size) pieces first. Then merge each of the NarrowTy pieces to DstTy.
5286
5287 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5288 //
5289 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5290 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5291 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5292 SmallVector<Register, 8> NarrowTyElts;
5293 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5294 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5295 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5296 for (unsigned i = 0; i < NumParts; ++i) {
5297 SmallVector<Register, 8> Sources;
5298 for (unsigned j = 0; j < NumElts; ++j)
5299 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5300 NarrowTyElts.push_back(
5301 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5302 }
5303
5304 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5305 MI.eraseFromParent();
5306 return Legalized;
5307}
5308
5309LegalizerHelper::LegalizeResult
5310LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5311 unsigned TypeIdx,
5312 LLT NarrowVecTy) {
5313 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5314 Register InsertVal;
5315 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5316
5317 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5318 if (IsInsert)
5319 InsertVal = MI.getOperand(2).getReg();
5320
5321 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5322 LLT VecTy = MRI.getType(SrcVec);
5323
5324 // If the index is a constant, we can really break this down as you would
5325 // expect, and index into the target size pieces.
5326 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5327 if (MaybeCst) {
5328 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5329 // Avoid out of bounds indexing the pieces.
5330 if (IdxVal >= VecTy.getNumElements()) {
5331 MIRBuilder.buildUndef(DstReg);
5332 MI.eraseFromParent();
5333 return Legalized;
5334 }
5335
5336 if (!NarrowVecTy.isVector()) {
5337 SmallVector<Register, 8> SplitPieces;
5338 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5339 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5340 if (IsInsert) {
5341 SplitPieces[IdxVal] = InsertVal;
5342 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5343 } else {
5344 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5345 }
5346 } else {
5347 SmallVector<Register, 8> VecParts;
5348 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5349
5350 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5351 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5352 TargetOpcode::G_ANYEXT);
5353
5354 unsigned NewNumElts = NarrowVecTy.getNumElements();
5355
5356 LLT IdxTy = MRI.getType(Idx);
5357 int64_t PartIdx = IdxVal / NewNumElts;
5358 auto NewIdx =
5359 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5360
5361 if (IsInsert) {
5362 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5363
5364 // Use the adjusted index to insert into one of the subvectors.
5365 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5366 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5367 VecParts[PartIdx] = InsertPart.getReg(0);
5368
5369 // Recombine the inserted subvector with the others to reform the result
5370 // vector.
5371 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5372 } else {
5373 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5374 }
5375 }
5376
5377 MI.eraseFromParent();
5378 return Legalized;
5379 }
5380
5381 // With a variable index, we can't perform the operation in a smaller type, so
5382 // we're forced to expand this.
5383 //
5384 // TODO: We could emit a chain of compare/select to figure out which piece to
5385 // index.
5386 return lowerExtractInsertVectorElt(MI);
5387}
5388
5389LegalizerHelper::LegalizeResult
5390LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5391 LLT NarrowTy) {
5392 // FIXME: Don't know how to handle secondary types yet.
5393 if (TypeIdx != 0)
5394 return UnableToLegalize;
5395
5396 if (!NarrowTy.isByteSized()) {
5397 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5398 return UnableToLegalize;
5399 }
5400
5401 // This implementation doesn't work for atomics. Give up instead of doing
5402 // something invalid.
5403 if (LdStMI.isAtomic())
5404 return UnableToLegalize;
5405
5406 bool IsLoad = isa<GLoad>(LdStMI);
5407 Register ValReg = LdStMI.getReg(0);
5408 Register AddrReg = LdStMI.getPointerReg();
5409 LLT ValTy = MRI.getType(ValReg);
5410
5411 // FIXME: Do we need a distinct NarrowMemory legalize action?
5412 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5413 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5414 return UnableToLegalize;
5415 }
5416
5417 int NumParts = -1;
5418 int NumLeftover = -1;
5419 LLT LeftoverTy;
5420 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5421 if (IsLoad) {
5422 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5423 } else {
5424 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5425 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5426 NumParts = NarrowRegs.size();
5427 NumLeftover = NarrowLeftoverRegs.size();
5428 }
5429 }
5430
5431 if (NumParts == -1)
5432 return UnableToLegalize;
5433
5434 LLT PtrTy = MRI.getType(AddrReg);
5435 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5436
5437 unsigned TotalSize = ValTy.getSizeInBits();
5438
5439 // Split the load/store into PartTy sized pieces starting at Offset. If this
5440 // is a load, return the new registers in ValRegs. For a store, each element
5441 // of ValRegs should be PartTy. Returns the next offset that needs to be
5442 // handled.
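// e.g. narrowing an s96 load with NarrowTy = s32 emits three s32 loads at
// byte offsets 0, 4 and 8 (visited in reverse for big-endian targets) that
// are then recombined into the original 96-bit value.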
5443 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5444 auto MMO = LdStMI.getMMO();
5445 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5446 unsigned NumParts, unsigned Offset) -> unsigned {
5447 MachineFunction &MF = MIRBuilder.getMF();
5448 unsigned PartSize = PartTy.getSizeInBits();
5449 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5450 ++Idx) {
5451 unsigned ByteOffset = Offset / 8;
5452 Register NewAddrReg;
5453
5454 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5455 ByteOffset);
5456
5457 MachineMemOperand *NewMMO =
5458 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5459
5460 if (IsLoad) {
5461 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5462 ValRegs.push_back(Dst);
5463 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5464 } else {
5465 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5466 }
5467 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5468 }
5469
5470 return Offset;
5471 };
5472
5473 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5474 unsigned HandledOffset =
5475 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5476
5477 // Handle the rest of the register if this isn't an even type breakdown.
5478 if (LeftoverTy.isValid())
5479 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5480
5481 if (IsLoad) {
5482 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5483 LeftoverTy, NarrowLeftoverRegs);
5484 }
5485
5486 LdStMI.eraseFromParent();
5487 return Legalized;
5488}
5489
5490LegalizerHelper::LegalizeResult
5491LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5492 LLT NarrowTy) {
5493 using namespace TargetOpcode;
5494 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5495 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5496
5497 switch (MI.getOpcode()) {
5498 case G_IMPLICIT_DEF:
5499 case G_TRUNC:
5500 case G_AND:
5501 case G_OR:
5502 case G_XOR:
5503 case G_ADD:
5504 case G_SUB:
5505 case G_MUL:
5506 case G_PTR_ADD:
5507 case G_SMULH:
5508 case G_UMULH:
5509 case G_FADD:
5510 case G_FMUL:
5511 case G_FSUB:
5512 case G_FNEG:
5513 case G_FABS:
5514 case G_FCANONICALIZE:
5515 case G_FDIV:
5516 case G_FREM:
5517 case G_FMA:
5518 case G_FMAD:
5519 case G_FPOW:
5520 case G_FEXP:
5521 case G_FEXP2:
5522 case G_FEXP10:
5523 case G_FLOG:
5524 case G_FLOG2:
5525 case G_FLOG10:
5526 case G_FLDEXP:
5527 case G_FNEARBYINT:
5528 case G_FCEIL:
5529 case G_FFLOOR:
5530 case G_FRINT:
5531 case G_INTRINSIC_LRINT:
5532 case G_INTRINSIC_LLRINT:
5533 case G_INTRINSIC_ROUND:
5534 case G_INTRINSIC_ROUNDEVEN:
5535 case G_LROUND:
5536 case G_LLROUND:
5537 case G_INTRINSIC_TRUNC:
5538 case G_FMODF:
5539 case G_FCOS:
5540 case G_FSIN:
5541 case G_FTAN:
5542 case G_FACOS:
5543 case G_FASIN:
5544 case G_FATAN:
5545 case G_FATAN2:
5546 case G_FCOSH:
5547 case G_FSINH:
5548 case G_FTANH:
5549 case G_FSQRT:
5550 case G_BSWAP:
5551 case G_BITREVERSE:
5552 case G_SDIV:
5553 case G_UDIV:
5554 case G_SREM:
5555 case G_UREM:
5556 case G_SDIVREM:
5557 case G_UDIVREM:
5558 case G_SMIN:
5559 case G_SMAX:
5560 case G_UMIN:
5561 case G_UMAX:
5562 case G_ABS:
5563 case G_FMINNUM:
5564 case G_FMAXNUM:
5565 case G_FMINNUM_IEEE:
5566 case G_FMAXNUM_IEEE:
5567 case G_FMINIMUM:
5568 case G_FMAXIMUM:
5569 case G_FMINIMUMNUM:
5570 case G_FMAXIMUMNUM:
5571 case G_FSHL:
5572 case G_FSHR:
5573 case G_ROTL:
5574 case G_ROTR:
5575 case G_FREEZE:
5576 case G_SADDSAT:
5577 case G_SSUBSAT:
5578 case G_UADDSAT:
5579 case G_USUBSAT:
5580 case G_UMULO:
5581 case G_SMULO:
5582 case G_SHL:
5583 case G_LSHR:
5584 case G_ASHR:
5585 case G_SSHLSAT:
5586 case G_USHLSAT:
5587 case G_CTLZ:
5588 case G_CTLZ_ZERO_UNDEF:
5589 case G_CTTZ:
5590 case G_CTTZ_ZERO_UNDEF:
5591 case G_CTPOP:
5592 case G_FCOPYSIGN:
5593 case G_ZEXT:
5594 case G_SEXT:
5595 case G_ANYEXT:
5596 case G_FPEXT:
5597 case G_FPTRUNC:
5598 case G_SITOFP:
5599 case G_UITOFP:
5600 case G_FPTOSI:
5601 case G_FPTOUI:
5602 case G_FPTOSI_SAT:
5603 case G_FPTOUI_SAT:
5604 case G_INTTOPTR:
5605 case G_PTRTOINT:
5606 case G_ADDRSPACE_CAST:
5607 case G_UADDO:
5608 case G_USUBO:
5609 case G_UADDE:
5610 case G_USUBE:
5611 case G_SADDO:
5612 case G_SSUBO:
5613 case G_SADDE:
5614 case G_SSUBE:
5615 case G_STRICT_FADD:
5616 case G_STRICT_FSUB:
5617 case G_STRICT_FMUL:
5618 case G_STRICT_FMA:
5619 case G_STRICT_FLDEXP:
5620 case G_FFREXP:
5621 return fewerElementsVectorMultiEltType(GMI, NumElts);
5622 case G_ICMP:
5623 case G_FCMP:
5624 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5625 case G_IS_FPCLASS:
5626 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5627 case G_SELECT:
5628 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5629 return fewerElementsVectorMultiEltType(GMI, NumElts);
5630 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5631 case G_PHI:
5632 return fewerElementsVectorPhi(GMI, NumElts);
5633 case G_UNMERGE_VALUES:
5634 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5635 case G_BUILD_VECTOR:
5636 assert(TypeIdx == 0 && "not a vector type index");
5637 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5638 case G_CONCAT_VECTORS:
5639 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5640 return UnableToLegalize;
5641 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5642 case G_EXTRACT_VECTOR_ELT:
5643 case G_INSERT_VECTOR_ELT:
5644 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5645 case G_LOAD:
5646 case G_STORE:
5647 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5648 case G_SEXT_INREG:
5649 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5650 GISEL_VECREDUCE_CASES_NONSEQ
5651 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5652 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5653 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5654 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5655 case G_SHUFFLE_VECTOR:
5656 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5657 case G_FPOWI:
5658 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5659 case G_BITCAST:
5660 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5661 case G_INTRINSIC_FPTRUNC_ROUND:
5662 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5663 default:
5664 return UnableToLegalize;
5665 }
5666}
5667
5668LegalizerHelper::LegalizeResult
5669LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5670 LLT NarrowTy) {
5671 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5672 "Not a bitcast operation");
5673
5674 if (TypeIdx != 0)
5675 return UnableToLegalize;
5676
5677 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5678
5679 unsigned NewElemCount =
5680 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5681 SmallVector<Register> SrcVRegs, BitcastVRegs;
5682 if (NewElemCount == 1) {
5683 LLT SrcNarrowTy = SrcTy.getElementType();
5684
5685 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5686 getUnmergeResults(SrcVRegs, *Unmerge);
5687 } else {
5688 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5689
5690 // Split the Src and Dst Reg into smaller registers
5691 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5692 return UnableToLegalize;
5693 }
5694
5695 // Build new smaller bitcast instructions
5696 // Not supporting Leftover types for now but will have to
5697 for (Register Reg : SrcVRegs)
5698 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5699
5700 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5701 MI.eraseFromParent();
5702 return Legalized;
5703}
5704
5705LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5706 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5707 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5708 if (TypeIdx != 0)
5709 return UnableToLegalize;
5710
5711 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5712 MI.getFirst3RegLLTs();
5713 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5714 // The shuffle should be canonicalized by now.
5715 if (DstTy != Src1Ty)
5716 return UnableToLegalize;
5717 if (DstTy != Src2Ty)
5718 return UnableToLegalize;
5719
5720 if (!isPowerOf2_32(DstTy.getNumElements()))
5721 return UnableToLegalize;
5722
5723 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5724 // Further legalization attempts will be needed to split it further.
5725 NarrowTy =
5726 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5727 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5728
5729 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5730 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5731 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5732 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5733 SplitSrc2Regs[1]};
5734
5735 Register Hi, Lo;
5736
5737 // If Lo or Hi uses elements from at most two of the four input vectors, then
5738 // express it as a vector shuffle of those two inputs. Otherwise extract the
5739 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
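// e.g. splitting <4 x s32> = G_SHUFFLE_VECTOR %a, %b, shufflemask(0,4,1,5)
// into two <2 x s32> halves: the low half only needs elements from the low
// pieces of %a and %b, so it stays a shuffle of those two inputs; a half that
// would need more than two of the four pieces is built element by element.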
5740 SmallVector<int, 16> Ops;
5741 for (unsigned High = 0; High < 2; ++High) {
5742 Register &Output = High ? Hi : Lo;
5743
5744 // Build a shuffle mask for the output, discovering on the fly which
5745 // input vectors to use as shuffle operands (recorded in InputUsed).
5746 // If building a suitable shuffle vector proves too hard, then bail
5747 // out with useBuildVector set.
5748 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5749 unsigned FirstMaskIdx = High * NewElts;
5750 bool UseBuildVector = false;
5751 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5752 // The mask element. This indexes into the input.
5753 int Idx = Mask[FirstMaskIdx + MaskOffset];
5754
5755 // The input vector this mask element indexes into.
5756 unsigned Input = (unsigned)Idx / NewElts;
5757
5758 if (Input >= std::size(Inputs)) {
5759 // The mask element does not index into any input vector.
5760 Ops.push_back(-1);
5761 continue;
5762 }
5763
5764 // Turn the index into an offset from the start of the input vector.
5765 Idx -= Input * NewElts;
5766
5767 // Find or create a shuffle vector operand to hold this input.
5768 unsigned OpNo;
5769 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5770 if (InputUsed[OpNo] == Input) {
5771 // This input vector is already an operand.
5772 break;
5773 } else if (InputUsed[OpNo] == -1U) {
5774 // Create a new operand for this input vector.
5775 InputUsed[OpNo] = Input;
5776 break;
5777 }
5778 }
5779
5780 if (OpNo >= std::size(InputUsed)) {
5781 // More than two input vectors used! Give up on trying to create a
5782 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5783 UseBuildVector = true;
5784 break;
5785 }
5786
5787 // Add the mask index for the new shuffle vector.
5788 Ops.push_back(Idx + OpNo * NewElts);
5789 }
5790
5791 if (UseBuildVector) {
5792 LLT EltTy = NarrowTy.getElementType();
5793 SmallVector<Register, 16> SVOps;
5794
5795 // Extract the input elements by hand.
5796 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5797 // The mask element. This indexes into the input.
5798 int Idx = Mask[FirstMaskIdx + MaskOffset];
5799
5800 // The input vector this mask element indexes into.
5801 unsigned Input = (unsigned)Idx / NewElts;
5802
5803 if (Input >= std::size(Inputs)) {
5804 // The mask element is "undef" or indexes off the end of the input.
5805 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5806 continue;
5807 }
5808
5809 // Turn the index into an offset from the start of the input vector.
5810 Idx -= Input * NewElts;
5811
5812 // Extract the vector element by hand.
5813 SVOps.push_back(MIRBuilder
5814 .buildExtractVectorElement(
5815 EltTy, Inputs[Input],
5816 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5817 .getReg(0));
5818 }
5819
5820 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5821 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5822 } else if (InputUsed[0] == -1U) {
5823 // No input vectors were used! The result is undefined.
5824 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5825 } else if (NewElts == 1) {
5826 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5827 } else {
5828 Register Op0 = Inputs[InputUsed[0]];
5829 // If only one input was used, use an undefined vector for the other.
5830 Register Op1 = InputUsed[1] == -1U
5831 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5832 : Inputs[InputUsed[1]];
5833 // At least one input vector was used. Create a new shuffle vector.
5834 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5835 }
5836
5837 Ops.clear();
5838 }
5839
5840 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5841 MI.eraseFromParent();
5842 return Legalized;
5843}
5844
5845LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5846 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5847 auto &RdxMI = cast<GVecReduce>(MI);
5848
5849 if (TypeIdx != 1)
5850 return UnableToLegalize;
5851
5852 // The semantics of the normal non-sequential reductions allow us to freely
5853 // re-associate the operation.
5854 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5855
5856 if (NarrowTy.isVector() &&
5857 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5858 return UnableToLegalize;
5859
5860 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5861 SmallVector<Register> SplitSrcs;
5862 // If NarrowTy is a scalar then we're being asked to scalarize.
5863 const unsigned NumParts =
5864 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5865 : SrcTy.getNumElements();
5866
5867 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5868 if (NarrowTy.isScalar()) {
5869 if (DstTy != NarrowTy)
5870 return UnableToLegalize; // FIXME: handle implicit extensions.
5871
5872 if (isPowerOf2_32(NumParts)) {
5873 // Generate a tree of scalar operations to reduce the critical path.
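// e.g. 8 scalar pieces are combined pairwise in three rounds:
// 8 -> 4 -> 2 -> 1 partial results.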
5874 SmallVector<Register> PartialResults;
5875 unsigned NumPartsLeft = NumParts;
5876 while (NumPartsLeft > 1) {
5877 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5878 PartialResults.emplace_back(
5879 MIRBuilder
5880 .buildInstr(ScalarOpc, {NarrowTy},
5881 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5882 .getReg(0));
5883 }
5884 SplitSrcs = PartialResults;
5885 PartialResults.clear();
5886 NumPartsLeft = SplitSrcs.size();
5887 }
5888 assert(SplitSrcs.size() == 1);
5889 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5890 MI.eraseFromParent();
5891 return Legalized;
5892 }
5893 // If we can't generate a tree, then just do sequential operations.
5894 Register Acc = SplitSrcs[0];
5895 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5896 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5897 .getReg(0);
5898 MIRBuilder.buildCopy(DstReg, Acc);
5899 MI.eraseFromParent();
5900 return Legalized;
5901 }
5902 SmallVector<Register> PartialReductions;
5903 for (unsigned Part = 0; Part < NumParts; ++Part) {
5904 PartialReductions.push_back(
5905 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5906 .getReg(0));
5907 }
5908
5909 // If the types involved are powers of 2, we can generate intermediate vector
5910 // ops, before generating a final reduction operation.
5911 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5912 isPowerOf2_32(NarrowTy.getNumElements())) {
5913 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5914 }
5915
5916 Register Acc = PartialReductions[0];
5917 for (unsigned Part = 1; Part < NumParts; ++Part) {
5918 if (Part == NumParts - 1) {
5919 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5920 {Acc, PartialReductions[Part]});
5921 } else {
5922 Acc = MIRBuilder
5923 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5924 .getReg(0);
5925 }
5926 }
5927 MI.eraseFromParent();
5928 return Legalized;
5929}
5930
5931LegalizerHelper::LegalizeResult
5932LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5933 unsigned int TypeIdx,
5934 LLT NarrowTy) {
5935 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5936 MI.getFirst3RegLLTs();
5937 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5938 DstTy != NarrowTy)
5939 return UnableToLegalize;
5940
5941 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5942 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5943 "Unexpected vecreduce opcode");
5944 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5945 ? TargetOpcode::G_FADD
5946 : TargetOpcode::G_FMUL;
5947
5948 SmallVector<Register> SplitSrcs;
5949 unsigned NumParts = SrcTy.getNumElements();
5950 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5951 Register Acc = ScalarReg;
5952 for (unsigned i = 0; i < NumParts; i++)
5953 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5954 .getReg(0);
5955
5956 MIRBuilder.buildCopy(DstReg, Acc);
5957 MI.eraseFromParent();
5958 return Legalized;
5959}
5960
5961LegalizerHelper::LegalizeResult
5962LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5963 LLT SrcTy, LLT NarrowTy,
5964 unsigned ScalarOpc) {
5965 SmallVector<Register> SplitSrcs;
5966 // Split the sources into NarrowTy size pieces.
5967 extractParts(SrcReg, NarrowTy,
5968 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5969 MIRBuilder, MRI);
5970 // We're going to do a tree reduction using vector operations until we have
5971 // one NarrowTy size value left.
5972 while (SplitSrcs.size() > 1) {
5973 SmallVector<Register> PartialRdxs;
5974 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5975 Register LHS = SplitSrcs[Idx];
5976 Register RHS = SplitSrcs[Idx + 1];
5977 // Create the intermediate vector op.
5978 Register Res =
5979 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5980 PartialRdxs.push_back(Res);
5981 }
5982 SplitSrcs = std::move(PartialRdxs);
5983 }
5984 // Finally generate the requested NarrowTy based reduction.
5985 Observer.changingInstr(MI);
5986 MI.getOperand(1).setReg(SplitSrcs[0]);
5987 Observer.changedInstr(MI);
5988 return Legalized;
5989}
5990
5991LegalizerHelper::LegalizeResult
5992LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5993 const LLT HalfTy, const LLT AmtTy) {
5994
5995 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5996 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5997 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5998
5999 if (Amt.isZero()) {
6000 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6001 MI.eraseFromParent();
6002 return Legalized;
6003 }
6004
6005 LLT NVT = HalfTy;
6006 unsigned NVTBits = HalfTy.getSizeInBits();
6007 unsigned VTBits = 2 * NVTBits;
6008
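// With a constant amount each half is picked from one of four cases (tested
// in this order), e.g. for G_SHL with 32-bit halves:
//   Amt > 64   -> Lo = Hi = 0
//   Amt > 32   -> Lo = 0,           Hi = InL << (Amt - 32)
//   Amt == 32  -> Lo = 0,           Hi = InL
//   Amt < 32   -> Lo = InL << Amt,  Hi = (InH << Amt) | (InL >> (32 - Amt))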
6009 SrcOp Lo(Register(0)), Hi(Register(0));
6010 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6011 if (Amt.ugt(VTBits)) {
6012 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6013 } else if (Amt.ugt(NVTBits)) {
6014 Lo = MIRBuilder.buildConstant(NVT, 0);
6015 Hi = MIRBuilder.buildShl(NVT, InL,
6016 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6017 } else if (Amt == NVTBits) {
6018 Lo = MIRBuilder.buildConstant(NVT, 0);
6019 Hi = InL;
6020 } else {
6021 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6022 auto OrLHS =
6023 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6024 auto OrRHS = MIRBuilder.buildLShr(
6025 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6026 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6027 }
6028 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6029 if (Amt.ugt(VTBits)) {
6030 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6031 } else if (Amt.ugt(NVTBits)) {
6032 Lo = MIRBuilder.buildLShr(NVT, InH,
6033 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6034 Hi = MIRBuilder.buildConstant(NVT, 0);
6035 } else if (Amt == NVTBits) {
6036 Lo = InH;
6037 Hi = MIRBuilder.buildConstant(NVT, 0);
6038 } else {
6039 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6040
6041 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6042 auto OrRHS = MIRBuilder.buildShl(
6043 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6044
6045 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6046 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6047 }
6048 } else {
6049 if (Amt.ugt(VTBits)) {
6050 Hi = Lo = MIRBuilder.buildAShr(
6051 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6052 } else if (Amt.ugt(NVTBits)) {
6053 Lo = MIRBuilder.buildAShr(NVT, InH,
6054 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6055 Hi = MIRBuilder.buildAShr(NVT, InH,
6056 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6057 } else if (Amt == NVTBits) {
6058 Lo = InH;
6059 Hi = MIRBuilder.buildAShr(NVT, InH,
6060 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6061 } else {
6062 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6063
6064 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6065 auto OrRHS = MIRBuilder.buildShl(
6066 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6067
6068 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6069 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6070 }
6071 }
6072
6073 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6074 MI.eraseFromParent();
6075
6076 return Legalized;
6077}
6078
6079LegalizerHelper::LegalizeResult
6080LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6081 LLT RequestedTy) {
6082 if (TypeIdx == 1) {
6083 Observer.changingInstr(MI);
6084 narrowScalarSrc(MI, RequestedTy, 2);
6085 Observer.changedInstr(MI);
6086 return Legalized;
6087 }
6088
6089 Register DstReg = MI.getOperand(0).getReg();
6090 LLT DstTy = MRI.getType(DstReg);
6091 if (DstTy.isVector())
6092 return UnableToLegalize;
6093
6094 Register Amt = MI.getOperand(2).getReg();
6095 LLT ShiftAmtTy = MRI.getType(Amt);
6096 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6097 if (DstEltSize % 2 != 0)
6098 return UnableToLegalize;
6099
6100 // Check if we should use multi-way splitting instead of recursive binary
6101 // splitting.
6102 //
6103 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6104 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6105 // and dependency chains created by the usual binary splitting approach
6106 // (128->64->32).
6107 //
6108 // The >= 8 parts threshold ensures we only use this optimization when binary
6109 // splitting would require multiple recursive passes, avoiding overhead for
6110 // simple 2-way splits where the binary approach is sufficient.
6111 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6112 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6113 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6114 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6115 // steps).
6116 if (NumParts >= 8)
6117 return narrowScalarShiftMultiway(MI, RequestedTy);
6118 }
6119
6120 // Fall back to binary splitting:
6121 // Ignore the input type. We can only go to exactly half the size of the
6122 // input. If that isn't small enough, the resulting pieces will be further
6123 // legalized.
6124 const unsigned NewBitSize = DstEltSize / 2;
6125 const LLT HalfTy = LLT::scalar(NewBitSize);
6126 const LLT CondTy = LLT::scalar(1);
6127
6128 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6129 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6130 ShiftAmtTy);
6131 }
6132
6133 // TODO: Expand with known bits.
6134
6135 // Handle the fully general expansion by an unknown amount.
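// Compute both halves for the "short" (Amt < NewBitSize) and "long"
// (Amt >= NewBitSize) cases and pick the right one with selects; an extra
// select makes a zero shift amount a no-op.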
6136 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6137
6138 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6139 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6140 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6141
6142 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6143 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6144
6145 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6146 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6147 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6148
6149 Register ResultRegs[2];
6150 switch (MI.getOpcode()) {
6151 case TargetOpcode::G_SHL: {
6152 // Short: ShAmt < NewBitSize
6153 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6154
6155 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6156 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6157 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6158
6159 // Long: ShAmt >= NewBitSize
6160 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6161 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6162
6163 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6164 auto Hi = MIRBuilder.buildSelect(
6165 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6166
6167 ResultRegs[0] = Lo.getReg(0);
6168 ResultRegs[1] = Hi.getReg(0);
6169 break;
6170 }
6171 case TargetOpcode::G_LSHR:
6172 case TargetOpcode::G_ASHR: {
6173 // Short: ShAmt < NewBitSize
6174 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6175
6176 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6177 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6178 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6179
6180 // Long: ShAmt >= NewBitSize
6181 MachineInstrBuilder HiL;
6182 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6183 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6184 } else {
6185 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6186 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6187 }
6188 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6189 {InH, AmtExcess}); // Lo from Hi part.
6190
6191 auto Lo = MIRBuilder.buildSelect(
6192 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6193
6194 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6195
6196 ResultRegs[0] = Lo.getReg(0);
6197 ResultRegs[1] = Hi.getReg(0);
6198 break;
6199 }
6200 default:
6201 llvm_unreachable("not a shift");
6202 }
6203
6204 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6205 MI.eraseFromParent();
6206 return Legalized;
6207}
6208
6209Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6210 unsigned PartIdx,
6211 unsigned NumParts,
6212 ArrayRef<Register> SrcParts,
6213 const ShiftParams &Params,
6214 LLT TargetTy, LLT ShiftAmtTy) {
6215 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6216 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6217 assert(WordShiftConst && BitShiftConst && "Expected constants");
6218
6219 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6220 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6221 const bool NeedsInterWordShift = ShiftBits != 0;
6222
6223 switch (Opcode) {
6224 case TargetOpcode::G_SHL: {
6225 // Data moves from lower indices to higher indices
6226 // If this part would come from a source beyond our range, it's zero
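// e.g. a 128-bit G_SHL by 40 split into 32-bit parts has ShiftWords = 1 and
// ShiftBits = 8: part 0 becomes zero, part 1 becomes Src[0] << 8, and part I
// for I >= 2 becomes (Src[I-1] << 8) | (Src[I-2] >> 24).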
6227 if (PartIdx < ShiftWords)
6228 return Params.Zero;
6229
6230 unsigned SrcIdx = PartIdx - ShiftWords;
6231 if (!NeedsInterWordShift)
6232 return SrcParts[SrcIdx];
6233
6234 // Combine shifted main part with carry from previous part
6235 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6236 if (SrcIdx > 0) {
6237 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6238 Params.InvBitShift);
6239 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6240 }
6241 return Hi.getReg(0);
6242 }
6243
6244 case TargetOpcode::G_LSHR: {
6245 unsigned SrcIdx = PartIdx + ShiftWords;
6246 if (SrcIdx >= NumParts)
6247 return Params.Zero;
6248 if (!NeedsInterWordShift)
6249 return SrcParts[SrcIdx];
6250
6251 // Combine shifted main part with carry from next part
6252 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6253 if (SrcIdx + 1 < NumParts) {
6254 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6255 Params.InvBitShift);
6256 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6257 }
6258 return Lo.getReg(0);
6259 }
6260
6261 case TargetOpcode::G_ASHR: {
6262 // Like LSHR but preserves sign bit
6263 unsigned SrcIdx = PartIdx + ShiftWords;
6264 if (SrcIdx >= NumParts)
6265 return Params.SignBit;
6266 if (!NeedsInterWordShift)
6267 return SrcParts[SrcIdx];
6268
6269 // Only the original MSB part uses arithmetic shift to preserve sign. All
6270 // other parts use logical shift since they're just moving data bits.
6271 auto Lo =
6272 (SrcIdx == NumParts - 1)
6273 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6274 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6275 Register HiSrc =
6276 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6277 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6278 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6279 }
6280
6281 default:
6282 llvm_unreachable("not a shift");
6283 }
6284}
6285
6286Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6287 Register MainOperand,
6288 Register ShiftAmt,
6289 LLT TargetTy,
6290 Register CarryOperand) {
6291 // This helper generates a single output part for variable shifts by combining
6292 // the main operand (shifted by BitShift) with carry bits from an adjacent
6293 // part.
6294
6295 // For G_ASHR, individual parts don't have their own sign bit, only the
6296 // complete value does. So we use LSHR for the main operand shift in ASHR
6297 // context.
6298 unsigned MainOpcode =
6299 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6300
6301 // Perform the primary shift on the main operand
6302 Register MainShifted =
6303 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6304 .getReg(0);
6305
6306 // No carry operand available
6307 if (!CarryOperand.isValid())
6308 return MainShifted;
6309
6310 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6311 // so carry bits aren't needed.
6312 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6313 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6314 LLT BoolTy = LLT::scalar(1);
6315 auto IsZeroBitShift =
6316 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6317
6318 // Extract bits from the adjacent part that will "carry over" into this part.
6319 // The carry direction is opposite to the main shift direction, so we can
6320 // align the two shifted values before combining them with OR.
6321
6322 // Determine the carry shift opcode (opposite direction)
6323 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6324 : TargetOpcode::G_SHL;
6325
6326 // Calculate inverse shift amount: BitWidth - ShiftAmt
6327 auto TargetBitsConst =
6328 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6329 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6330
6331 // Shift the carry operand
6332 Register CarryBits =
6333 MIRBuilder
6334 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6335 .getReg(0);
6336
6337 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6338 // TargetBits which would be poison for the individual carry shift operation).
6339 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6340 Register SafeCarryBits =
6341 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6342 .getReg(0);
6343
6344 // Combine the main shifted part with the carry bits
6345 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6346}
6347
6348LegalizerHelper::LegalizeResult
6349LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6350 const APInt &Amt,
6351 LLT TargetTy,
6352 LLT ShiftAmtTy) {
6353 // Any wide shift can be decomposed into WordShift + BitShift components.
6354 // When shift amount is known constant, directly compute the decomposition
6355 // values and generate constant registers.
6356 Register DstReg = MI.getOperand(0).getReg();
6357 Register SrcReg = MI.getOperand(1).getReg();
6358 LLT DstTy = MRI.getType(DstReg);
6359
6360 const unsigned DstBits = DstTy.getScalarSizeInBits();
6361 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6362 const unsigned NumParts = DstBits / TargetBits;
6363
6364 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6365
6366 // When the shift amount is known at compile time, we just calculate which
6367 // source parts contribute to each output part.
6368
6369 SmallVector<Register, 8> SrcParts;
6370 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6371
6372 if (Amt.isZero()) {
6373 // No shift needed, just copy
6374 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6375 MI.eraseFromParent();
6376 return Legalized;
6377 }
6378
6379 ShiftParams Params;
6380 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6381 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
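// e.g. a 128-bit shift by 40 with 32-bit parts gives ShiftWords = 1 and
// ShiftBits = 8.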
6382
6383 // Generate constants and values needed by all shift types
6384 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6385 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6386 Params.InvBitShift =
6387 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6388 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6389
6390 // For ASHR, we need the sign-extended value to fill shifted-out positions
6391 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6392 Params.SignBit =
6393 MIRBuilder
6394 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6395 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6396 .getReg(0);
6397
6398 SmallVector<Register, 8> DstParts(NumParts);
6399 for (unsigned I = 0; I < NumParts; ++I)
6400 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6401 Params, TargetTy, ShiftAmtTy);
6402
6403 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6404 MI.eraseFromParent();
6405 return Legalized;
6406}
6407
6408LegalizerHelper::LegalizeResult
6409LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6410 Register DstReg = MI.getOperand(0).getReg();
6411 Register SrcReg = MI.getOperand(1).getReg();
6412 Register AmtReg = MI.getOperand(2).getReg();
6413 LLT DstTy = MRI.getType(DstReg);
6414 LLT ShiftAmtTy = MRI.getType(AmtReg);
6415
6416 const unsigned DstBits = DstTy.getScalarSizeInBits();
6417 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6418 const unsigned NumParts = DstBits / TargetBits;
6419
6420 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6421 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6422
6423 // If the shift amount is known at compile time, we can use direct indexing
6424 // instead of generating select chains in the general case.
6425 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6426 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6427 ShiftAmtTy);
6428
6429 // For runtime-variable shift amounts, we must generate a more complex
6430 // sequence that handles all possible shift values using select chains.
6431
6432 // Split the input into target-sized pieces
6433 SmallVector<Register, 8> SrcParts;
6434 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6435
6436 // Shifting by zero should be a no-op.
6437 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6438 LLT BoolTy = LLT::scalar(1);
6439 auto IsZeroShift =
6440 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6441
6442 // Any wide shift can be decomposed into two components:
6443 // 1. WordShift: number of complete target-sized words to shift
6444 // 2. BitShift: number of bits to shift within each word
6445 //
6446 // Example: 128-bit >> 50 with 32-bit target:
6447 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6448 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6449 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6450 auto TargetBitsLog2Const =
6451 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6452 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6453
6454 Register WordShift =
6455 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6456 Register BitShift =
6457 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6458
6459 // Fill values:
6460 // - SHL/LSHR: fill with zeros
6461 // - ASHR: fill with sign-extended MSB
6462 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6463
6464 Register FillValue;
6465 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6466 auto TargetBitsMinusOneConst =
6467 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6468 FillValue = MIRBuilder
6469 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6470 TargetBitsMinusOneConst)
6471 .getReg(0);
6472 } else {
6473 FillValue = ZeroReg;
6474 }
6475
6476 SmallVector<Register, 8> DstParts(NumParts);
6477
6478 // For each output part, generate a select chain that chooses the correct
6479 // result based on the runtime WordShift value. This handles all possible
6480 // word shift amounts by pre-calculating what each would produce.
6481 for (unsigned I = 0; I < NumParts; ++I) {
6482 // Initialize with appropriate default value for this shift type
6483 Register InBoundsResult = FillValue;
6484
6485 // clang-format off
6486 // Build a branchless select chain by pre-computing results for all possible
6487 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6488 //
6489 // K=0: select(WordShift==0, result0, FillValue)
6490 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6491 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6492 // clang-format on
6493 for (unsigned K = 0; K < NumParts; ++K) {
6494 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6495 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6496 WordShift, WordShiftKConst);
6497
6498 // Calculate source indices for this word shift
6499 //
6500 // For 4-part 128-bit value with K=1 word shift:
6501 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6502 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6503 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6504 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6505 int MainSrcIdx;
6506 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6507
6508 switch (MI.getOpcode()) {
6509 case TargetOpcode::G_SHL:
6510 MainSrcIdx = (int)I - (int)K;
6511 CarrySrcIdx = MainSrcIdx - 1;
6512 break;
6513 case TargetOpcode::G_LSHR:
6514 case TargetOpcode::G_ASHR:
6515 MainSrcIdx = (int)I + (int)K;
6516 CarrySrcIdx = MainSrcIdx + 1;
6517 break;
6518 default:
6519 llvm_unreachable("Not a shift");
6520 }
6521
6522 // Check bounds and build the result for this word shift
6523 Register ResultForK;
6524 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6525 Register MainOp = SrcParts[MainSrcIdx];
6526 Register CarryOp;
6527
6528 // Determine carry operand with bounds checking
6529 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6530 CarryOp = SrcParts[CarrySrcIdx];
6531 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6532 CarrySrcIdx >= (int)NumParts)
6533 CarryOp = FillValue; // Use sign extension
6534
6535 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6536 TargetTy, CarryOp);
6537 } else {
6538 // Out of bounds - use fill value for this k
6539 ResultForK = FillValue;
6540 }
6541
6542 // Select this result if WordShift equals k
6543 InBoundsResult =
6544 MIRBuilder
6545 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6546 .getReg(0);
6547 }
6548
6549 // Handle zero-shift special case: if shift is 0, use original input
6550 DstParts[I] =
6551 MIRBuilder
6552 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6553 .getReg(0);
6554 }
6555
6556 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6557 MI.eraseFromParent();
6558 return Legalized;
6559}
6560
6561LegalizerHelper::LegalizeResult
6562LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6563 LLT MoreTy) {
6564 assert(TypeIdx == 0 && "Expecting only Idx 0");
6565
6566 Observer.changingInstr(MI);
6567 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6568 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6569 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6570 moreElementsVectorSrc(MI, MoreTy, I);
6571 }
6572
6573 MachineBasicBlock &MBB = *MI.getParent();
6574 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6575 moreElementsVectorDst(MI, MoreTy, 0);
6576 Observer.changedInstr(MI);
6577 return Legalized;
6578}
6579
6580MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6581 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6582 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6583
6584 switch (Opcode) {
6585 default:
6586 llvm_unreachable(
6587 "getNeutralElementForVecReduce called with invalid opcode!");
6588 case TargetOpcode::G_VECREDUCE_ADD:
6589 case TargetOpcode::G_VECREDUCE_OR:
6590 case TargetOpcode::G_VECREDUCE_XOR:
6591 case TargetOpcode::G_VECREDUCE_UMAX:
6592 return MIRBuilder.buildConstant(Ty, 0);
6593 case TargetOpcode::G_VECREDUCE_MUL:
6594 return MIRBuilder.buildConstant(Ty, 1);
6595 case TargetOpcode::G_VECREDUCE_AND:
6596 case TargetOpcode::G_VECREDUCE_UMIN:
6597 return MIRBuilder.buildConstant(
6598 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6599 case TargetOpcode::G_VECREDUCE_SMAX:
6600 return MIRBuilder.buildConstant(
6601 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6602 case TargetOpcode::G_VECREDUCE_SMIN:
6603 return MIRBuilder.buildConstant(
6604 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6605 case TargetOpcode::G_VECREDUCE_FADD:
6606 return MIRBuilder.buildFConstant(Ty, -0.0);
6607 case TargetOpcode::G_VECREDUCE_FMUL:
6608 return MIRBuilder.buildFConstant(Ty, 1.0);
6609 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6610 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6611 assert(false && "getNeutralElementForVecReduce unimplemented for "
6612 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6613 }
6614 llvm_unreachable("switch expected to return!");
6615}
6616
6617LegalizerHelper::LegalizeResult
6618LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6619 LLT MoreTy) {
6620 unsigned Opc = MI.getOpcode();
6621 switch (Opc) {
6622 case TargetOpcode::G_IMPLICIT_DEF:
6623 case TargetOpcode::G_LOAD: {
6624 if (TypeIdx != 0)
6625 return UnableToLegalize;
6626 Observer.changingInstr(MI);
6627 moreElementsVectorDst(MI, MoreTy, 0);
6628 Observer.changedInstr(MI);
6629 return Legalized;
6630 }
6631 case TargetOpcode::G_STORE:
6632 if (TypeIdx != 0)
6633 return UnableToLegalize;
6634 Observer.changingInstr(MI);
6635 moreElementsVectorSrc(MI, MoreTy, 0);
6636 Observer.changedInstr(MI);
6637 return Legalized;
6638 case TargetOpcode::G_AND:
6639 case TargetOpcode::G_OR:
6640 case TargetOpcode::G_XOR:
6641 case TargetOpcode::G_ADD:
6642 case TargetOpcode::G_SUB:
6643 case TargetOpcode::G_MUL:
6644 case TargetOpcode::G_FADD:
6645 case TargetOpcode::G_FSUB:
6646 case TargetOpcode::G_FMUL:
6647 case TargetOpcode::G_FDIV:
6648 case TargetOpcode::G_FCOPYSIGN:
6649 case TargetOpcode::G_UADDSAT:
6650 case TargetOpcode::G_USUBSAT:
6651 case TargetOpcode::G_SADDSAT:
6652 case TargetOpcode::G_SSUBSAT:
6653 case TargetOpcode::G_SMIN:
6654 case TargetOpcode::G_SMAX:
6655 case TargetOpcode::G_UMIN:
6656 case TargetOpcode::G_UMAX:
6657 case TargetOpcode::G_FMINNUM:
6658 case TargetOpcode::G_FMAXNUM:
6659 case TargetOpcode::G_FMINNUM_IEEE:
6660 case TargetOpcode::G_FMAXNUM_IEEE:
6661 case TargetOpcode::G_FMINIMUM:
6662 case TargetOpcode::G_FMAXIMUM:
6663 case TargetOpcode::G_FMINIMUMNUM:
6664 case TargetOpcode::G_FMAXIMUMNUM:
6665 case TargetOpcode::G_STRICT_FADD:
6666 case TargetOpcode::G_STRICT_FSUB:
6667 case TargetOpcode::G_STRICT_FMUL:
6668 case TargetOpcode::G_SHL:
6669 case TargetOpcode::G_ASHR:
6670 case TargetOpcode::G_LSHR: {
6671 Observer.changingInstr(MI);
6672 moreElementsVectorSrc(MI, MoreTy, 1);
6673 moreElementsVectorSrc(MI, MoreTy, 2);
6674 moreElementsVectorDst(MI, MoreTy, 0);
6675 Observer.changedInstr(MI);
6676 return Legalized;
6677 }
6678 case TargetOpcode::G_FMA:
6679 case TargetOpcode::G_STRICT_FMA:
6680 case TargetOpcode::G_FSHR:
6681 case TargetOpcode::G_FSHL: {
6682 Observer.changingInstr(MI);
6683 moreElementsVectorSrc(MI, MoreTy, 1);
6684 moreElementsVectorSrc(MI, MoreTy, 2);
6685 moreElementsVectorSrc(MI, MoreTy, 3);
6686 moreElementsVectorDst(MI, MoreTy, 0);
6687 Observer.changedInstr(MI);
6688 return Legalized;
6689 }
6690 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6691 case TargetOpcode::G_EXTRACT:
6692 if (TypeIdx != 1)
6693 return UnableToLegalize;
6694 Observer.changingInstr(MI);
6695 moreElementsVectorSrc(MI, MoreTy, 1);
6696 Observer.changedInstr(MI);
6697 return Legalized;
6698 case TargetOpcode::G_INSERT:
6699 case TargetOpcode::G_INSERT_VECTOR_ELT:
6700 case TargetOpcode::G_FREEZE:
6701 case TargetOpcode::G_FNEG:
6702 case TargetOpcode::G_FABS:
6703 case TargetOpcode::G_FSQRT:
6704 case TargetOpcode::G_FCEIL:
6705 case TargetOpcode::G_FFLOOR:
6706 case TargetOpcode::G_FNEARBYINT:
6707 case TargetOpcode::G_FRINT:
6708 case TargetOpcode::G_INTRINSIC_ROUND:
6709 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6710 case TargetOpcode::G_INTRINSIC_TRUNC:
6711 case TargetOpcode::G_BITREVERSE:
6712 case TargetOpcode::G_BSWAP:
6713 case TargetOpcode::G_FCANONICALIZE:
6714 case TargetOpcode::G_SEXT_INREG:
6715 case TargetOpcode::G_ABS:
6716 case TargetOpcode::G_CTLZ:
6717 case TargetOpcode::G_CTPOP:
6718 if (TypeIdx != 0)
6719 return UnableToLegalize;
6720 Observer.changingInstr(MI);
6721 moreElementsVectorSrc(MI, MoreTy, 1);
6722 moreElementsVectorDst(MI, MoreTy, 0);
6723 Observer.changedInstr(MI);
6724 return Legalized;
6725 case TargetOpcode::G_SELECT: {
6726 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6727 if (TypeIdx == 1) {
6728 if (!CondTy.isScalar() ||
6729 DstTy.getElementCount() != MoreTy.getElementCount())
6730 return UnableToLegalize;
6731
6732 // This is turning a scalar select of vectors into a vector
6733 // select. Broadcast the select condition.
6734 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6735 Observer.changingInstr(MI);
6736 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6737 Observer.changedInstr(MI);
6738 return Legalized;
6739 }
6740
6741 if (CondTy.isVector())
6742 return UnableToLegalize;
6743
6744 Observer.changingInstr(MI);
6745 moreElementsVectorSrc(MI, MoreTy, 2);
6746 moreElementsVectorSrc(MI, MoreTy, 3);
6747 moreElementsVectorDst(MI, MoreTy, 0);
6748 Observer.changedInstr(MI);
6749 return Legalized;
6750 }
6751 case TargetOpcode::G_UNMERGE_VALUES:
6752 return UnableToLegalize;
6753 case TargetOpcode::G_PHI:
6754 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6755 case TargetOpcode::G_SHUFFLE_VECTOR:
6756 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6757 case TargetOpcode::G_BUILD_VECTOR: {
6758 SmallVector<SrcOp, 8> Elts;
6759 for (auto Op : MI.uses()) {
6760 Elts.push_back(Op.getReg());
6761 }
6762
6763 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6764 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6765 }
6766
6767 MIRBuilder.buildDeleteTrailingVectorElements(
6768 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6769 MI.eraseFromParent();
6770 return Legalized;
6771 }
6772 case TargetOpcode::G_SEXT:
6773 case TargetOpcode::G_ZEXT:
6774 case TargetOpcode::G_ANYEXT:
6775 case TargetOpcode::G_TRUNC:
6776 case TargetOpcode::G_FPTRUNC:
6777 case TargetOpcode::G_FPEXT:
6778 case TargetOpcode::G_FPTOSI:
6779 case TargetOpcode::G_FPTOUI:
6780 case TargetOpcode::G_FPTOSI_SAT:
6781 case TargetOpcode::G_FPTOUI_SAT:
6782 case TargetOpcode::G_SITOFP:
6783 case TargetOpcode::G_UITOFP: {
6784 Observer.changingInstr(MI);
6785 LLT SrcExtTy;
6786 LLT DstExtTy;
6787 if (TypeIdx == 0) {
6788 DstExtTy = MoreTy;
6789 SrcExtTy = LLT::fixed_vector(
6790 MoreTy.getNumElements(),
6791 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6792 } else {
6793 DstExtTy = LLT::fixed_vector(
6794 MoreTy.getNumElements(),
6795 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6796 SrcExtTy = MoreTy;
6797 }
6798 moreElementsVectorSrc(MI, SrcExtTy, 1);
6799 moreElementsVectorDst(MI, DstExtTy, 0);
6800 Observer.changedInstr(MI);
6801 return Legalized;
6802 }
6803 case TargetOpcode::G_ICMP:
6804 case TargetOpcode::G_FCMP: {
6805 if (TypeIdx != 1)
6806 return UnableToLegalize;
6807
6808 Observer.changingInstr(MI);
6809 moreElementsVectorSrc(MI, MoreTy, 2);
6810 moreElementsVectorSrc(MI, MoreTy, 3);
6811 LLT CondTy = LLT::fixed_vector(
6812 MoreTy.getNumElements(),
6813 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6814 moreElementsVectorDst(MI, CondTy, 0);
6815 Observer.changedInstr(MI);
6816 return Legalized;
6817 }
6818 case TargetOpcode::G_BITCAST: {
6819 if (TypeIdx != 0)
6820 return UnableToLegalize;
6821
6822 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6823 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6824
6825 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6826 if (coefficient % DstTy.getNumElements() != 0)
6827 return UnableToLegalize;
6828
6829 coefficient = coefficient / DstTy.getNumElements();
6830
6831 LLT NewTy = SrcTy.changeElementCount(
6832 ElementCount::get(coefficient, MoreTy.isScalable()));
6833 Observer.changingInstr(MI);
6834 moreElementsVectorSrc(MI, NewTy, 1);
6835 moreElementsVectorDst(MI, MoreTy, 0);
6836 Observer.changedInstr(MI);
6837 return Legalized;
6838 }
6839 case TargetOpcode::G_VECREDUCE_FADD:
6840 case TargetOpcode::G_VECREDUCE_FMUL:
6841 case TargetOpcode::G_VECREDUCE_ADD:
6842 case TargetOpcode::G_VECREDUCE_MUL:
6843 case TargetOpcode::G_VECREDUCE_AND:
6844 case TargetOpcode::G_VECREDUCE_OR:
6845 case TargetOpcode::G_VECREDUCE_XOR:
6846 case TargetOpcode::G_VECREDUCE_SMAX:
6847 case TargetOpcode::G_VECREDUCE_SMIN:
6848 case TargetOpcode::G_VECREDUCE_UMAX:
6849 case TargetOpcode::G_VECREDUCE_UMIN: {
6850 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6851 MachineOperand &MO = MI.getOperand(1);
6852 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6853 auto NeutralElement = getNeutralElementForVecReduce(
6854 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6855
6856 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6857 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6858 i != e; i++) {
6859 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6860 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6861 NeutralElement, Idx);
6862 }
6863
6864 Observer.changingInstr(MI);
6865 MO.setReg(NewVec.getReg(0));
6866 Observer.changedInstr(MI);
6867 return Legalized;
6868 }
6869
6870 default:
6871 return UnableToLegalize;
6872 }
6873}
6874
6875LegalizerHelper::LegalizeResult
6876LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6877 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6878 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6879 unsigned MaskNumElts = Mask.size();
6880 unsigned SrcNumElts = SrcTy.getNumElements();
6881 LLT DestEltTy = DstTy.getElementType();
6882
6883 if (MaskNumElts == SrcNumElts)
6884 return Legalized;
6885
6886 if (MaskNumElts < SrcNumElts) {
6887 // Extend mask to match new destination vector size with
6888 // undef values.
6889 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6890 llvm::copy(Mask, NewMask.begin());
6891
6892 moreElementsVectorDst(MI, SrcTy, 0);
6893 MIRBuilder.setInstrAndDebugLoc(MI);
6894 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6895 MI.getOperand(1).getReg(),
6896 MI.getOperand(2).getReg(), NewMask);
6897 MI.eraseFromParent();
6898
6899 return Legalized;
6900 }
6901
6902 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6903 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6904 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6905
6906 // Create new source vectors by concatenating the initial
6907 // source vectors with undefined vectors of the same size.
6908 auto Undef = MIRBuilder.buildUndef(SrcTy);
6909 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6910 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6911 MOps1[0] = MI.getOperand(1).getReg();
6912 MOps2[0] = MI.getOperand(2).getReg();
6913
6914 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6915 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6916
6917 // Readjust mask for new input vector length.
6918 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6919 for (unsigned I = 0; I != MaskNumElts; ++I) {
6920 int Idx = Mask[I];
6921 if (Idx >= static_cast<int>(SrcNumElts))
6922 Idx += PaddedMaskNumElts - SrcNumElts;
6923 MappedOps[I] = Idx;
6924 }
6925
6926 // If we got more elements than required, extract subvector.
6927 if (MaskNumElts != PaddedMaskNumElts) {
6928 auto Shuffle =
6929 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6930
6931 SmallVector<Register, 16> Elts(MaskNumElts);
6932 for (unsigned I = 0; I < MaskNumElts; ++I) {
6933 Elts[I] =
6934 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6935 .getReg(0);
6936 }
6937 MIRBuilder.buildBuildVector(DstReg, Elts);
6938 } else {
6939 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6940 }
6941
6942 MI.eraseFromParent();
6943 return Legalized;
6944}
6945
6946LegalizerHelper::LegalizeResult
6947LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6948 unsigned int TypeIdx, LLT MoreTy) {
6949 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6950 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6951 unsigned NumElts = DstTy.getNumElements();
6952 unsigned WidenNumElts = MoreTy.getNumElements();
6953
6954 if (DstTy.isVector() && Src1Ty.isVector() &&
6955 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6956 return equalizeVectorShuffleLengths(MI);
6957 }
6958
6959 if (TypeIdx != 0)
6960 return UnableToLegalize;
6961
6962 // Expect a canonicalized shuffle.
6963 if (DstTy != Src1Ty || DstTy != Src2Ty)
6964 return UnableToLegalize;
6965
6966 moreElementsVectorSrc(MI, MoreTy, 1);
6967 moreElementsVectorSrc(MI, MoreTy, 2);
6968
6969 // Adjust mask based on new input vector length.
6970 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6971 for (unsigned I = 0; I != NumElts; ++I) {
6972 int Idx = Mask[I];
6973 if (Idx < static_cast<int>(NumElts))
6974 NewMask[I] = Idx;
6975 else
6976 NewMask[I] = Idx - NumElts + WidenNumElts;
6977 }
6978 moreElementsVectorDst(MI, MoreTy, 0);
6979 MIRBuilder.setInstrAndDebugLoc(MI);
6980 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6981 MI.getOperand(1).getReg(),
6982 MI.getOperand(2).getReg(), NewMask);
6983 MI.eraseFromParent();
6984 return Legalized;
6985}
6986
6987void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6988 ArrayRef<Register> Src1Regs,
6989 ArrayRef<Register> Src2Regs,
6990 LLT NarrowTy) {
6991 MachineIRBuilder &B = MIRBuilder;
6992 unsigned SrcParts = Src1Regs.size();
6993 unsigned DstParts = DstRegs.size();
6994
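// Illustrative note on the schoolbook scheme below: result part K accumulates the
// low halves of Src1[K - i] * Src2[i], the high halves of the products that fed
// part K - 1, and the carries from the previous column. E.g. with two parts:
//   Dst0 = lo(a0 * b0)
//   Dst1 = lo(a0 * b1) + lo(a1 * b0) + hi(a0 * b0)   (carries past Dst1 are dropped)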
6995 unsigned DstIdx = 0; // Low bits of the result.
6996 Register FactorSum =
6997 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6998 DstRegs[DstIdx] = FactorSum;
6999
7000 Register CarrySumPrevDstIdx;
7001 SmallVector<Register, 4> Factors;
7002
7003 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7004 // Collect low parts of muls for DstIdx.
7005 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7006 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7007 MachineInstrBuilder Mul =
7008 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7009 Factors.push_back(Mul.getReg(0));
7010 }
7011 // Collect high parts of muls from previous DstIdx.
7012 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7013 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7014 MachineInstrBuilder Umulh =
7015 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7016 Factors.push_back(Umulh.getReg(0));
7017 }
7018 // Add CarrySum from additions calculated for previous DstIdx.
7019 if (DstIdx != 1) {
7020 Factors.push_back(CarrySumPrevDstIdx);
7021 }
7022
7023 Register CarrySum;
7024 // Add all factors and accumulate all carries into CarrySum.
7025 if (DstIdx != DstParts - 1) {
7026 MachineInstrBuilder Uaddo =
7027 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7028 FactorSum = Uaddo.getReg(0);
7029 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7030 for (unsigned i = 2; i < Factors.size(); ++i) {
7031 MachineInstrBuilder Uaddo =
7032 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7033 FactorSum = Uaddo.getReg(0);
7034 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7035 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7036 }
7037 } else {
7038 // Since value for the next index is not calculated, neither is CarrySum.
7039 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7040 for (unsigned i = 2; i < Factors.size(); ++i)
7041 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7042 }
7043
7044 CarrySumPrevDstIdx = CarrySum;
7045 DstRegs[DstIdx] = FactorSum;
7046 Factors.clear();
7047 }
7048}
7049
7050LegalizerHelper::LegalizeResult
7051LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7052 LLT NarrowTy) {
7053 if (TypeIdx != 0)
7054 return UnableToLegalize;
7055
7056 Register DstReg = MI.getOperand(0).getReg();
7057 LLT DstType = MRI.getType(DstReg);
7058 // FIXME: add support for vector types
7059 if (DstType.isVector())
7060 return UnableToLegalize;
7061
7062 unsigned Opcode = MI.getOpcode();
7063 unsigned OpO, OpE, OpF;
7064 switch (Opcode) {
7065 case TargetOpcode::G_SADDO:
7066 case TargetOpcode::G_SADDE:
7067 case TargetOpcode::G_UADDO:
7068 case TargetOpcode::G_UADDE:
7069 case TargetOpcode::G_ADD:
7070 OpO = TargetOpcode::G_UADDO;
7071 OpE = TargetOpcode::G_UADDE;
7072 OpF = TargetOpcode::G_UADDE;
7073 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7074 OpF = TargetOpcode::G_SADDE;
7075 break;
7076 case TargetOpcode::G_SSUBO:
7077 case TargetOpcode::G_SSUBE:
7078 case TargetOpcode::G_USUBO:
7079 case TargetOpcode::G_USUBE:
7080 case TargetOpcode::G_SUB:
7081 OpO = TargetOpcode::G_USUBO;
7082 OpE = TargetOpcode::G_USUBE;
7083 OpF = TargetOpcode::G_USUBE;
7084 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7085 OpF = TargetOpcode::G_SSUBE;
7086 break;
7087 default:
7088 llvm_unreachable("Unexpected add/sub opcode!");
7089 }
7090
7091 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7092 unsigned NumDefs = MI.getNumExplicitDefs();
7093 Register Src1 = MI.getOperand(NumDefs).getReg();
7094 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7095 Register CarryDst, CarryIn;
7096 if (NumDefs == 2)
7097 CarryDst = MI.getOperand(1).getReg();
7098 if (MI.getNumOperands() == NumDefs + 3)
7099 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7100
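// For example (illustrative): a 128-bit G_ADD narrowed to s32 becomes one G_UADDO
// on the lowest parts followed by three G_UADDE's, each consuming the carry-out of
// the previous step.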
7101 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7102 LLT LeftoverTy, DummyTy;
7103 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7104 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7105 MIRBuilder, MRI);
7106 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7107 MRI);
7108
7109 int NarrowParts = Src1Regs.size();
7110 Src1Regs.append(Src1Left);
7111 Src2Regs.append(Src2Left);
7112 DstRegs.reserve(Src1Regs.size());
7113
7114 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7115 Register DstReg =
7116 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7117 Register CarryOut;
7118 // Forward the final carry-out to the destination register
7119 if (i == e - 1 && CarryDst)
7120 CarryOut = CarryDst;
7121 else
7122 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7123
7124 if (!CarryIn) {
7125 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7126 {Src1Regs[i], Src2Regs[i]});
7127 } else if (i == e - 1) {
7128 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7129 {Src1Regs[i], Src2Regs[i], CarryIn});
7130 } else {
7131 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7132 {Src1Regs[i], Src2Regs[i], CarryIn});
7133 }
7134
7135 DstRegs.push_back(DstReg);
7136 CarryIn = CarryOut;
7137 }
7138 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7139 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7140 ArrayRef(DstRegs).drop_front(NarrowParts));
7141
7142 MI.eraseFromParent();
7143 return Legalized;
7144}
7145
7146LegalizerHelper::LegalizeResult
7147LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7148 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7149
7150 LLT Ty = MRI.getType(DstReg);
7151 if (Ty.isVector())
7152 return UnableToLegalize;
7153
7154 unsigned Size = Ty.getSizeInBits();
7155 unsigned NarrowSize = NarrowTy.getSizeInBits();
7156 if (Size % NarrowSize != 0)
7157 return UnableToLegalize;
7158
7159 unsigned NumParts = Size / NarrowSize;
7160 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7161 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7162
7163 SmallVector<Register, 2> Src1Parts, Src2Parts;
7164 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7165 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7166 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7167 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7168
7169 // Take only high half of registers if this is high mul.
7170 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7171 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7172 MI.eraseFromParent();
7173 return Legalized;
7174}
7175
7176LegalizerHelper::LegalizeResult
7177LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7178 LLT NarrowTy) {
7179 if (TypeIdx != 0)
7180 return UnableToLegalize;
7181
7182 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7183
7184 Register Src = MI.getOperand(1).getReg();
7185 LLT SrcTy = MRI.getType(Src);
7186
7187 // If all finite floats fit into the narrowed integer type, we can just swap
7188 // out the result type. This is practically only useful for conversions from
7189 // half to at least 16-bits, so just handle the one case.
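// E.g. (illustrative): the largest finite half value is 65504, which needs 16 bits
// unsigned or 17 bits signed, hence the NarrowTy size check below.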
7190 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7191 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7192 return UnableToLegalize;
7193
7194 Observer.changingInstr(MI);
7195 narrowScalarDst(MI, NarrowTy, 0,
7196 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7197 Observer.changedInstr(MI);
7198 return Legalized;
7199}
7200
7201LegalizerHelper::LegalizeResult
7202LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7203 LLT NarrowTy) {
7204 if (TypeIdx != 1)
7205 return UnableToLegalize;
7206
7207 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7208
7209 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7210 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7211 // NarrowSize.
7212 if (SizeOp1 % NarrowSize != 0)
7213 return UnableToLegalize;
7214 int NumParts = SizeOp1 / NarrowSize;
7215
7216 SmallVector<Register, 2> SrcRegs, DstRegs;
7217 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7218 MIRBuilder, MRI);
7219
7220 Register OpReg = MI.getOperand(0).getReg();
7221 uint64_t OpStart = MI.getOperand(2).getImm();
7222 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7223 for (int i = 0; i < NumParts; ++i) {
7224 unsigned SrcStart = i * NarrowSize;
7225
7226 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7227 // No part of the extract uses this subregister, ignore it.
7228 continue;
7229 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7230 // The entire subregister is extracted, forward the value.
7231 DstRegs.push_back(SrcRegs[i]);
7232 continue;
7233 }
7234
7235 // Determine which bits of this narrow part the extract covers: ExtractOffset
7236 // is the offset into the part and SegSize the number of bits taken from it.
7237 int64_t ExtractOffset;
7238 uint64_t SegSize;
7239 if (OpStart < SrcStart) {
7240 ExtractOffset = 0;
7241 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7242 } else {
7243 ExtractOffset = OpStart - SrcStart;
7244 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7245 }
7246
7247 Register SegReg = SrcRegs[i];
7248 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7249 // A genuine extract is needed.
7250 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7251 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7252 }
7253
7254 DstRegs.push_back(SegReg);
7255 }
7256
7257 Register DstReg = MI.getOperand(0).getReg();
7258 if (MRI.getType(DstReg).isVector())
7259 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7260 else if (DstRegs.size() > 1)
7261 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7262 else
7263 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7264 MI.eraseFromParent();
7265 return Legalized;
7266}
7267
7268LegalizerHelper::LegalizeResult
7269LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7270 LLT NarrowTy) {
7271 // FIXME: Don't know how to handle secondary types yet.
7272 if (TypeIdx != 0)
7273 return UnableToLegalize;
7274
7275 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7276 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7277 LLT LeftoverTy;
7278 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7279 LeftoverRegs, MIRBuilder, MRI);
7280
7281 SrcRegs.append(LeftoverRegs);
7282
7283 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7284 Register OpReg = MI.getOperand(2).getReg();
7285 uint64_t OpStart = MI.getOperand(3).getImm();
7286 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7287 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7288 unsigned DstStart = I * NarrowSize;
7289
7290 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7291 // The entire subregister is defined by this insert, forward the new
7292 // value.
7293 DstRegs.push_back(OpReg);
7294 continue;
7295 }
7296
7297 Register SrcReg = SrcRegs[I];
7298 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7299 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7300 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7301 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7302 }
7303
7304 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7305 // No part of the insert affects this subregister, forward the original.
7306 DstRegs.push_back(SrcReg);
7307 continue;
7308 }
7309
7310 // Determine which bits of OpReg land in this narrow part: ExtractOffset is the
7311 // offset into OpReg, InsertOffset the offset into the part, SegSize the bit count.
7312 int64_t ExtractOffset, InsertOffset;
7313 uint64_t SegSize;
7314 if (OpStart < DstStart) {
7315 InsertOffset = 0;
7316 ExtractOffset = DstStart - OpStart;
7317 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7318 } else {
7319 InsertOffset = OpStart - DstStart;
7320 ExtractOffset = 0;
7321 SegSize =
7322 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7323 }
7324
7325 Register SegReg = OpReg;
7326 if (ExtractOffset != 0 || SegSize != OpSize) {
7327 // A genuine extract is needed.
7328 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7329 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7330 }
7331
7332 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7333 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7334 DstRegs.push_back(DstReg);
7335 }
7336
7337 uint64_t WideSize = DstRegs.size() * NarrowSize;
7338 Register DstReg = MI.getOperand(0).getReg();
7339 if (WideSize > RegTy.getSizeInBits()) {
7340 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7341 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7342 MIRBuilder.buildTrunc(DstReg, MergeReg);
7343 } else
7344 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7345
7346 MI.eraseFromParent();
7347 return Legalized;
7348}
7349
7350LegalizerHelper::LegalizeResult
7351LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7352 LLT NarrowTy) {
7353 Register DstReg = MI.getOperand(0).getReg();
7354 LLT DstTy = MRI.getType(DstReg);
7355
7356 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7357
7358 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7359 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7360 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7361 LLT LeftoverTy;
7362 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7363 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7364 return UnableToLegalize;
7365
7366 LLT Unused;
7367 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7368 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7369 llvm_unreachable("inconsistent extractParts result");
7370
7371 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7372 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7373 {Src0Regs[I], Src1Regs[I]});
7374 DstRegs.push_back(Inst.getReg(0));
7375 }
7376
7377 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7378 auto Inst = MIRBuilder.buildInstr(
7379 MI.getOpcode(),
7380 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7381 DstLeftoverRegs.push_back(Inst.getReg(0));
7382 }
7383
7384 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7385 LeftoverTy, DstLeftoverRegs);
7386
7387 MI.eraseFromParent();
7388 return Legalized;
7389}
7390
7391LegalizerHelper::LegalizeResult
7392LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7393 LLT NarrowTy) {
7394 if (TypeIdx != 0)
7395 return UnableToLegalize;
7396
7397 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7398
7399 LLT DstTy = MRI.getType(DstReg);
7400 if (DstTy.isVector())
7401 return UnableToLegalize;
7402
7404 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7405 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7406 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7407
7408 MI.eraseFromParent();
7409 return Legalized;
7410}
7411
7412LegalizerHelper::LegalizeResult
7413LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7414 LLT NarrowTy) {
7415 if (TypeIdx != 0)
7416 return UnableToLegalize;
7417
7418 Register CondReg = MI.getOperand(1).getReg();
7419 LLT CondTy = MRI.getType(CondReg);
7420 if (CondTy.isVector()) // TODO: Handle vselect
7421 return UnableToLegalize;
7422
7423 Register DstReg = MI.getOperand(0).getReg();
7424 LLT DstTy = MRI.getType(DstReg);
7425
7426 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7427 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7428 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7429 LLT LeftoverTy;
7430 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7431 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7432 return UnableToLegalize;
7433
7434 LLT Unused;
7435 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7436 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7437 llvm_unreachable("inconsistent extractParts result");
7438
7439 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7440 auto Select = MIRBuilder.buildSelect(NarrowTy,
7441 CondReg, Src1Regs[I], Src2Regs[I]);
7442 DstRegs.push_back(Select.getReg(0));
7443 }
7444
7445 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7446 auto Select = MIRBuilder.buildSelect(
7447 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7448 DstLeftoverRegs.push_back(Select.getReg(0));
7449 }
7450
7451 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7452 LeftoverTy, DstLeftoverRegs);
7453
7454 MI.eraseFromParent();
7455 return Legalized;
7456}
7457
7458LegalizerHelper::LegalizeResult
7459LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7460 LLT NarrowTy) {
7461 if (TypeIdx != 1)
7462 return UnableToLegalize;
7463
7464 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7465 unsigned NarrowSize = NarrowTy.getSizeInBits();
7466
7467 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7468 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7469
7470 MachineIRBuilder &B = MIRBuilder;
7471 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7472 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
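// E.g. for a 64-bit source split into 32-bit halves (illustrative): Hi == 0 and
// Lo == 1 gives 32 + 31 == 63 leading zeros.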
7473 auto C_0 = B.buildConstant(NarrowTy, 0);
7474 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7475 UnmergeSrc.getReg(1), C_0);
7476 auto LoCTLZ = IsUndef ?
7477 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7478 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7479 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7480 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7481 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7482 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7483
7484 MI.eraseFromParent();
7485 return Legalized;
7486 }
7487
7488 return UnableToLegalize;
7489}
7490
7491LegalizerHelper::LegalizeResult
7492LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7493 LLT NarrowTy) {
7494 if (TypeIdx != 1)
7495 return UnableToLegalize;
7496
7497 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7498 unsigned NarrowSize = NarrowTy.getSizeInBits();
7499
7500 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7501 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7502
7503 MachineIRBuilder &B = MIRBuilder;
7504 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7505 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7506 auto C_0 = B.buildConstant(NarrowTy, 0);
7507 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7508 UnmergeSrc.getReg(0), C_0);
7509 auto HiCTTZ = IsUndef ?
7510 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7511 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7512 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7513 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7514 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7515 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7516
7517 MI.eraseFromParent();
7518 return Legalized;
7519 }
7520
7521 return UnableToLegalize;
7522}
7523
7524LegalizerHelper::LegalizeResult
7525LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7526 LLT NarrowTy) {
7527 if (TypeIdx != 1)
7528 return UnableToLegalize;
7529
7530 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7531 unsigned NarrowSize = NarrowTy.getSizeInBits();
7532
7533 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7534 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7535
7536 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7537 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7538 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7539
7540 MI.eraseFromParent();
7541 return Legalized;
7542 }
7543
7544 return UnableToLegalize;
7545}
7546
7547LegalizerHelper::LegalizeResult
7548LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7549 LLT NarrowTy) {
7550 if (TypeIdx != 1)
7551 return UnableToLegalize;
7552
7553 MachineIRBuilder &B = MIRBuilder;
7554 Register ExpReg = MI.getOperand(2).getReg();
7555 LLT ExpTy = MRI.getType(ExpReg);
7556
7557 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7558
7559 // Clamp the exponent to the range of the target type.
7560 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7561 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7562 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7563 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7564
7565 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7566 Observer.changingInstr(MI);
7567 MI.getOperand(2).setReg(Trunc.getReg(0));
7568 Observer.changedInstr(MI);
7569 return Legalized;
7570}
7571
7572LegalizerHelper::LegalizeResult
7573LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7574 unsigned Opc = MI.getOpcode();
7575 const auto &TII = MIRBuilder.getTII();
7576 auto isSupported = [this](const LegalityQuery &Q) {
7577 auto QAction = LI.getAction(Q).Action;
7578 return QAction == Legal || QAction == Libcall || QAction == Custom;
7579 };
7580 switch (Opc) {
7581 default:
7582 return UnableToLegalize;
7583 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7584 // This trivially expands to CTLZ.
7585 Observer.changingInstr(MI);
7586 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7587 Observer.changedInstr(MI);
7588 return Legalized;
7589 }
7590 case TargetOpcode::G_CTLZ: {
7591 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7592 unsigned Len = SrcTy.getSizeInBits();
7593
7594 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7595 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7596 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7597 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7598 auto ICmp = MIRBuilder.buildICmp(
7599 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7600 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7601 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7602 MI.eraseFromParent();
7603 return Legalized;
7604 }
7605 // for now, we do this:
7606 // NewLen = NextPowerOf2(Len);
7607 // x = x | (x >> 1);
7608 // x = x | (x >> 2);
7609 // ...
7610 // x = x | (x >>16);
7611 // x = x | (x >>32); // for 64-bit input
7612 // Up to NewLen/2
7613 // return Len - popcount(x);
7614 //
7615 // Ref: "Hacker's Delight" by Henry Warren
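// Quick worked example of the expansion above (illustrative only): for a 16-bit
// x = 0x0200 the or-shift cascade yields x = 0x03FF, so popcount(x) = 10 and
// ctlz = 16 - 10 = 6, as expected.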
7616 Register Op = SrcReg;
7617 unsigned NewLen = PowerOf2Ceil(Len);
7618 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7619 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7620 auto MIBOp = MIRBuilder.buildOr(
7621 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7622 Op = MIBOp.getReg(0);
7623 }
7624 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7625 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7626 MIBPop);
7627 MI.eraseFromParent();
7628 return Legalized;
7629 }
7630 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7631 // This trivially expands to CTTZ.
7632 Observer.changingInstr(MI);
7633 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7634 Observer.changedInstr(MI);
7635 return Legalized;
7636 }
7637 case TargetOpcode::G_CTTZ: {
7638 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7639
7640 unsigned Len = SrcTy.getSizeInBits();
7641 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7642 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7643 // zero.
7644 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7645 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7646 auto ICmp = MIRBuilder.buildICmp(
7647 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7648 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7649 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7650 MI.eraseFromParent();
7651 return Legalized;
7652 }
7653 // for now, we use: { return popcount(~x & (x - 1)); }
7654 // unless the target has ctlz but not ctpop, in which case we use:
7655 // { return 32 - nlz(~x & (x-1)); }
7656 // Ref: "Hacker's Delight" by Henry Warren
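// Worked example (illustrative only): for an 8-bit x = 0b01011000, x - 1 =
// 0b01010111 and ~x = 0b10100111, so ~x & (x - 1) = 0b00000111, whose popcount
// is 3 == cttz(x).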
7657 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7658 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7659 auto MIBTmp = MIRBuilder.buildAnd(
7660 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7661 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7662 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7663 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7664 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7665 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7666 MI.eraseFromParent();
7667 return Legalized;
7668 }
7669 Observer.changingInstr(MI);
7670 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7671 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7672 Observer.changedInstr(MI);
7673 return Legalized;
7674 }
7675 case TargetOpcode::G_CTPOP: {
7676 Register SrcReg = MI.getOperand(1).getReg();
7677 LLT Ty = MRI.getType(SrcReg);
7678 unsigned Size = Ty.getSizeInBits();
7679 MachineIRBuilder &B = MIRBuilder;
7680
7681 // Count set bits in blocks of 2 bits. Default approach would be
7682 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7683 // We use following formula instead:
7684 // B2Count = val - { (val >> 1) & 0x55555555 }
7685 // since it gives same result in blocks of 2 with one instruction less.
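// Sanity check on a single 2-bit block (illustrative): 0b11 -> 3 - 1 = 2,
// 0b10 -> 2 - 1 = 1, 0b01 -> 1 - 0 = 1, 0b00 -> 0 - 0 = 0, i.e. each block ends
// up holding its own population count.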
7686 auto C_1 = B.buildConstant(Ty, 1);
7687 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7688 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7689 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7690 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7691 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7692
7693 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7694 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7695 auto C_2 = B.buildConstant(Ty, 2);
7696 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7697 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7698 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7699 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7700 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7701 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7702
7703 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7704 // addition since count value sits in range {0,...,8} and 4 bits are enough
7705 // to hold such binary values. After addition high 4 bits still hold count
7706 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7707 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7708 auto C_4 = B.buildConstant(Ty, 4);
7709 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7710 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7711 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7712 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7713 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7714
7715 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7716 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7717 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
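// E.g. for a 32-bit value (illustrative): B8Count * 0x01010101 places the sum of
// the four byte counts in bits [31:24], and the shift by Size - 8 below moves that
// sum down to bit 0.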
7718 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7719
7720 // Shift count result from 8 high bits to low bits.
7721 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7722
7723 auto IsMulSupported = [this](const LLT Ty) {
7724 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7725 return Action == Legal || Action == WidenScalar || Action == Custom;
7726 };
7727 if (IsMulSupported(Ty)) {
7728 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7729 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7730 } else {
7731 auto ResTmp = B8Count;
7732 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7733 auto ShiftC = B.buildConstant(Ty, Shift);
7734 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7735 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7736 }
7737 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7738 }
7739 MI.eraseFromParent();
7740 return Legalized;
7741 }
7742 }
7743}
7744
7745// Check that (every element of) Reg is undef or not an exact multiple of BW.
7746static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7747 Register Reg, unsigned BW) {
7748 return matchUnaryPredicate(
7749 MRI, Reg,
7750 [=](const Constant *C) {
7751 // Null constant here means an undef.
7752 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7753 return !CI || CI->getValue().urem(BW) != 0;
7754 },
7755 /*AllowUndefs*/ true);
7756}
7757
7758LegalizerHelper::LegalizeResult
7759LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7760 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7761 LLT Ty = MRI.getType(Dst);
7762 LLT ShTy = MRI.getType(Z);
7763
7764 unsigned BW = Ty.getScalarSizeInBits();
7765
7766 if (!isPowerOf2_32(BW))
7767 return UnableToLegalize;
7768
7769 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7770 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7771
7772 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7773 // fshl X, Y, Z -> fshr X, Y, -Z
7774 // fshr X, Y, Z -> fshl X, Y, -Z
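// E.g. with BW = 8 (illustrative): fshl(X, Y, 3) == fshr(X, Y, 5) and 5 == (-3 & 7);
// both compute (X << 3) | (Y >> 5).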
7775 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7776 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7777 } else {
7778 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7779 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7780 auto One = MIRBuilder.buildConstant(ShTy, 1);
7781 if (IsFSHL) {
7782 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7783 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7784 } else {
7785 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7786 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7787 }
7788
7789 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7790 }
7791
7792 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7793 MI.eraseFromParent();
7794 return Legalized;
7795}
7796
7797LegalizerHelper::LegalizeResult
7798LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7799 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7800 LLT Ty = MRI.getType(Dst);
7801 LLT ShTy = MRI.getType(Z);
7802
7803 const unsigned BW = Ty.getScalarSizeInBits();
7804 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7805
7806 Register ShX, ShY;
7807 Register ShAmt, InvShAmt;
7808
7809 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7810 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7811 // fshl: X << C | Y >> (BW - C)
7812 // fshr: X << (BW - C) | Y >> C
7813 // where C = Z % BW is not zero
7814 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7815 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7816 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7817 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7818 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7819 } else {
7820 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7821 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
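// Splitting the second shift as above (illustrative note) keeps every shift amount
// strictly below BW, even when Z % BW == 0, where the single-shift form would shift
// by BW and produce a poison value.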
7822 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7823 if (isPowerOf2_32(BW)) {
7824 // Z % BW -> Z & (BW - 1)
7825 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7826 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7827 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7828 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7829 } else {
7830 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7831 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7832 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7833 }
7834
7835 auto One = MIRBuilder.buildConstant(ShTy, 1);
7836 if (IsFSHL) {
7837 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7838 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7839 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7840 } else {
7841 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7842 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7843 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7844 }
7845 }
7846
7847 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7848 MI.eraseFromParent();
7849 return Legalized;
7850}
7851
7852LegalizerHelper::LegalizeResult
7853LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7854 // These operations approximately do the following (while avoiding undefined
7855 // shifts by BW):
7856 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7857 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7858 Register Dst = MI.getOperand(0).getReg();
7859 LLT Ty = MRI.getType(Dst);
7860 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7861
7862 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7863 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7864
7865 // TODO: Use smarter heuristic that accounts for vector legalization.
7866 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7867 return lowerFunnelShiftAsShifts(MI);
7868
7869 // This only works for powers of 2, fallback to shifts if it fails.
7870 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7871 if (Result == UnableToLegalize)
7872 return lowerFunnelShiftAsShifts(MI);
7873 return Result;
7874}
7875
7876LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7877 auto [Dst, Src] = MI.getFirst2Regs();
7878 LLT DstTy = MRI.getType(Dst);
7879 LLT SrcTy = MRI.getType(Src);
7880
7881 uint32_t DstTySize = DstTy.getSizeInBits();
7882 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7883 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7884
7885 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7886 !isPowerOf2_32(SrcTyScalarSize))
7887 return UnableToLegalize;
7888
7889 // The step between extend is too large, split it by creating an intermediate
7890 // extend instruction
7891 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7892 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7893 // If the destination type is illegal, split it into multiple statements
7894 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7895 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7896 // Unmerge the vector
7897 LLT EltTy = MidTy.changeElementCount(
7898 MidTy.getElementCount().divideCoefficientBy(2));
7899 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7900
7901 // ZExt the vectors
7902 LLT ZExtResTy = DstTy.changeElementCount(
7903 DstTy.getElementCount().divideCoefficientBy(2));
7904 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7905 {UnmergeSrc.getReg(0)});
7906 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7907 {UnmergeSrc.getReg(1)});
7908
7909 // Merge the ending vectors
7910 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7911
7912 MI.eraseFromParent();
7913 return Legalized;
7914 }
7915 return UnableToLegalize;
7916}
7917
7918LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7919 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7920 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7921 // Similar to how operand splitting is done in SelectionDAG, we can handle
7922 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7923 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7924 // %lo16(<4 x s16>) = G_TRUNC %inlo
7925 // %hi16(<4 x s16>) = G_TRUNC %inhi
7926 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7927 // %res(<8 x s8>) = G_TRUNC %in16
7928
7929 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7930
7931 Register DstReg = MI.getOperand(0).getReg();
7932 Register SrcReg = MI.getOperand(1).getReg();
7933 LLT DstTy = MRI.getType(DstReg);
7934 LLT SrcTy = MRI.getType(SrcReg);
7935
7936 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7937 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7938 isPowerOf2_32(SrcTy.getNumElements()) &&
7939 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7940 // Split input type.
7941 LLT SplitSrcTy = SrcTy.changeElementCount(
7942 SrcTy.getElementCount().divideCoefficientBy(2));
7943
7944 // First, split the source into two smaller vectors.
7945 SmallVector<Register, 2> SplitSrcs;
7946 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7947
7948 // Truncate the splits into intermediate narrower elements.
7949 LLT InterTy;
7950 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7951 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7952 else
7953 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7954 for (Register &Src : SplitSrcs)
7955 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7956
7957 // Combine the new truncates into one vector
7958 auto Merge = MIRBuilder.buildMergeLikeInstr(
7959 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7960
7961 // Truncate the new vector to the final result type
7962 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7963 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7964 else
7965 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7966
7967 MI.eraseFromParent();
7968
7969 return Legalized;
7970 }
7971 return UnableToLegalize;
7972}
7973
7974LegalizerHelper::LegalizeResult
7975LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7976 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7977 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7978 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7979 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7980 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7981 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7982 MI.eraseFromParent();
7983 return Legalized;
7984}
7985
7986LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7987 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7988
7989 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7990 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7991
7992 MIRBuilder.setInstrAndDebugLoc(MI);
7993
7994 // If a rotate in the other direction is supported, use it.
7995 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7996 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7997 isPowerOf2_32(EltSizeInBits))
7998 return lowerRotateWithReverseRotate(MI);
7999
8000 // If a funnel shift is supported, use it.
8001 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8002 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8003 bool IsFShLegal = false;
8004 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8005 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8006 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8007 Register R3) {
8008 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8009 MI.eraseFromParent();
8010 return Legalized;
8011 };
8012 // If a funnel shift in the other direction is supported, use it.
8013 if (IsFShLegal) {
8014 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8015 } else if (isPowerOf2_32(EltSizeInBits)) {
8016 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8017 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8018 }
8019 }
8020
8021 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8022 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8023 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8024 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8025 Register ShVal;
8026 Register RevShiftVal;
8027 if (isPowerOf2_32(EltSizeInBits)) {
8028 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8029 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
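// E.g. an 8-bit rotl by c = 3 (illustrative): (x << (3 & 7)) | (x >> (-3 & 7)) ==
// (x << 3) | (x >> 5); the masked amounts can never reach 8.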
8030 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8031 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8032 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8033 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8034 RevShiftVal =
8035 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8036 } else {
8037 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8038 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8039 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8040 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8041 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8042 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8043 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8044 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8045 RevShiftVal =
8046 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8047 }
8048 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
8049 MI.eraseFromParent();
8050 return Legalized;
8051}
8052
8053// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8054// representation.
8055LegalizerHelper::LegalizeResult
8056LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8057 auto [Dst, Src] = MI.getFirst2Regs();
8058 const LLT S64 = LLT::scalar(64);
8059 const LLT S32 = LLT::scalar(32);
8060 const LLT S1 = LLT::scalar(1);
8061
8062 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8063
8064 // unsigned cul2f(ulong u) {
8065 // uint lz = clz(u);
8066 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8067 // u = (u << lz) & 0x7fffffffffffffffUL;
8068 // ulong t = u & 0xffffffffffUL;
8069 // uint v = (e << 23) | (uint)(u >> 40);
8070 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8071 // return as_float(v + r);
8072 // }
8073
8074 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8075 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8076
8077 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8078
8079 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8080 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8081
8082 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8083 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8084
8085 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8086 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8087
8088 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8089
8090 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8091 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8092
8093 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8094 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8095 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8096
8097 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8098 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8099 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8100 auto One = MIRBuilder.buildConstant(S32, 1);
8101
8102 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8103 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8104 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8105 MIRBuilder.buildAdd(Dst, V, R);
8106
8107 MI.eraseFromParent();
8108 return Legalized;
8109}
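// [Editorial sketch, not part of LLVM] The cul2f() pseudocode above written as
// standalone C++ (C++20 for std::countl_zero / std::bit_cast) so the
// round-to-nearest-even handling can be checked on a host.
#include <bit>
#include <cstdint>
static float cul2f(uint64_t U) {
  if (U == 0) // the MIR version selects an exponent of 0 for this case
    return 0.0f;
  uint32_t LZ = static_cast<uint32_t>(std::countl_zero(U));
  uint32_t E = 127u + 63u - LZ;                  // biased exponent
  U = (U << LZ) & 0x7fffffffffffffffULL;         // normalize, drop the implicit 1
  uint64_t T = U & 0xffffffffffULL;              // discarded low 40 bits
  uint32_t V = (E << 23) | static_cast<uint32_t>(U >> 40);
  uint32_t R = T > 0x8000000000ULL ? 1u : (T == 0x8000000000ULL ? (V & 1u) : 0u);
  return std::bit_cast<float>(V + R);            // a carry out of the mantissa bumps the exponent correctly
}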
8110
8111// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8112// operations and G_SITOFP
8113LegalizerHelper::LegalizeResult
8114LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8115 auto [Dst, Src] = MI.getFirst2Regs();
8116 const LLT S64 = LLT::scalar(64);
8117 const LLT S32 = LLT::scalar(32);
8118 const LLT S1 = LLT::scalar(1);
8119
8120 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8121
8122 // For i64 values not exceeding INT64_MAX we simply reuse SITOFP.
8123 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8124 // saved before division, convert to float by SITOFP, multiply the result
8125 // by 2.
8126 auto One = MIRBuilder.buildConstant(S64, 1);
8127 auto Zero = MIRBuilder.buildConstant(S64, 0);
8128 // Result if Src < INT_MAX
8129 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8130 // Result if Src >= INT_MAX
8131 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8132 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8133 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8134 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8135 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8136 // Check if the original value is larger than INT64_MAX by comparing with
8137 // zero to pick one of the two conversions.
8138 auto IsLarge =
8139 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8140 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8141
8142 MI.eraseFromParent();
8143 return Legalized;
8144}
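// [Editorial sketch, not part of LLVM] Host C++ equivalent of the expansion
// above, assuming a correctly rounded int64 -> float conversion is available.
#include <cstdint>
static float U64ToF32ViaSigned(uint64_t U) {
  if (static_cast<int64_t>(U) >= 0)               // value fits in the signed range
    return static_cast<float>(static_cast<int64_t>(U));
  uint64_t RoundedHalved = (U >> 1) | (U & 1);    // keep the dropped bit as a sticky bit
  float Half = static_cast<float>(static_cast<int64_t>(RoundedHalved));
  return Half + Half;                             // exact doubling restores the magnitude
}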
8145
8146// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8147// IEEE double representation.
8148LegalizerHelper::LegalizeResult
8149LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8150 auto [Dst, Src] = MI.getFirst2Regs();
8151 const LLT S64 = LLT::scalar(64);
8152 const LLT S32 = LLT::scalar(32);
8153
8154 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8155
8156 // We create the double value from two 32-bit parts whose exponents differ by 32.
8157 // Note that + and - are float operations that adjust the implicit leading
8158 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8159 //
8160 // X = 2^52 * 1.0...LowBits
8161 // Y = 2^84 * 1.0...HighBits
8162 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8163 // = - 2^52 * 1.0...HighBits
8164 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8165 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8166 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8167 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8168 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8169 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8170
8171 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8172 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8173 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8174 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8175 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8176 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8177 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8178
8179 MI.eraseFromParent();
8180 return Legalized;
8181}
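// [Editorial sketch, not part of LLVM] The 2^52 / 2^84 construction above as
// host C++ (C++20 std::bit_cast stands in for the G_OR on raw IEEE-754 bits).
#include <bit>
#include <cstdint>
static double U64ToF64BitFloat(uint64_t Src) {
  uint64_t LowBits = Src & 0xffffffffULL;
  uint64_t HighBits = Src >> 32;
  double LowBitsFP = std::bit_cast<double>(0x4330000000000000ULL | LowBits);   // 2^52 + LowBits
  double HighBitsFP = std::bit_cast<double>(0x4530000000000000ULL | HighBits); // 2^84 + HighBits * 2^32
  double TwoP52P84FP = std::bit_cast<double>(0x4530000000100000ULL);           // 2^84 + 2^52
  // (HighBits * 2^32 - 2^52) + (2^52 + LowBits) == Src, correctly rounded.
  return (HighBitsFP - TwoP52P84FP) + LowBitsFP;
}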
8182
8183/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8184/// convert fpround f64->f16 without double-rounding, so we manually perform the
8185/// lowering here where we know it is valid.
8186static LegalizerHelper::LegalizeResult
8187loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8188 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8189 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8190 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8191 : MIRBuilder.buildSITOFP(SrcTy, Src);
8192 LLT S32Ty = SrcTy.changeElementSize(32);
8193 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8194 MIRBuilder.buildFPTrunc(Dst, M2);
8195 MI.eraseFromParent();
8196 return LegalizerHelper::Legalized;
8197}
8198
8199LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8200 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8201
8202 if (SrcTy == LLT::scalar(1)) {
8203 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8204 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8205 MIRBuilder.buildSelect(Dst, Src, True, False);
8206 MI.eraseFromParent();
8207 return Legalized;
8208 }
8209
8210 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8211 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8212
8213 if (SrcTy != LLT::scalar(64))
8214 return UnableToLegalize;
8215
8216 if (DstTy == LLT::scalar(32))
8217 // TODO: SelectionDAG has several alternative expansions to port which may
8218 // be more reasonable depending on the available instructions. We also need
8219 // a more advanced mechanism to choose an optimal version depending on
8220 // target features such as sitofp or CTLZ availability.
8221 return lowerU64ToF32WithSITOFP(MI);
8222
8223 if (DstTy == LLT::scalar(64))
8224 return lowerU64ToF64BitFloatOps(MI);
8225
8226 return UnableToLegalize;
8227}
8228
8229LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8230 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8231
8232 const LLT S64 = LLT::scalar(64);
8233 const LLT S32 = LLT::scalar(32);
8234 const LLT S1 = LLT::scalar(1);
8235
8236 if (SrcTy == S1) {
8237 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8238 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8239 MIRBuilder.buildSelect(Dst, Src, True, False);
8240 MI.eraseFromParent();
8241 return Legalized;
8242 }
8243
8244 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8245 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8246
8247 if (SrcTy != S64)
8248 return UnableToLegalize;
8249
8250 if (DstTy == S32) {
8251 // signed cl2f(long l) {
8252 // long s = l >> 63;
8253 // float r = cul2f((l + s) ^ s);
8254 // return s ? -r : r;
8255 // }
8256 Register L = Src;
8257 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8258 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8259
8260 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8261 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8262 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8263
8264 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8265 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8266 MIRBuilder.buildConstant(S64, 0));
8267 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8268 MI.eraseFromParent();
8269 return Legalized;
8270 }
8271
8272 return UnableToLegalize;
8273}
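// [Editorial sketch, not part of LLVM] The cl2f() pseudocode above in host C++:
// fold the sign out, convert the magnitude as unsigned, then restore the sign.
// The plain unsigned conversion here stands in for the G_UITOFP expansion.
#include <cstdint>
static float cl2f(int64_t L) {
  uint64_t S = static_cast<uint64_t>(L >> 63);        // 0 or all-ones (arithmetic shift)
  uint64_t Mag = (static_cast<uint64_t>(L) + S) ^ S;  // |L| without signed overflow
  float R = static_cast<float>(Mag);
  return S ? -R : R;
}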
8274
8275LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8276 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8277 const LLT S64 = LLT::scalar(64);
8278 const LLT S32 = LLT::scalar(32);
8279
8280 if (SrcTy != S64 && SrcTy != S32)
8281 return UnableToLegalize;
8282 if (DstTy != S32 && DstTy != S64)
8283 return UnableToLegalize;
8284
8285 // FPTOSI gives same result as FPTOUI for positive signed integers.
8286 // FPTOUI needs to deal with fp values that convert to unsigned integers
8287 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8288
8289 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8290 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8291 : APFloat::IEEEdouble(),
8292 APInt::getZero(SrcTy.getSizeInBits()));
8293 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8294
8295 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8296
8297 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8298 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8299 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8300 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8301 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8302 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8303 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8304
8305 const LLT S1 = LLT::scalar(1);
8306
8307 MachineInstrBuilder FCMP =
8308 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8309 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8310
8311 MI.eraseFromParent();
8312 return Legalized;
8313}
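// [Editorial sketch, not part of LLVM] The threshold trick above, specialised to
// f32 -> u32 on a host. As in the expansion, out-of-range inputs are left
// undefined; !(Src >= Threshold) mirrors the unordered FCMP_ULT.
#include <cstdint>
static uint32_t F32ToU32ViaSigned(float Src) {
  const float Threshold = 2147483648.0f;              // 2^31, exactly representable
  if (!(Src >= Threshold))                            // in signed range (or NaN)
    return static_cast<uint32_t>(static_cast<int32_t>(Src));
  int32_t Low = static_cast<int32_t>(Src - Threshold);
  return static_cast<uint32_t>(Low) ^ 0x80000000u;    // add 2^31 by setting the top bit
}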
8314
8315LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8316 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8317 const LLT S64 = LLT::scalar(64);
8318 const LLT S32 = LLT::scalar(32);
8319
8320 // FIXME: Only f32 to i64 conversions are supported.
8321 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8322 return UnableToLegalize;
8323
8324 // Expand f32 -> i64 conversion
8325 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8326 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8327
8328 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8329
8330 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8331 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8332
8333 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8334 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8335
8336 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8337 APInt::getSignMask(SrcEltBits));
8338 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8339 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8340 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8341 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8342
8343 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8344 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8345 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8346
8347 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8348 R = MIRBuilder.buildZExt(DstTy, R);
8349
8350 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8351 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8352 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8353 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8354
8355 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8356 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8357
8358 const LLT S1 = LLT::scalar(1);
8359 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8360 S1, Exponent, ExponentLoBit);
8361
8362 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8363
8364 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8365 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8366
8367 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8368
8369 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8370 S1, Exponent, ZeroSrcTy);
8371
8372 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8373 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8374
8375 MI.eraseFromParent();
8376 return Legalized;
8377}
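// [Editorial sketch, not part of LLVM] The same fixsfdi-style decomposition in
// host C++: extract sign, exponent and significand, then shift the significand
// into place. Finite, in-range inputs only, as assumed by the expansion above.
#include <bit>
#include <cstdint>
static int64_t F32ToI64Bitwise(float F) {
  uint32_t Bits = std::bit_cast<uint32_t>(F);
  int32_t Exponent = static_cast<int32_t>((Bits & 0x7F800000u) >> 23) - 127;
  int64_t Sign = (Bits & 0x80000000u) ? -1 : 0;
  uint64_t R = (Bits & 0x007FFFFFu) | 0x00800000u;    // restore the implicit leading one
  if (Exponent < 0)                                   // |F| < 1 truncates to 0
    return 0;
  uint64_t Magnitude = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);
  return (static_cast<int64_t>(Magnitude) ^ Sign) - Sign; // conditionally negate
}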
8378
8379LegalizerHelper::LegalizeResult
8380LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8381 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8382
8383 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8384 unsigned SatWidth = DstTy.getScalarSizeInBits();
8385
8386 // Determine minimum and maximum integer values and their corresponding
8387 // floating-point values.
8388 APInt MinInt, MaxInt;
8389 if (IsSigned) {
8390 MinInt = APInt::getSignedMinValue(SatWidth);
8391 MaxInt = APInt::getSignedMaxValue(SatWidth);
8392 } else {
8393 MinInt = APInt::getMinValue(SatWidth);
8394 MaxInt = APInt::getMaxValue(SatWidth);
8395 }
8396
8397 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8398 APFloat MinFloat(Semantics);
8399 APFloat MaxFloat(Semantics);
8400
8401 APFloat::opStatus MinStatus =
8402 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8403 APFloat::opStatus MaxStatus =
8404 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8405 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8406 !(MaxStatus & APFloat::opStatus::opInexact);
8407
8408 // If the integer bounds are exactly representable as floats, emit a
8409 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8410 // and selects.
8411 if (AreExactFloatBounds) {
8412 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8413 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8414 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8415 SrcTy.changeElementSize(1), Src, MaxC);
8416 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8417 // Clamp by MaxFloat from above. NaN cannot occur.
8418 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8419 auto MinP =
8420 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8421 MinC, MachineInstr::FmNoNans);
8422 auto Min =
8423 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8424 // Convert clamped value to integer. In the unsigned case we're done,
8425 // because we mapped NaN to MinFloat, which will cast to zero.
8426 if (!IsSigned) {
8427 MIRBuilder.buildFPTOUI(Dst, Min);
8428 MI.eraseFromParent();
8429 return Legalized;
8430 }
8431
8432 // Otherwise, select 0 if Src is NaN.
8433 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8434 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8435 DstTy.changeElementSize(1), Src, Src);
8436 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8437 FpToInt);
8438 MI.eraseFromParent();
8439 return Legalized;
8440 }
8441
8442 // Result of direct conversion. The assumption here is that the operation is
8443 // non-trapping and it's fine to apply it to an out-of-range value if we
8444 // select it away later.
8445 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8446 : MIRBuilder.buildFPTOUI(DstTy, Src);
8447
8448 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8449 // MinInt if Src is NaN.
8450 auto ULT =
8451 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8452 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8453 auto Max = MIRBuilder.buildSelect(
8454 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8455 // If Src OGT MaxFloat, select MaxInt.
8456 auto OGT =
8457 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8458 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8459
8460 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8461 // is already zero.
8462 if (!IsSigned) {
8463 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8464 Max);
8465 MI.eraseFromParent();
8466 return Legalized;
8467 }
8468
8469 // Otherwise, select 0 if Src is NaN.
8470 auto Min = MIRBuilder.buildSelect(
8471 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8472 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8473 DstTy.changeElementSize(1), Src, Src);
8474 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8475 MI.eraseFromParent();
8476 return Legalized;
8477}
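// [Editorial sketch, not part of LLVM] The exact-bounds path above, specialised
// to f64 -> s32 on a host (both INT32_MIN and INT32_MAX are exact doubles):
// clamp so NaN falls to the lower bound, convert, then force NaN to 0.
#include <cstdint>
static int32_t F64ToS32Sat(double Src) {
  const double MinFloat = -2147483648.0;              // exactly INT32_MIN
  const double MaxFloat = 2147483647.0;               // exactly INT32_MAX
  double Clamped = Src > MinFloat ? Src : MinFloat;   // NaN fails the compare
  Clamped = Clamped < MaxFloat ? Clamped : MaxFloat;
  int32_t Result = static_cast<int32_t>(Clamped);     // always in range now
  return Src != Src ? 0 : Result;                     // signed case maps NaN to 0
}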
8478
8479// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8480LegalizerHelper::LegalizeResult
8481LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8482 const LLT S1 = LLT::scalar(1);
8483 const LLT S32 = LLT::scalar(32);
8484
8485 auto [Dst, Src] = MI.getFirst2Regs();
8486 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8487 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8488
8489 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8490 return UnableToLegalize;
8491
8492 if (MI.getFlag(MachineInstr::FmAfn)) {
8493 unsigned Flags = MI.getFlags();
8494 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8495 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8496 MI.eraseFromParent();
8497 return Legalized;
8498 }
8499
8500 const unsigned ExpMask = 0x7ff;
8501 const unsigned ExpBiasf64 = 1023;
8502 const unsigned ExpBiasf16 = 15;
8503
8504 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8505 Register U = Unmerge.getReg(0);
8506 Register UH = Unmerge.getReg(1);
8507
8508 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8509 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8510
8511 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8512 // add the f16 bias (15) to get the biased exponent for the f16 format.
8513 E = MIRBuilder.buildAdd(
8514 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8515
8516 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8517 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8518
8519 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8520 MIRBuilder.buildConstant(S32, 0x1ff));
8521 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8522
8523 auto Zero = MIRBuilder.buildConstant(S32, 0);
8524 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8525 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8526 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8527
8528 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8529 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8530 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8531 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8532
8533 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8534 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8535
8536 // N = M | (E << 12);
8537 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8538 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8539
8540 // B = clamp(1-E, 0, 13);
8541 auto One = MIRBuilder.buildConstant(S32, 1);
8542 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8543 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8544 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8545
8546 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8547 MIRBuilder.buildConstant(S32, 0x1000));
8548
8549 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8550 auto D0 = MIRBuilder.buildShl(S32, D, B);
8551
8552 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8553 D0, SigSetHigh);
8554 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8555 D = MIRBuilder.buildOr(S32, D, D1);
8556
8557 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8558 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8559
8560 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8561 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8562
8563 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8564 MIRBuilder.buildConstant(S32, 3));
8565 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8566
8567 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8568 MIRBuilder.buildConstant(S32, 5));
8569 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8570
8571 V1 = MIRBuilder.buildOr(S32, V0, V1);
8572 V = MIRBuilder.buildAdd(S32, V, V1);
8573
8574 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8575 E, MIRBuilder.buildConstant(S32, 30));
8576 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8577 MIRBuilder.buildConstant(S32, 0x7c00), V);
8578
8579 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8580 E, MIRBuilder.buildConstant(S32, 1039));
8581 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8582
8583 // Extract the sign bit.
8584 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8585 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8586
8587 // Insert the sign bit
8588 V = MIRBuilder.buildOr(S32, Sign, V);
8589
8590 MIRBuilder.buildTrunc(Dst, V);
8591 MI.eraseFromParent();
8592 return Legalized;
8593}
8594
8595LegalizerHelper::LegalizeResult
8596LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8597 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8598 const LLT S64 = LLT::scalar(64);
8599 const LLT S16 = LLT::scalar(16);
8600
8601 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8602 return lowerFPTRUNC_F64_TO_F16(MI);
8603
8604 return UnableToLegalize;
8605}
8606
8607LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8608 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8609 LLT Ty = MRI.getType(Dst);
8610
8611 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8612 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8613 MI.eraseFromParent();
8614 return Legalized;
8615}
8616
8617static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8618 switch (Opc) {
8619 case TargetOpcode::G_SMIN:
8620 return CmpInst::ICMP_SLT;
8621 case TargetOpcode::G_SMAX:
8622 return CmpInst::ICMP_SGT;
8623 case TargetOpcode::G_UMIN:
8624 return CmpInst::ICMP_ULT;
8625 case TargetOpcode::G_UMAX:
8626 return CmpInst::ICMP_UGT;
8627 default:
8628 llvm_unreachable("not in integer min/max");
8629 }
8630}
8631
8632LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8633 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8634
8635 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8636 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8637
8638 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8639 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8640
8641 MI.eraseFromParent();
8642 return Legalized;
8643}
8644
8645LegalizerHelper::LegalizeResult
8646LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8647 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8648
8649 Register Dst = Cmp->getReg(0);
8650 LLT DstTy = MRI.getType(Dst);
8651 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8652 LLT CmpTy = DstTy.changeElementSize(1);
8653
8654 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8655 ? CmpInst::Predicate::ICMP_SLT
8656 : CmpInst::Predicate::ICMP_ULT;
8657 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8658 ? CmpInst::Predicate::ICMP_SGT
8659 : CmpInst::Predicate::ICMP_UGT;
8660
8661 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8662 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8663 Cmp->getRHSReg());
8664 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8665 Cmp->getRHSReg());
8666
8667 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8668 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8669 if (TLI.preferSelectsOverBooleanArithmetic(
8670 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8671 BC == TargetLowering::UndefinedBooleanContent) {
8672 auto One = MIRBuilder.buildConstant(DstTy, 1);
8673 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8674
8675 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8676 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8677 } else {
8678 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8679 std::swap(IsGT, IsLT);
8680 // Extend boolean results to DstTy, which is at least i2, before subtracting
8681 // them.
8682 unsigned BoolExtOp =
8683 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8684 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8685 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8686 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8687 }
8688
8689 MI.eraseFromParent();
8690 return Legalized;
8691}
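// [Editorial sketch, not part of LLVM] The boolean-arithmetic form of the
// three-way compare above, for signed 32-bit values on a host:
// result = (lhs > rhs) - (lhs < rhs), i.e. -1, 0 or +1.
#include <cstdint>
static int32_t SCmp32(int32_t LHS, int32_t RHS) {
  int32_t IsGT = LHS > RHS; // stands in for the extended G_ICMP result
  int32_t IsLT = LHS < RHS;
  return IsGT - IsLT;
}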
8692
8693LegalizerHelper::LegalizeResult
8694LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8695 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8696 const int Src0Size = Src0Ty.getScalarSizeInBits();
8697 const int Src1Size = Src1Ty.getScalarSizeInBits();
8698
8699 auto SignBitMask = MIRBuilder.buildConstant(
8700 Src0Ty, APInt::getSignMask(Src0Size));
8701
8702 auto NotSignBitMask = MIRBuilder.buildConstant(
8703 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8704
8705 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8706 Register And1;
8707 if (Src0Ty == Src1Ty) {
8708 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8709 } else if (Src0Size > Src1Size) {
8710 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8711 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8712 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8713 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8714 } else {
8715 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8716 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8717 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8718 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8719 }
8720
8721 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8722 // constants are a nan and -0.0, but the final result should preserve
8723 // everything.
8724 unsigned Flags = MI.getFlags();
8725
8726 // We masked the sign bit and the not-sign bit, so these are disjoint.
8727 Flags |= MachineInstr::Disjoint;
8728
8729 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8730
8731 MI.eraseFromParent();
8732 return Legalized;
8733}
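// [Editorial sketch, not part of LLVM] The same-width case of the mask-and-or
// copysign expansion above, for f32 on a host.
#include <bit>
#include <cstdint>
static float CopySignF32(float Mag, float Sign) {
  uint32_t MagBits = std::bit_cast<uint32_t>(Mag) & 0x7fffffffu;   // clear the sign bit
  uint32_t SignBit = std::bit_cast<uint32_t>(Sign) & 0x80000000u;  // keep only the sign bit
  return std::bit_cast<float>(MagBits | SignBit);                  // disjoint masks, OR merges them
}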
8734
8735LegalizerHelper::LegalizeResult
8736LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8737 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8738 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8739 // depend on fminnum/fmaxnum.
8740
8741 unsigned NewOp;
8742 switch (MI.getOpcode()) {
8743 case TargetOpcode::G_FMINNUM:
8744 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8745 break;
8746 case TargetOpcode::G_FMINIMUMNUM:
8747 NewOp = TargetOpcode::G_FMINNUM;
8748 break;
8749 case TargetOpcode::G_FMAXNUM:
8750 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8751 break;
8752 case TargetOpcode::G_FMAXIMUMNUM:
8753 NewOp = TargetOpcode::G_FMAXNUM;
8754 break;
8755 default:
8756 llvm_unreachable("unexpected min/max opcode");
8757 }
8758
8759 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8760 LLT Ty = MRI.getType(Dst);
8761
8762 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8763 // Insert canonicalizes if it's possible we need to quiet to get correct
8764 // sNaN behavior.
8765
8766 // Note this must be done here, and not as an optimization combine in the
8767 // absence of a dedicated quiet-snan instruction as we're using an
8768 // omni-purpose G_FCANONICALIZE.
8769 if (!isKnownNeverSNaN(Src0, MRI))
8770 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8771
8772 if (!isKnownNeverSNaN(Src1, MRI))
8773 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8774 }
8775
8776 // If there are no nans, it's safe to simply replace this with the non-IEEE
8777 // version.
8778 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8779 MI.eraseFromParent();
8780 return Legalized;
8781}
8782
8783LegalizerHelper::LegalizeResult
8784LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
8785 unsigned Opc = MI.getOpcode();
8786 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8787 LLT Ty = MRI.getType(Dst);
8788 LLT CmpTy = Ty.changeElementSize(1);
8789
8790 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8791 unsigned OpcIeee =
8792 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8793 unsigned OpcNonIeee =
8794 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8795 bool MinMaxMustRespectOrderedZero = false;
8796 Register Res;
8797
8798 // IEEE variants don't need canonicalization
8799 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8800 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8801 MinMaxMustRespectOrderedZero = true;
8802 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8803 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8804 } else {
8805 auto Compare = MIRBuilder.buildFCmp(
8806 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8807 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8808 }
8809
8810 // Propagate any NaN of both operands
8811 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8812 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8813 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8814
8815 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8816 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8817 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8818 if (Ty.isVector())
8819 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8820
8821 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8822 }
8823
8824 // fminimum/fmaximum requires -0.0 less than +0.0
8825 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8826 GISelValueTracking VT(MIRBuilder.getMF());
8827 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8828 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8829
8830 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8831 const unsigned Flags = MI.getFlags();
8832 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8833 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8834
8835 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8836
8837 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8838 auto LHSSelect =
8839 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8840
8841 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8842 auto RHSSelect =
8843 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8844
8845 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8846 }
8847 }
8848
8849 MIRBuilder.buildCopy(Dst, Res);
8850 MI.eraseFromParent();
8851 return Legalized;
8852}
8853
8854LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8855 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8856 Register DstReg = MI.getOperand(0).getReg();
8857 LLT Ty = MRI.getType(DstReg);
8858 unsigned Flags = MI.getFlags();
8859
8860 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8861 Flags);
8862 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8863 MI.eraseFromParent();
8864 return Legalized;
8865}
8866
8867LegalizerHelper::LegalizeResult
8868LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8869 auto [DstReg, X] = MI.getFirst2Regs();
8870 const unsigned Flags = MI.getFlags();
8871 const LLT Ty = MRI.getType(DstReg);
8872 const LLT CondTy = Ty.changeElementSize(1);
8873
8874 // round(x) =>
8875 // t = trunc(x);
8876 // d = fabs(x - t);
8877 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8878 // return t + o;
8879
8880 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8881
8882 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8883 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8884
8885 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8886 auto Cmp =
8887 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8888
8889 // Could emit G_UITOFP instead
8890 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8891 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8892 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8893 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8894
8895 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8896
8897 MI.eraseFromParent();
8898 return Legalized;
8899}
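// [Editorial sketch, not part of LLVM] The trunc + copysign formulation of
// round() above in host C++ (ties away from zero).
#include <cmath>
static double RoundLikeLowering(double X) {
  double T = std::trunc(X);
  double O = std::copysign(std::fabs(X - T) >= 0.5 ? 1.0 : 0.0, X);
  return T + O;
}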
8900
8901LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8902 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8903 unsigned Flags = MI.getFlags();
8904 LLT Ty = MRI.getType(DstReg);
8905 const LLT CondTy = Ty.changeElementSize(1);
8906
8907 // result = trunc(src);
8908 // if (src < 0.0 && src != result)
8909 // result += -1.0.
8910
8911 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8912 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8913
8914 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8915 SrcReg, Zero, Flags);
8916 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8917 SrcReg, Trunc, Flags);
8918 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8919 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8920
8921 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8922 MI.eraseFromParent();
8923 return Legalized;
8924}
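// [Editorial sketch, not part of LLVM] The floor() expansion above in host C++:
// truncate, then add -1.0 when the input was negative and not already integral
// (mirroring the SITOFP of the s1 condition, which yields 0.0 or -1.0).
#include <cmath>
static double FloorLikeLowering(double Src) {
  double Trunc = std::trunc(Src);
  bool NeedAdjust = (Src < 0.0) && (Src != Trunc); // ordered compares; false for NaN
  return Trunc + (NeedAdjust ? -1.0 : 0.0);
}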
8925
8926LegalizerHelper::LegalizeResult
8927LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
8928 const unsigned NumOps = MI.getNumOperands();
8929 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8930 unsigned PartSize = Src0Ty.getSizeInBits();
8931
8932 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8933 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8934
8935 for (unsigned I = 2; I != NumOps; ++I) {
8936 const unsigned Offset = (I - 1) * PartSize;
8937
8938 Register SrcReg = MI.getOperand(I).getReg();
8939 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8940
8941 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8942 MRI.createGenericVirtualRegister(WideTy);
8943
8944 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8945 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8946 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8947 ResultReg = NextResult;
8948 }
8949
8950 if (DstTy.isPointer()) {
8951 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8952 DstTy.getAddressSpace())) {
8953 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8954 return UnableToLegalize;
8955 }
8956
8957 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8958 }
8959
8960 MI.eraseFromParent();
8961 return Legalized;
8962}
8963
8964LegalizerHelper::LegalizeResult
8965LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8966 const unsigned NumDst = MI.getNumOperands() - 1;
8967 Register SrcReg = MI.getOperand(NumDst).getReg();
8968 Register Dst0Reg = MI.getOperand(0).getReg();
8969 LLT DstTy = MRI.getType(Dst0Reg);
8970 if (DstTy.isPointer())
8971 return UnableToLegalize; // TODO
8972
8973 SrcReg = coerceToScalar(SrcReg);
8974 if (!SrcReg)
8975 return UnableToLegalize;
8976
8977 // Expand scalarizing unmerge as bitcast to integer and shift.
8978 LLT IntTy = MRI.getType(SrcReg);
8979
8980 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8981
8982 const unsigned DstSize = DstTy.getSizeInBits();
8983 unsigned Offset = DstSize;
8984 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8985 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8986 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8987 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8988 }
8989
8990 MI.eraseFromParent();
8991 return Legalized;
8992}
8993
8994/// Lower a vector extract or insert by writing the vector to a stack temporary
8995/// and reloading the element or vector.
8996///
8997/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8998/// =>
8999/// %stack_temp = G_FRAME_INDEX
9000/// G_STORE %vec, %stack_temp
9001/// %idx = clamp(%idx, %vec.getNumElements())
9002/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9003/// %dst = G_LOAD %element_ptr
9004LegalizerHelper::LegalizeResult
9005LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
9006 Register DstReg = MI.getOperand(0).getReg();
9007 Register SrcVec = MI.getOperand(1).getReg();
9008 Register InsertVal;
9009 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9010 InsertVal = MI.getOperand(2).getReg();
9011
9012 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9013
9014 LLT VecTy = MRI.getType(SrcVec);
9015 LLT EltTy = VecTy.getElementType();
9016 unsigned NumElts = VecTy.getNumElements();
9017
9018 int64_t IdxVal;
9019 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9020 SmallVector<Register, 8> SrcRegs;
9021 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9022
9023 if (InsertVal) {
9024 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9025 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9026 } else {
9027 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9028 }
9029
9030 MI.eraseFromParent();
9031 return Legalized;
9032 }
9033
9034 if (!EltTy.isByteSized()) { // Not implemented.
9035 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9036 return UnableToLegalize;
9037 }
9038
9039 unsigned EltBytes = EltTy.getSizeInBytes();
9040 Align VecAlign = getStackTemporaryAlignment(VecTy);
9041 Align EltAlign;
9042
9043 MachinePointerInfo PtrInfo;
9044 auto StackTemp = createStackTemporary(
9045 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9046 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9047
9048 // Get the pointer to the element, and be sure not to hit undefined behavior
9049 // if the index is out of bounds.
9050 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9051
9052 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9053 int64_t Offset = IdxVal * EltBytes;
9054 PtrInfo = PtrInfo.getWithOffset(Offset);
9055 EltAlign = commonAlignment(VecAlign, Offset);
9056 } else {
9057 // We lose information with a variable offset.
9058 EltAlign = getStackTemporaryAlignment(EltTy);
9059 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9060 }
9061
9062 if (InsertVal) {
9063 // Write the inserted element
9064 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9065
9066 // Reload the whole vector.
9067 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9068 } else {
9069 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9070 }
9071
9072 MI.eraseFromParent();
9073 return Legalized;
9074}
9075
9076LegalizerHelper::LegalizeResult
9077LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
9078 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9079 MI.getFirst3RegLLTs();
9080 LLT IdxTy = LLT::scalar(32);
9081
9082 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9083 Register Undef;
9084 SmallVector<Register, 32> BuildVec;
9085 LLT EltTy = DstTy.getScalarType();
9086
9087 for (int Idx : Mask) {
9088 if (Idx < 0) {
9089 if (!Undef.isValid())
9090 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9091 BuildVec.push_back(Undef);
9092 continue;
9093 }
9094
9095 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9096
9097 int NumElts = Src0Ty.getNumElements();
9098 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9099 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9100 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9101 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
9102 BuildVec.push_back(Extract.getReg(0));
9103 }
9104
9105 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9106 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9107 MI.eraseFromParent();
9108 return Legalized;
9109}
9110
9111LegalizerHelper::LegalizeResult
9112LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
9113 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9114 MI.getFirst4RegLLTs();
9115
9116 if (VecTy.isScalableVector())
9117 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9118
9119 Align VecAlign = getStackTemporaryAlignment(VecTy);
9120 MachinePointerInfo PtrInfo;
9121 Register StackPtr =
9122 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9123 PtrInfo)
9124 .getReg(0);
9125 MachinePointerInfo ValPtrInfo =
9126 MachinePointerInfo::getUnknownStack(*MI.getMF());
9127
9128 LLT IdxTy = LLT::scalar(32);
9129 LLT ValTy = VecTy.getElementType();
9130 Align ValAlign = getStackTemporaryAlignment(ValTy);
9131
9132 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9133
9134 bool HasPassthru =
9135 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9136
9137 if (HasPassthru)
9138 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9139
9140 Register LastWriteVal;
9141 std::optional<APInt> PassthruSplatVal =
9142 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9143
9144 if (PassthruSplatVal.has_value()) {
9145 LastWriteVal =
9146 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9147 } else if (HasPassthru) {
9148 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9149 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9150 {LLT::scalar(32)}, {Popcount});
9151
9152 Register LastElmtPtr =
9153 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9154 LastWriteVal =
9155 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9156 .getReg(0);
9157 }
9158
9159 unsigned NumElmts = VecTy.getNumElements();
9160 for (unsigned I = 0; I < NumElmts; ++I) {
9161 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9162 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9163 Register ElmtPtr =
9164 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9165 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9166
9167 LLT MaskITy = MaskTy.getElementType();
9168 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9169 if (MaskITy.getSizeInBits() > 1)
9170 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9171
9172 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9173 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9174
9175 if (HasPassthru && I == NumElmts - 1) {
9176 auto EndOfVector =
9177 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9178 auto AllLanesSelected = MIRBuilder.buildICmp(
9179 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9180 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9181 {OutPos, EndOfVector});
9182 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9183
9184 LastWriteVal =
9185 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9186 .getReg(0);
9187 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9188 }
9189 }
9190
9191 // TODO: Use StackPtr's FrameIndex alignment.
9192 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9193
9194 MI.eraseFromParent();
9195 return Legalized;
9196}
9197
9198Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9199 Register AllocSize,
9200 Align Alignment,
9201 LLT PtrTy) {
9202 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9203
9204 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9205 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9206
9207 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9208 // have to generate an extra instruction to negate the alloc and then use
9209 // G_PTR_ADD to add the negative offset.
9210 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9211 if (Alignment > Align(1)) {
9212 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9213 AlignMask.negate();
9214 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9215 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9216 }
9217
9218 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9219}
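// [Editorial sketch, not part of LLVM] The pointer arithmetic above on a host:
// decrement a downward-growing stack pointer, then align it down with the
// negated alignment mask (Alignment is assumed to be a power of two).
#include <cstdint>
static uint64_t AlignedDynAlloc(uint64_t SP, uint64_t AllocSize, uint64_t Alignment) {
  uint64_t Alloc = SP - AllocSize;       // stack grows down
  if (Alignment > 1)
    Alloc &= ~(Alignment - 1);           // same effect as AND with -Alignment
  return Alloc;
}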
9220
9221LegalizerHelper::LegalizeResult
9222LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9223 const auto &MF = *MI.getMF();
9224 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9225 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9226 return UnableToLegalize;
9227
9228 Register Dst = MI.getOperand(0).getReg();
9229 Register AllocSize = MI.getOperand(1).getReg();
9230 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9231
9232 LLT PtrTy = MRI.getType(Dst);
9233 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9234 Register SPTmp =
9235 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9236
9237 MIRBuilder.buildCopy(SPReg, SPTmp);
9238 MIRBuilder.buildCopy(Dst, SPTmp);
9239
9240 MI.eraseFromParent();
9241 return Legalized;
9242}
9243
9244LegalizerHelper::LegalizeResult
9245LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9246 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9247 if (!StackPtr)
9248 return UnableToLegalize;
9249
9250 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9251 MI.eraseFromParent();
9252 return Legalized;
9253}
9254
9255LegalizerHelper::LegalizeResult
9256LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9257 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9258 if (!StackPtr)
9259 return UnableToLegalize;
9260
9261 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9262 MI.eraseFromParent();
9263 return Legalized;
9264}
9265
9266LegalizerHelper::LegalizeResult
9267LegalizerHelper::lowerExtract(MachineInstr &MI) {
9268 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9269 unsigned Offset = MI.getOperand(2).getImm();
9270
9271 // Extract sub-vector or one element
9272 if (SrcTy.isVector()) {
9273 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9274 unsigned DstSize = DstTy.getSizeInBits();
9275
9276 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9277 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9278 // Unmerge and allow access to each Src element for the artifact combiner.
9279 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9280
9281 // Take element(s) we need to extract and copy it (merge them).
9282 SmallVector<Register, 8> SubVectorElts;
9283 for (unsigned Idx = Offset / SrcEltSize;
9284 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9285 SubVectorElts.push_back(Unmerge.getReg(Idx));
9286 }
9287 if (SubVectorElts.size() == 1)
9288 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9289 else
9290 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9291
9292 MI.eraseFromParent();
9293 return Legalized;
9294 }
9295 }
9296
9297 if (DstTy.isScalar() &&
9298 (SrcTy.isScalar() ||
9299 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9300 LLT SrcIntTy = SrcTy;
9301 if (!SrcTy.isScalar()) {
9302 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9303 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9304 }
9305
9306 if (Offset == 0)
9307 MIRBuilder.buildTrunc(DstReg, SrcReg);
9308 else {
9309 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9310 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9311 MIRBuilder.buildTrunc(DstReg, Shr);
9312 }
9313
9314 MI.eraseFromParent();
9315 return Legalized;
9316 }
9317
9318 return UnableToLegalize;
9319}
9320
9321LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9322 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9323 uint64_t Offset = MI.getOperand(3).getImm();
9324
9325 LLT DstTy = MRI.getType(Src);
9326 LLT InsertTy = MRI.getType(InsertSrc);
9327
9328 // Insert sub-vector or one element
9329 if (DstTy.isVector() && !InsertTy.isPointer()) {
9330 LLT EltTy = DstTy.getElementType();
9331 unsigned EltSize = EltTy.getSizeInBits();
9332 unsigned InsertSize = InsertTy.getSizeInBits();
9333
9334 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9335 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9336 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9337 SmallVector<Register, 8> DstElts;
9338 unsigned Idx = 0;
9339 // Elements from Src before insert start Offset
9340 for (; Idx < Offset / EltSize; ++Idx) {
9341 DstElts.push_back(UnmergeSrc.getReg(Idx));
9342 }
9343
9344 // Replace elements in Src with elements from InsertSrc
9345 if (InsertTy.getSizeInBits() > EltSize) {
9346 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9347 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9348 ++Idx, ++i) {
9349 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9350 }
9351 } else {
9352 DstElts.push_back(InsertSrc);
9353 ++Idx;
9354 }
9355
9356 // Remaining elements from Src after insert
9357 for (; Idx < DstTy.getNumElements(); ++Idx) {
9358 DstElts.push_back(UnmergeSrc.getReg(Idx));
9359 }
9360
9361 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9362 MI.eraseFromParent();
9363 return Legalized;
9364 }
9365 }
9366
9367 if (InsertTy.isVector() ||
9368 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9369 return UnableToLegalize;
9370
9371 const DataLayout &DL = MIRBuilder.getDataLayout();
9372 if ((DstTy.isPointer() &&
9373 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9374 (InsertTy.isPointer() &&
9375 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9376 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9377 return UnableToLegalize;
9378 }
9379
9380 LLT IntDstTy = DstTy;
9381
9382 if (!DstTy.isScalar()) {
9383 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9384 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9385 }
9386
9387 if (!InsertTy.isScalar()) {
9388 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9389 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9390 }
9391
9392 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9393 if (Offset != 0) {
9394 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9395 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9396 }
9397
9398 APInt MaskVal = APInt::getBitsSetWithWrap(
9399 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9400
9401 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9402 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9403 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9404
9405 MIRBuilder.buildCast(Dst, Or);
9406 MI.eraseFromParent();
9407 return Legalized;
9408}
9409
9410LegalizerHelper::LegalizeResult
9411LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9412 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9413 MI.getFirst4RegLLTs();
9414 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9415
9416 LLT Ty = Dst0Ty;
9417 LLT BoolTy = Dst1Ty;
9418
9419 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9420
9421 if (IsAdd)
9422 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9423 else
9424 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9425
9426 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9427
9428 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9429
9430 // For an addition, the result should be less than one of the operands (LHS)
9431 // if and only if the other operand (RHS) is negative, otherwise there will
9432 // be overflow.
9433 // For a subtraction, the result should be less than one of the operands
9434 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9435 // otherwise there will be overflow.
9436 auto ResultLowerThanLHS =
9437 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9438 auto ConditionRHS = MIRBuilder.buildICmp(
9439 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9440
9441 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9442
9443 MIRBuilder.buildCopy(Dst0, NewDst0);
9444 MI.eraseFromParent();
9445
9446 return Legalized;
9447}
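// [Editorial sketch, not part of LLVM] The overflow test above for 32-bit signed
// addition on a host: the wrapped sum is below LHS exactly when RHS is negative,
// unless the addition overflowed, so XOR of the two conditions detects overflow.
#include <cstdint>
static bool SAddOverflows(int32_t LHS, int32_t RHS, int32_t &Sum) {
  Sum = static_cast<int32_t>(static_cast<uint32_t>(LHS) +
                             static_cast<uint32_t>(RHS)); // two's-complement wrap
  bool ResultLowerThanLHS = Sum < LHS;
  bool RHSIsNegative = RHS < 0;
  return ResultLowerThanLHS != RHSIsNegative;
}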
9448
9450 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9451 const LLT Ty = MRI.getType(Res);
9452
9453 // sum = LHS + RHS + zext(CarryIn)
9454 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9455 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9456 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9457 MIRBuilder.buildCopy(Res, Sum);
9458
9459 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9460 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9461 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9462 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9463
9464 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9465 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9466
9467 MI.eraseFromParent();
9468 return Legalized;
9469}
9470
9472 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9473 const LLT Ty = MRI.getType(Res);
9474
9475 // Diff = LHS - (RHS + zext(CarryIn))
9476 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9477 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9478 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9479 MIRBuilder.buildCopy(Res, Diff);
9480
9481 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9482 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9483 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9484 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9485 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9486 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9487
9488 MI.eraseFromParent();
9489 return Legalized;
9490}
9491
9492LegalizerHelper::LegalizeResult
9493LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9494 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9495 LLT Ty = MRI.getType(Res);
9496 bool IsSigned;
9497 bool IsAdd;
9498 unsigned BaseOp;
9499 switch (MI.getOpcode()) {
9500 default:
9501 llvm_unreachable("unexpected addsat/subsat opcode");
9502 case TargetOpcode::G_UADDSAT:
9503 IsSigned = false;
9504 IsAdd = true;
9505 BaseOp = TargetOpcode::G_ADD;
9506 break;
9507 case TargetOpcode::G_SADDSAT:
9508 IsSigned = true;
9509 IsAdd = true;
9510 BaseOp = TargetOpcode::G_ADD;
9511 break;
9512 case TargetOpcode::G_USUBSAT:
9513 IsSigned = false;
9514 IsAdd = false;
9515 BaseOp = TargetOpcode::G_SUB;
9516 break;
9517 case TargetOpcode::G_SSUBSAT:
9518 IsSigned = true;
9519 IsAdd = false;
9520 BaseOp = TargetOpcode::G_SUB;
9521 break;
9522 }
9523
9524 if (IsSigned) {
9525 // sadd.sat(a, b) ->
9526 // hi = 0x7fffffff - smax(a, 0)
9527 // lo = 0x80000000 - smin(a, 0)
9528 // a + smin(smax(lo, b), hi)
9529 // ssub.sat(a, b) ->
9530 // lo = smax(a, -1) - 0x7fffffff
9531 // hi = smin(a, -1) - 0x80000000
9532 // a - smin(smax(lo, b), hi)
9533 // TODO: AMDGPU can use a "median of 3" instruction here:
9534 // a +/- med3(lo, b, hi)
9535 uint64_t NumBits = Ty.getScalarSizeInBits();
9536 auto MaxVal =
9537 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9538 auto MinVal =
9539 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9540 MachineInstrBuilder Hi, Lo;
9541 if (IsAdd) {
9542 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9543 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9544 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9545 } else {
9546 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9547 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9548 MaxVal);
9549 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9550 MinVal);
9551 }
9552 auto RHSClamped =
9553 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9554 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9555 } else {
9556 // uadd.sat(a, b) -> a + umin(~a, b)
9557 // usub.sat(a, b) -> a - umin(a, b)
9558 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9559 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9560 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9561 }
9562
9563 MI.eraseFromParent();
9564 return Legalized;
9565}
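// [Editorial sketch, not part of LLVM] The unsigned branch above on a host:
// uadd.sat(a, b) = a + umin(~a, b) and usub.sat(a, b) = a - umin(a, b).
#include <algorithm>
#include <cstdint>
static uint32_t UAddSat32(uint32_t A, uint32_t B) {
  return A + std::min(~A, B);  // ~A is the remaining headroom before overflow
}
static uint32_t USubSat32(uint32_t A, uint32_t B) {
  return A - std::min(A, B);   // clamps the result at zero
}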
9566
9567LegalizerHelper::LegalizeResult
9568LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9569 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9570 LLT Ty = MRI.getType(Res);
9571 LLT BoolTy = Ty.changeElementSize(1);
9572 bool IsSigned;
9573 bool IsAdd;
9574 unsigned OverflowOp;
9575 switch (MI.getOpcode()) {
9576 default:
9577 llvm_unreachable("unexpected addsat/subsat opcode");
9578 case TargetOpcode::G_UADDSAT:
9579 IsSigned = false;
9580 IsAdd = true;
9581 OverflowOp = TargetOpcode::G_UADDO;
9582 break;
9583 case TargetOpcode::G_SADDSAT:
9584 IsSigned = true;
9585 IsAdd = true;
9586 OverflowOp = TargetOpcode::G_SADDO;
9587 break;
9588 case TargetOpcode::G_USUBSAT:
9589 IsSigned = false;
9590 IsAdd = false;
9591 OverflowOp = TargetOpcode::G_USUBO;
9592 break;
9593 case TargetOpcode::G_SSUBSAT:
9594 IsSigned = true;
9595 IsAdd = false;
9596 OverflowOp = TargetOpcode::G_SSUBO;
9597 break;
9598 }
9599
9600 auto OverflowRes =
9601 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9602 Register Tmp = OverflowRes.getReg(0);
9603 Register Ov = OverflowRes.getReg(1);
9604 MachineInstrBuilder Clamp;
9605 if (IsSigned) {
9606 // sadd.sat(a, b) ->
9607 // {tmp, ov} = saddo(a, b)
9608 // ov ? (tmp >>s 31) + 0x80000000 : r
9609 // ssub.sat(a, b) ->
9610 // {tmp, ov} = ssubo(a, b)
9611 // ov ? (tmp >>s 31) + 0x80000000 : r
9612 uint64_t NumBits = Ty.getScalarSizeInBits();
9613 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9614 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9615 auto MinVal =
9616 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9617 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9618 } else {
9619 // uadd.sat(a, b) ->
9620 // {tmp, ov} = uaddo(a, b)
9621 // ov ? 0xffffffff : tmp
9622 // usub.sat(a, b) ->
9623 // {tmp, ov} = usubo(a, b)
9624 // ov ? 0 : tmp
9625 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9626 }
9627 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9628
9629 MI.eraseFromParent();
9630 return Legalized;
9631}
9632
9633LegalizerHelper::LegalizeResult
9634LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9635 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9636 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9637 "Expected shlsat opcode!");
9638 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9639 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9640 LLT Ty = MRI.getType(Res);
9641 LLT BoolTy = Ty.changeElementSize(1);
9642
9643 unsigned BW = Ty.getScalarSizeInBits();
9644 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9645 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9646 : MIRBuilder.buildLShr(Ty, Result, RHS);
9647
9648 MachineInstrBuilder SatVal;
9649 if (IsSigned) {
9650 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9651 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9652 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9653 MIRBuilder.buildConstant(Ty, 0));
9654 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9655 } else {
9656 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9657 }
9658 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9659 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9660
9661 MI.eraseFromParent();
9662 return Legalized;
9663}
9664
9665LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9666 auto [Dst, Src] = MI.getFirst2Regs();
9667 const LLT Ty = MRI.getType(Src);
9668 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9669 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9670
9671 // Swap most and least significant byte, set remaining bytes in Res to zero.
9672 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9673 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9674 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9675 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9676
9677 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9678 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9679 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9680 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9681 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9682 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9683 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9684 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9685 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9686 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9687 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9688 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9689 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9690 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9691 }
9692 Res.getInstr()->getOperand(0).setReg(Dst);
9693
9694 MI.eraseFromParent();
9695 return Legalized;
9696}
9697
9698//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9699static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9700 MachineInstrBuilder Src, const APInt &Mask) {
9701 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9702 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9703 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9704 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9705 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9706 return B.buildOr(Dst, LHS, RHS);
9707}
9708
9709LegalizerHelper::LegalizeResult
9710LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9711 auto [Dst, Src] = MI.getFirst2Regs();
9712 const LLT SrcTy = MRI.getType(Src);
9713 unsigned Size = SrcTy.getScalarSizeInBits();
9714 unsigned VSize = SrcTy.getSizeInBits();
9715
9716 if (Size >= 8) {
9717 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9718 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9719 {LLT::fixed_vector(VSize / 8, 8),
9720 LLT::fixed_vector(VSize / 8, 8)}}))) {
9721 // If bitreverse is legal for i8 vector of the same size, then cast
9722 // to i8 vector type.
9723 // e.g. v4s32 -> v16s8
9724 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9725 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9726 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9727 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9728 MIRBuilder.buildBitcast(Dst, RBIT);
9729 } else {
9730 MachineInstrBuilder BSWAP =
9731 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9732
9733 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9734 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9735 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9736 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9737 APInt::getSplat(Size, APInt(8, 0xF0)));
9738
9739 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9740 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9741 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9742 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9743 APInt::getSplat(Size, APInt(8, 0xCC)));
9744
9745 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9746 // 6|7
9747 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9748 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9749 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9750 }
9751 } else {
9752 // Expand bitreverse for types smaller than 8 bits.
9753 MachineInstrBuilder Tmp;
9754 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9755 MachineInstrBuilder Tmp2;
9756 if (I < J) {
9757 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9758 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9759 } else {
9760 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9761 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9762 }
9763
9764 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9765 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9766 if (I == 0)
9767 Tmp = Tmp2;
9768 else
9769 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9770 }
9771 MIRBuilder.buildCopy(Dst, Tmp);
9772 }
9773
9774 MI.eraseFromParent();
9775 return Legalized;
9776}
9777
9780 MachineFunction &MF = MIRBuilder.getMF();
9781
9782 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9783 int NameOpIdx = IsRead ? 1 : 0;
9784 int ValRegIndex = IsRead ? 0 : 1;
9785
9786 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9787 const LLT Ty = MRI.getType(ValReg);
9788 const MDString *RegStr = cast<MDString>(
9789 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9790
9791 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9792 if (!PhysReg) {
9793 const Function &Fn = MF.getFunction();
9794 Fn.getContext().diagnose(DiagnosticInfoGenericWithLoc(
9795 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9796 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9797 Fn, MI.getDebugLoc()));
9798 if (IsRead)
9799 MIRBuilder.buildUndef(ValReg);
9800
9801 MI.eraseFromParent();
9802 return Legalized;
9803 }
9804
9805 if (IsRead)
9806 MIRBuilder.buildCopy(ValReg, PhysReg);
9807 else
9808 MIRBuilder.buildCopy(PhysReg, ValReg);
9809
9810 MI.eraseFromParent();
9811 return Legalized;
9812}
9813
9816 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9817 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9818 Register Result = MI.getOperand(0).getReg();
9819 LLT OrigTy = MRI.getType(Result);
9820 auto SizeInBits = OrigTy.getScalarSizeInBits();
9821 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
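 // E.g. for an s8 G_SMULH with operands -128 and 3: both are sign-extended
 // to s16, -128 * 3 = -384 = 0xFE80, an arithmetic shift right by 8 gives
 // 0xFFFE, and truncating back to s8 yields -2, the high byte of the product.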
9822
9823 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9824 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9825 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9826 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9827
9828 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9829 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9830 MIRBuilder.buildTrunc(Result, Shifted);
9831
9832 MI.eraseFromParent();
9833 return Legalized;
9834}
9835
9838 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9839 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9840
9841 if (Mask == fcNone) {
9842 MIRBuilder.buildConstant(DstReg, 0);
9843 MI.eraseFromParent();
9844 return Legalized;
9845 }
9846 if (Mask == fcAllFlags) {
9847 MIRBuilder.buildConstant(DstReg, 1);
9848 MI.eraseFromParent();
9849 return Legalized;
9850 }
9851
9852 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9853 // version
9854
9855 unsigned BitSize = SrcTy.getScalarSizeInBits();
9856 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9857
9858 LLT IntTy = LLT::scalar(BitSize);
9859 if (SrcTy.isVector())
9860 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9861 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9862
9863 // Various masks.
9864 APInt SignBit = APInt::getSignMask(BitSize);
9865 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9866 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9867 APInt ExpMask = Inf;
9868 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9869 APInt QNaNBitMask =
9870 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9871 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
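 // For an f32 source these masks are: SignBit = 0x80000000,
 // ValueMask = 0x7fffffff, Inf/ExpMask = 0x7f800000,
 // AllOneMantissa = 0x007fffff and QNaNBitMask = 0x00400000.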
9872
9873 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9874 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9875 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9876 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9877 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9878
9879 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9880 auto Sign =
9881 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
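 // A value differs from its sign-cleared copy exactly when the sign bit is
 // set, so this compare acts as a per-element "is negative" test.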
9882
9883 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9884 // Clang doesn't support capture of structured bindings:
9885 LLT DstTyCopy = DstTy;
9886 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9887 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9888 };
9889
9890 // Tests that involve more than one class should be processed first.
9891 if ((Mask & fcFinite) == fcFinite) {
9892 // finite(V) ==> abs(V) u< exp_mask
9893 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9894 ExpMaskC));
9895 Mask &= ~fcFinite;
9896 } else if ((Mask & fcFinite) == fcPosFinite) {
9897 // finite(V) && V > 0 ==> V u< exp_mask
9898 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9899 ExpMaskC));
9900 Mask &= ~fcPosFinite;
9901 } else if ((Mask & fcFinite) == fcNegFinite) {
9902 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9903 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9904 ExpMaskC);
9905 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9906 appendToRes(And);
9907 Mask &= ~fcNegFinite;
9908 }
9909
9910 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9911 // fcZero | fcSubnormal => test all exponent bits are 0
9912 // TODO: Handle sign bit specific cases
9913 // TODO: Handle inverted case
9914 if (PartialCheck == (fcZero | fcSubnormal)) {
9915 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9916 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9917 ExpBits, ZeroC));
9918 Mask &= ~PartialCheck;
9919 }
9920 }
9921
9922 // Check for individual classes.
9923 if (FPClassTest PartialCheck = Mask & fcZero) {
9924 if (PartialCheck == fcPosZero)
9925 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9926 AsInt, ZeroC));
9927 else if (PartialCheck == fcZero)
9928 appendToRes(
9929 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9930 else // fcNegZero
9931 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9932 AsInt, SignBitC));
9933 }
9934
9935 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9936 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9937 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9938 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9939 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9940 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9941 auto SubnormalRes =
9942 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9943 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9944 if (PartialCheck == fcNegSubnormal)
9945 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9946 appendToRes(SubnormalRes);
9947 }
9948
9949 if (FPClassTest PartialCheck = Mask & fcInf) {
9950 if (PartialCheck == fcPosInf)
9951 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9952 AsInt, InfC));
9953 else if (PartialCheck == fcInf)
9954 appendToRes(
9955 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9956 else { // fcNegInf
9957 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9958 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9959 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9960 AsInt, NegInfC));
9961 }
9962 }
9963
9964 if (FPClassTest PartialCheck = Mask & fcNan) {
9965 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9966 if (PartialCheck == fcNan) {
9967 // isnan(V) ==> abs(V) u> int(inf)
9968 appendToRes(
9969 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9970 } else if (PartialCheck == fcQNan) {
9971 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9972 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9973 InfWithQnanBitC));
9974 } else { // fcSNan
9975 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9976 // abs(V) u< (unsigned(Inf) | quiet_bit)
9977 auto IsNan =
9978 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9979 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9980 Abs, InfWithQnanBitC);
9981 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9982 }
9983 }
9984
9985 if (FPClassTest PartialCheck = Mask & fcNormal) {
9986 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9987 // (max_exp-1))
9988 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9989 auto ExpMinusOne = MIRBuilder.buildSub(
9990 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9991 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9992 auto NormalRes =
9993 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9994 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9995 if (PartialCheck == fcNegNormal)
9996 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9997 else if (PartialCheck == fcPosNormal) {
9998 auto PosSign = MIRBuilder.buildXor(
9999 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10000 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10001 }
10002 appendToRes(NormalRes);
10003 }
10004
10005 MIRBuilder.buildCopy(DstReg, Res);
10006 MI.eraseFromParent();
10007 return Legalized;
10008}
10009
10011 // Implement G_SELECT in terms of XOR, AND, OR.
10012 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10013 MI.getFirst4RegLLTs();
10014
10015 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10016 if (IsEltPtr) {
10017 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10018 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10019 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10020 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10021 DstTy = NewTy;
10022 }
10023
10024 if (MaskTy.isScalar()) {
10025 // Turn the scalar condition into a vector condition mask if needed.
10026
10027 Register MaskElt = MaskReg;
10028
10029 // The condition was potentially zero extended before, but we want a sign
10030 // extended boolean.
10031 if (MaskTy != LLT::scalar(1))
10032 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10033
10034 // Continue the sign extension (or truncate) to match the data type.
10035 MaskElt =
10036 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10037
10038 if (DstTy.isVector()) {
10039 // Generate a vector splat idiom.
10040 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10041 MaskReg = ShufSplat.getReg(0);
10042 } else {
10043 MaskReg = MaskElt;
10044 }
10045 MaskTy = DstTy;
10046 } else if (!DstTy.isVector()) {
10047 // Cannot handle the case that mask is a vector and dst is a scalar.
10048 return UnableToLegalize;
10049 }
10050
10051 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10052 return UnableToLegalize;
10053 }
10054
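 // With the mask now sign-extended to all-ones or all-zeros per lane, the
 // select reduces to (Op1 & Mask) | (Op2 & ~Mask).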
10055 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10056 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10057 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10058 if (IsEltPtr) {
10059 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10060 MIRBuilder.buildIntToPtr(DstReg, Or);
10061 } else {
10062 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10063 }
10064 MI.eraseFromParent();
10065 return Legalized;
10066}
10067
10069 // Split DIVREM into individual instructions.
10070 unsigned Opcode = MI.getOpcode();
10071
10072 MIRBuilder.buildInstr(
10073 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10074 : TargetOpcode::G_UDIV,
10075 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10076 MIRBuilder.buildInstr(
10077 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10078 : TargetOpcode::G_UREM,
10079 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10080 MI.eraseFromParent();
10081 return Legalized;
10082}
10083
10086 // Expand %res = G_ABS %a into:
10087 // %v1 = G_ASHR %a, scalar_size-1
10088 // %v2 = G_ADD %a, %v1
10089 // %res = G_XOR %v2, %v1
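 // E.g. for s8 %a = -5 (0xfb): %v1 = 0xff, %v2 = 0xfa, %res = 0xfa ^ 0xff =
 // 0x05 = 5; for non-negative inputs %v1 is 0 and the add/xor are no-ops.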
10090 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10091 Register OpReg = MI.getOperand(1).getReg();
10092 auto ShiftAmt =
10093 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10094 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10095 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10096 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10097 MI.eraseFromParent();
10098 return Legalized;
10099}
10100
10103 // Expand %res = G_ABS %a into:
10104 // %v1 = G_CONSTANT 0
10105 // %v2 = G_SUB %v1, %a
10106 // %res = G_SMAX %a, %v2
10107 Register SrcReg = MI.getOperand(1).getReg();
10108 LLT Ty = MRI.getType(SrcReg);
10109 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10110 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10111 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10112 MI.eraseFromParent();
10113 return Legalized;
10114}
10115
10118 Register SrcReg = MI.getOperand(1).getReg();
10119 Register DestReg = MI.getOperand(0).getReg();
10120 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10121 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10122 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10123 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10124 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10125 MI.eraseFromParent();
10126 return Legalized;
10127}
10128
10131 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10132 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10133 "Expected G_ABDS or G_ABDU instruction");
10134
10135 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10136 LLT Ty = MRI.getType(LHS);
10137
10138 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10139 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10140 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10141 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10142 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10143 ? CmpInst::ICMP_SGT
10144 : CmpInst::ICMP_UGT;
10145 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10146 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10147
10148 MI.eraseFromParent();
10149 return Legalized;
10150}
10151
10154 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10155 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10156 "Expected G_ABDS or G_ABDU instruction");
10157
10158 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10159 LLT Ty = MRI.getType(LHS);
10160
10161 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10162 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10163 Register MaxReg, MinReg;
10164 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10165 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10166 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10167 } else {
10168 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10169 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10170 }
10171 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10172
10173 MI.eraseFromParent();
10174 return Legalized;
10175}
10176
10178 Register SrcReg = MI.getOperand(1).getReg();
10179 Register DstReg = MI.getOperand(0).getReg();
10180
10181 LLT Ty = MRI.getType(DstReg);
10182
10183 // Reset sign bit
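 // (for f32 this ANDs with 0x7fffffff, for f64 with 0x7fffffffffffffff)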
10184 MIRBuilder.buildAnd(
10185 DstReg, SrcReg,
10186 MIRBuilder.buildConstant(
10187 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10188
10189 MI.eraseFromParent();
10190 return Legalized;
10191}
10192
10195 Register SrcReg = MI.getOperand(1).getReg();
10196 LLT SrcTy = MRI.getType(SrcReg);
10197 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10198
10199 // The source could be a scalar if the IR type was <1 x sN>.
10200 if (SrcTy.isScalar()) {
10201 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10202 return UnableToLegalize; // FIXME: handle extension.
10203 // This can be just a plain copy.
10204 Observer.changingInstr(MI);
10205 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10206 Observer.changedInstr(MI);
10207 return Legalized;
10208 }
10209 return UnableToLegalize;
10210}
10211
10213 MachineFunction &MF = *MI.getMF();
10214 const DataLayout &DL = MIRBuilder.getDataLayout();
10215 LLVMContext &Ctx = MF.getFunction().getContext();
10216 Register ListPtr = MI.getOperand(1).getReg();
10217 LLT PtrTy = MRI.getType(ListPtr);
10218
10219 // ListPtr is a pointer to the head of the list. Get the address
10220 // of the head of the list.
10221 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10222 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10223 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10224 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10225
10226 const Align A(MI.getOperand(2).getImm());
10227 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
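 // If the element requires more alignment than the minimum stack argument
 // alignment, round VAList up: add A - 1 and clear the low Log2(A) bits,
 // i.e. the usual (ptr + A - 1) & ~(A - 1) idiom.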
10228 if (A > TLI.getMinStackArgumentAlignment()) {
10229 Register AlignAmt =
10230 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10231 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10232 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10233 VAList = AndDst.getReg(0);
10234 }
10235
10236 // Increment the pointer, VAList, to the next vaarg
10237 // The list should be bumped by the size of the element in the current
10238 // head of the list.
10239 Register Dst = MI.getOperand(0).getReg();
10240 LLT LLTTy = MRI.getType(Dst);
10241 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10242 auto IncAmt =
10243 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10244 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10245
10246 // Store the incremented VAList to the legalized pointer
10247 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10248 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10249 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10250 // Load the actual argument out of the pointer VAList
10251 Align EltAlignment = DL.getABITypeAlign(Ty);
10252 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10253 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10254 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10255
10256 MI.eraseFromParent();
10257 return Legalized;
10258}
10259
10260static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10261 // On Darwin, -Os means optimize for size without hurting performance, so
10262 // only really optimize for size when -Oz (MinSize) is used.
10263 if (MF.getTarget().getTargetTriple().isOSDarwin())
10264 return MF.getFunction().hasMinSize();
10265 return MF.getFunction().hasOptSize();
10266}
10267
10268// Returns a list of types to use for memory op lowering in MemOps. A partial
10269// port of findOptimalMemOpLowering in TargetLowering.
10270static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10271 unsigned Limit, const MemOp &Op,
10272 unsigned DstAS, unsigned SrcAS,
10273 const AttributeList &FuncAttributes,
10274 const TargetLowering &TLI) {
10275 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10276 return false;
10277
10278 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10279
10280 if (Ty == LLT()) {
10281 // Use the largest scalar type whose alignment constraints are satisfied.
10282 // We only need to check DstAlign here as SrcAlign is always greater or
10283 // equal to DstAlign (or zero).
10284 Ty = LLT::scalar(64);
10285 if (Op.isFixedDstAlign())
10286 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10287 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10288 Ty = LLT::scalar(Ty.getSizeInBytes());
10289 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10290 // FIXME: check for the largest legal type we can load/store to.
10291 }
10292
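 // E.g. a 13-byte copy with Ty == s64 becomes s64 + s32 + s8, or s64 plus
 // one overlapping s64 when misaligned overlapping accesses are allowed.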
10293 unsigned NumMemOps = 0;
10294 uint64_t Size = Op.size();
10295 while (Size) {
10296 unsigned TySize = Ty.getSizeInBytes();
10297 while (TySize > Size) {
10298 // For now, only use non-vector loads / stores for the left-over pieces.
10299 LLT NewTy = Ty;
10300 // FIXME: check for mem op safety and legality of the types. Not all of
10301 // SDAGisms map cleanly to GISel concepts.
10302 if (NewTy.isVector())
10303 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10304 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10305 unsigned NewTySize = NewTy.getSizeInBytes();
10306 assert(NewTySize > 0 && "Could not find appropriate type");
10307
10308 // If the new LLT cannot cover all of the remaining bits, then consider
10309 // issuing a (or a pair of) unaligned and overlapping load / store.
10310 unsigned Fast;
10311 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10312 MVT VT = getMVTForLLT(Ty);
10313 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10314 TLI.allowsMisalignedMemoryAccesses(
10315 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10316 MachineMemOperand::MONone, &Fast) &&
10317 Fast)
10318 TySize = Size;
10319 else {
10320 Ty = NewTy;
10321 TySize = NewTySize;
10322 }
10323 }
10324
10325 if (++NumMemOps > Limit)
10326 return false;
10327
10328 MemOps.push_back(Ty);
10329 Size -= TySize;
10330 }
10331
10332 return true;
10333}
10334
10335// Get a vectorized representation of the memset value operand, GISel edition.
10336static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10337 MachineRegisterInfo &MRI = *MIB.getMRI();
10338 unsigned NumBits = Ty.getScalarSizeInBits();
10339 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10340 if (!Ty.isVector() && ValVRegAndVal) {
10341 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10342 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10343 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10344 }
10345
10346 // Extend the byte value to the larger type, and then multiply by a magic
10347 // value 0x010101... in order to replicate it across every byte.
10348 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10349 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10350 return MIB.buildConstant(Ty, 0).getReg(0);
10351 }
10352
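 // E.g. a byte value of 0xab widened to s32 becomes 0xab * 0x01010101 =
 // 0xabababab.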
10353 LLT ExtType = Ty.getScalarType();
10354 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10355 if (NumBits > 8) {
10356 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10357 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10358 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10359 }
10360
10361 // For vector types create a G_BUILD_VECTOR.
10362 if (Ty.isVector())
10363 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10364
10365 return Val;
10366}
10367
10368LegalizerHelper::LegalizeResult
10369LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10370 uint64_t KnownLen, Align Alignment,
10371 bool IsVolatile) {
10372 auto &MF = *MI.getParent()->getParent();
10373 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10374 auto &DL = MF.getDataLayout();
10375 LLVMContext &C = MF.getFunction().getContext();
10376
10377 assert(KnownLen != 0 && "Have a zero length memset length!");
10378
10379 bool DstAlignCanChange = false;
10380 MachineFrameInfo &MFI = MF.getFrameInfo();
10381 bool OptSize = shouldLowerMemFuncForSize(MF);
10382
10383 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10384 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10385 DstAlignCanChange = true;
10386
10387 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10388 std::vector<LLT> MemOps;
10389
10390 const auto &DstMMO = **MI.memoperands_begin();
10391 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10392
10393 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10394 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10395
10396 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10397 MemOp::Set(KnownLen, DstAlignCanChange,
10398 Alignment,
10399 /*IsZeroMemset=*/IsZeroVal,
10400 /*IsVolatile=*/IsVolatile),
10401 DstPtrInfo.getAddrSpace(), ~0u,
10402 MF.getFunction().getAttributes(), TLI))
10403 return UnableToLegalize;
10404
10405 if (DstAlignCanChange) {
10406 // Get an estimate of the type from the LLT.
10407 Type *IRTy = getTypeForLLT(MemOps[0], C);
10408 Align NewAlign = DL.getABITypeAlign(IRTy);
10409 if (NewAlign > Alignment) {
10410 Alignment = NewAlign;
10411 unsigned FI = FIDef->getOperand(1).getIndex();
10412 // Give the stack frame object a larger alignment if needed.
10413 if (MFI.getObjectAlign(FI) < Alignment)
10414 MFI.setObjectAlignment(FI, Alignment);
10415 }
10416 }
10417
10418 MachineIRBuilder MIB(MI);
10419 // Find the largest store and generate the bit pattern for it.
10420 LLT LargestTy = MemOps[0];
10421 for (unsigned i = 1; i < MemOps.size(); i++)
10422 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10423 LargestTy = MemOps[i];
10424
10425 // The memset stored value is always defined as an s8, so in order to make it
10426 // work with larger store types we need to repeat the bit pattern across the
10427 // wider type.
10428 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10429
10430 if (!MemSetValue)
10431 return UnableToLegalize;
10432
10433 // Generate the stores. For each store type in the list, we generate the
10434 // matching store of that type to the destination address.
10435 LLT PtrTy = MRI.getType(Dst);
10436 unsigned DstOff = 0;
10437 unsigned Size = KnownLen;
10438 for (unsigned I = 0; I < MemOps.size(); I++) {
10439 LLT Ty = MemOps[I];
10440 unsigned TySize = Ty.getSizeInBytes();
10441 if (TySize > Size) {
10442 // Issuing an unaligned load / store pair that overlaps with the previous
10443 // pair. Adjust the offset accordingly.
10444 assert(I == MemOps.size() - 1 && I != 0);
10445 DstOff -= TySize - Size;
10446 }
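 // E.g. an 11-byte memset lowered to [s64, s32]: the trailing s32 store
 // would run past the end, so it is emitted at offset 7 instead, where it
 // overlaps the preceding s64 store by one byte.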
10447
10448 // If this store is smaller than the largest store see whether we can get
10449 // the smaller value for free with a truncate.
10450 Register Value = MemSetValue;
10451 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10452 MVT VT = getMVTForLLT(Ty);
10453 MVT LargestVT = getMVTForLLT(LargestTy);
10454 if (!LargestTy.isVector() && !Ty.isVector() &&
10455 TLI.isTruncateFree(LargestVT, VT))
10456 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10457 else
10458 Value = getMemsetValue(Val, Ty, MIB);
10459 if (!Value)
10460 return UnableToLegalize;
10461 }
10462
10463 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10464
10465 Register Ptr = Dst;
10466 if (DstOff != 0) {
10467 auto Offset =
10468 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10469 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10470 }
10471
10472 MIB.buildStore(Value, Ptr, *StoreMMO);
10473 DstOff += Ty.getSizeInBytes();
10474 Size -= TySize;
10475 }
10476
10477 MI.eraseFromParent();
10478 return Legalized;
10479}
10480
10481LegalizerHelper::LegalizeResult
10482LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10483 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10484
10485 auto [Dst, Src, Len] = MI.getFirst3Regs();
10486
10487 const auto *MMOIt = MI.memoperands_begin();
10488 const MachineMemOperand *MemOp = *MMOIt;
10489 bool IsVolatile = MemOp->isVolatile();
10490
10491 // See if this is a constant length copy
10492 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10493 // FIXME: support dynamically sized G_MEMCPY_INLINE
10494 assert(LenVRegAndVal &&
10495 "inline memcpy with dynamic size is not yet supported");
10496 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10497 if (KnownLen == 0) {
10498 MI.eraseFromParent();
10499 return Legalized;
10500 }
10501
10502 const auto &DstMMO = **MI.memoperands_begin();
10503 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10504 Align DstAlign = DstMMO.getBaseAlign();
10505 Align SrcAlign = SrcMMO.getBaseAlign();
10506
10507 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10508 IsVolatile);
10509}
10510
10511LegalizerHelper::LegalizeResult
10512LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10513 uint64_t KnownLen, Align DstAlign,
10514 Align SrcAlign, bool IsVolatile) {
10515 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10516 return lowerMemcpy(MI, Dst, Src, KnownLen,
10517 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10518 IsVolatile);
10519}
10520
10521LegalizerHelper::LegalizeResult
10522LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10523 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10524 Align SrcAlign, bool IsVolatile) {
10525 auto &MF = *MI.getParent()->getParent();
10526 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10527 auto &DL = MF.getDataLayout();
10528 LLVMContext &C = MF.getFunction().getContext();
10529
10530 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10531
10532 bool DstAlignCanChange = false;
10533 MachineFrameInfo &MFI = MF.getFrameInfo();
10534 Align Alignment = std::min(DstAlign, SrcAlign);
10535
10536 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10537 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10538 DstAlignCanChange = true;
10539
10540 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10541 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10542 // if the memcpy is in a tail call position.
10543
10544 std::vector<LLT> MemOps;
10545
10546 const auto &DstMMO = **MI.memoperands_begin();
10547 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10548 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10549 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10550
10551 if (!findGISelOptimalMemOpLowering(
10552 MemOps, Limit,
10553 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10554 IsVolatile),
10555 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10556 MF.getFunction().getAttributes(), TLI))
10557 return UnableToLegalize;
10558
10559 if (DstAlignCanChange) {
10560 // Get an estimate of the type from the LLT.
10561 Type *IRTy = getTypeForLLT(MemOps[0], C);
10562 Align NewAlign = DL.getABITypeAlign(IRTy);
10563
10564 // Don't promote to an alignment that would require dynamic stack
10565 // realignment.
10566 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10567 if (!TRI->hasStackRealignment(MF))
10568 if (MaybeAlign StackAlign = DL.getStackAlignment())
10569 NewAlign = std::min(NewAlign, *StackAlign);
10570
10571 if (NewAlign > Alignment) {
10572 Alignment = NewAlign;
10573 unsigned FI = FIDef->getOperand(1).getIndex();
10574 // Give the stack frame object a larger alignment if needed.
10575 if (MFI.getObjectAlign(FI) < Alignment)
10576 MFI.setObjectAlignment(FI, Alignment);
10577 }
10578 }
10579
10580 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10581
10582 MachineIRBuilder MIB(MI);
10583 // Now we need to emit a pair of load and stores for each of the types we've
10584 // collected. I.e. for each type, generate a load from the source pointer of
10585 // that type width, and then generate a corresponding store to the dest buffer
10586 // of that value loaded. This can result in a sequence of loads and stores
10587 // of mixed types, depending on what the target specifies as good types to use.
10588 unsigned CurrOffset = 0;
10589 unsigned Size = KnownLen;
10590 for (auto CopyTy : MemOps) {
10591 // Issuing an unaligned load / store pair that overlaps with the previous
10592 // pair. Adjust the offset accordingly.
10593 if (CopyTy.getSizeInBytes() > Size)
10594 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10595
10596 // Construct MMOs for the accesses.
10597 auto *LoadMMO =
10598 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10599 auto *StoreMMO =
10600 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10601
10602 // Create the load.
10603 Register LoadPtr = Src;
10604 Register Offset;
10605 if (CurrOffset != 0) {
10606 LLT SrcTy = MRI.getType(Src);
10607 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10608 .getReg(0);
10609 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10610 }
10611 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10612
10613 // Create the store.
10614 Register StorePtr = Dst;
10615 if (CurrOffset != 0) {
10616 LLT DstTy = MRI.getType(Dst);
10617 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10618 }
10619 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10620 CurrOffset += CopyTy.getSizeInBytes();
10621 Size -= CopyTy.getSizeInBytes();
10622 }
10623
10624 MI.eraseFromParent();
10625 return Legalized;
10626}
10627
10628LegalizerHelper::LegalizeResult
10629LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10630 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10631 bool IsVolatile) {
10632 auto &MF = *MI.getParent()->getParent();
10633 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10634 auto &DL = MF.getDataLayout();
10635 LLVMContext &C = MF.getFunction().getContext();
10636
10637 assert(KnownLen != 0 && "Have a zero length memmove length!");
10638
10639 bool DstAlignCanChange = false;
10640 MachineFrameInfo &MFI = MF.getFrameInfo();
10641 bool OptSize = shouldLowerMemFuncForSize(MF);
10642 Align Alignment = std::min(DstAlign, SrcAlign);
10643
10644 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10645 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10646 DstAlignCanChange = true;
10647
10648 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10649 std::vector<LLT> MemOps;
10650
10651 const auto &DstMMO = **MI.memoperands_begin();
10652 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10653 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10654 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10655
10656 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10657 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10658 // same thing here.
10659 if (!findGISelOptimalMemOpLowering(
10660 MemOps, Limit,
10661 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10662 /*IsVolatile*/ true),
10663 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10664 MF.getFunction().getAttributes(), TLI))
10665 return UnableToLegalize;
10666
10667 if (DstAlignCanChange) {
10668 // Get an estimate of the type from the LLT.
10669 Type *IRTy = getTypeForLLT(MemOps[0], C);
10670 Align NewAlign = DL.getABITypeAlign(IRTy);
10671
10672 // Don't promote to an alignment that would require dynamic stack
10673 // realignment.
10674 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10675 if (!TRI->hasStackRealignment(MF))
10676 if (MaybeAlign StackAlign = DL.getStackAlignment())
10677 NewAlign = std::min(NewAlign, *StackAlign);
10678
10679 if (NewAlign > Alignment) {
10680 Alignment = NewAlign;
10681 unsigned FI = FIDef->getOperand(1).getIndex();
10682 // Give the stack frame object a larger alignment if needed.
10683 if (MFI.getObjectAlign(FI) < Alignment)
10684 MFI.setObjectAlignment(FI, Alignment);
10685 }
10686 }
10687
10688 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10689
10690 MachineIRBuilder MIB(MI);
10691 // Memmove requires that we perform the loads first before issuing the stores.
10692 // Apart from that, this loop is pretty much doing the same thing as the
10693 // memcpy codegen function.
10694 unsigned CurrOffset = 0;
10695 SmallVector<Register, 16> LoadVals;
10696 for (auto CopyTy : MemOps) {
10697 // Construct MMO for the load.
10698 auto *LoadMMO =
10699 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10700
10701 // Create the load.
10702 Register LoadPtr = Src;
10703 if (CurrOffset != 0) {
10704 LLT SrcTy = MRI.getType(Src);
10705 auto Offset =
10706 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10707 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10708 }
10709 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10710 CurrOffset += CopyTy.getSizeInBytes();
10711 }
10712
10713 CurrOffset = 0;
10714 for (unsigned I = 0; I < MemOps.size(); ++I) {
10715 LLT CopyTy = MemOps[I];
10716 // Now store the values loaded.
10717 auto *StoreMMO =
10718 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10719
10720 Register StorePtr = Dst;
10721 if (CurrOffset != 0) {
10722 LLT DstTy = MRI.getType(Dst);
10723 auto Offset =
10724 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10725 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10726 }
10727 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10728 CurrOffset += CopyTy.getSizeInBytes();
10729 }
10730 MI.eraseFromParent();
10731 return Legalized;
10732}
10733
10736 const unsigned Opc = MI.getOpcode();
10737 // This combine is fairly complex so it's not written with a separate
10738 // matcher function.
10739 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10740 Opc == TargetOpcode::G_MEMSET) &&
10741 "Expected memcpy like instruction");
10742
10743 auto MMOIt = MI.memoperands_begin();
10744 const MachineMemOperand *MemOp = *MMOIt;
10745
10746 Align DstAlign = MemOp->getBaseAlign();
10747 Align SrcAlign;
10748 auto [Dst, Src, Len] = MI.getFirst3Regs();
10749
10750 if (Opc != TargetOpcode::G_MEMSET) {
10751 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10752 MemOp = *(++MMOIt);
10753 SrcAlign = MemOp->getBaseAlign();
10754 }
10755
10756 // See if this is a constant length copy
10757 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10758 if (!LenVRegAndVal)
10759 return UnableToLegalize;
10760 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10761
10762 if (KnownLen == 0) {
10763 MI.eraseFromParent();
10764 return Legalized;
10765 }
10766
10767 if (MaxLen && KnownLen > MaxLen)
10768 return UnableToLegalize;
10769
10770 bool IsVolatile = MemOp->isVolatile();
10771 if (Opc == TargetOpcode::G_MEMCPY) {
10772 auto &MF = *MI.getParent()->getParent();
10773 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10774 bool OptSize = shouldLowerMemFuncForSize(MF);
10775 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10776 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10777 IsVolatile);
10778 }
10779 if (Opc == TargetOpcode::G_MEMMOVE)
10780 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10781 if (Opc == TargetOpcode::G_MEMSET)
10782 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10783 return UnableToLegalize;
10784}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
APInt bitcastToAPInt() const
Definition APFloat.h:1335
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:316
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
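Taken together, the LLT constructors and queries above are the vocabulary of every legalization rule. A minimal sketch; the header path shown is the one used by recent trees and may differ in older releases:

#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;

static void lltExamples() {
  LLT S32 = LLT::scalar(32);               // s32: a 32-bit "bag of bits"
  LLT V4S32 = LLT::fixed_vector(4, 32);    // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);            // p0: 64-bit pointer in AS 0

  (void)S32.getSizeInBits();               // TypeSize of 32
  (void)V4S32.getElementType();            // s32
  (void)V4S32.getNumElements();            // 4
  (void)V4S32.changeElementCount(ElementCount::getFixed(2)); // <2 x s32>
  (void)P0.isPointer();                    // true
}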
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load of the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
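The three LegalizeResult values above are what a driver switches over after each call to legalizeInstrStep. A simplified sketch, loosely modeled on the in-tree Legalizer pass; worklist management is omitted:

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Legalize one instruction and report whether anything went wrong.
static bool legalizeOne(LegalizerHelper &Helper, MachineInstr &MI,
                        LostDebugLocObserver &LocObserver) {
  switch (Helper.legalizeInstrStep(MI, LocObserver)) {
  case LegalizerHelper::AlreadyLegal:
    return true;                  // nothing changed
  case LegalizerHelper::Legalized:
    return true;                  // MI was rewritten; revisit new instructions
  case LegalizerHelper::UnableToLegalize:
    return false;                 // caller should emit a diagnostic
  }
  llvm_unreachable("covered switch");
}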
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
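widenScalarSrc and widenScalarDst are usually used as a pair, bracketed by the change-observer callbacks listed earlier. A sketch of that pattern for a unary operation; Helper and WideTy are assumed to come from the caller, with operand 0 the def and operand 1 the use:

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Widen both sides of a unary op: any-extend the source, truncate the result.
static void widenUnaryOp(LegalizerHelper &Helper, MachineInstr &MI, LLT WideTy) {
  Helper.Observer.changingInstr(MI);
  Helper.widenScalarSrc(MI, WideTy, /*OpIdx=*/1, TargetOpcode::G_ANYEXT);
  Helper.widenScalarDst(MI, WideTy, /*OpIdx=*/0);
  Helper.Observer.changedInstr(MI);
}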
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
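CreateStackObject is the primitive underneath the stack-temporary helpers listed earlier. A simplified sketch that allocates a slot and materializes a pointer to it; the pointer width and address space are assumptions for the example, and the alignment clamping a real helper performs is skipped:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static Register makeStackSlot(MachineIRBuilder &B, uint64_t Bytes, Align Alignment) {
  MachineFunction &MF = B.getMF();
  int FI = MF.getFrameInfo().CreateStackObject(Bytes, Alignment,
                                               /*isSpillSlot=*/false);
  LLT PtrTy = LLT::pointer(0, 64);   // assumed: 64-bit pointers in AS 0
  return B.buildFrameIndex(PtrTy, FI).getReg(0);
}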
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
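A few of the builder calls above composed into a rotate-left expansion, (Src << Amt) | (Src >> (Bits - Amt)). This is only a sketch: Amt is assumed to already have type Ty, and the Amt == 0 case (where the right-shift amount equals the bit width) would need the extra masking a real lowering performs:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

static Register buildRotateLeft(MachineIRBuilder &B, LLT Ty, Register Src,
                                Register Amt) {
  auto Bits = B.buildConstant(Ty, Ty.getScalarSizeInBits());
  auto InvAmt = B.buildSub(Ty, Bits, Amt);   // Bits - Amt
  auto Hi = B.buildShl(Ty, Src, Amt);
  auto Lo = B.buildLShr(Ty, Src, InvAmt);
  return B.buildOr(Ty, Hi, Lo).getReg(0);
}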
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:613
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:283
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
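These RTLIB getters map a pair of value types to a runtime routine. A sketch of selecting the FPEXT libcall for two LLTs; the header holding the RTLIB helpers has moved between releases, so treat the include as an assumption:

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"  // RuntimeLibcalls.h in older trees
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

static RTLIB::Libcall fpextLibcall(LLT FromTy, LLT ToTy, LLVMContext &Ctx) {
  EVT FromVT = getApproximateEVTForLLT(FromTy, Ctx);
  EVT ToVT = getApproximateEVTForLLT(ToTy, Ctx);
  return RTLIB::getFPEXT(FromVT, ToVT);  // UNKNOWN_LIBCALL if unsupported
}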
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2032
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
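getIConstantVRegVal is the usual way shift and div/rem legalization checks whether an operand is a compile-time constant. A small sketch; the predicate itself is just an example and assumes the constant fits in 64 bits:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <optional>
using namespace llvm;

// True if Reg is a G_CONSTANT whose value equals Want.
static bool isExactConstant(Register Reg, uint64_t Want,
                            const MachineRegisterInfo &MRI) {
  if (std::optional<APInt> Cst = getIConstantVRegVal(Reg, MRI))
    return Cst->getZExtValue() == Want;
  return false;   // not defined by a G_CONSTANT
}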
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1564
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1621
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1188
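getLCMType and its counterpart getGCDType (listed further below) are how narrowing code picks a common working type. A sketch on two scalar types, with the expected results noted as comments:

#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

static void commonTypeExample() {
  LLT A = LLT::scalar(96);
  LLT B = LLT::scalar(64);
  LLT Lcm = getLCMType(A, B);  // s192: smallest size both sizes divide evenly
  LLT Gcd = getGCDType(A, B);  // s32: largest size dividing both
  (void)Lcm;
  (void)Gcd;
}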
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
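extractParts is the workhorse used when a wide value has to be processed as several narrow registers. A sketch splitting a 128-bit register into four s32 pieces; the builder is assumed to be positioned where the split should be inserted:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

static void splitIntoWords(Register Wide128, MachineIRBuilder &B,
                           MachineRegisterInfo &MRI) {
  SmallVector<Register, 4> Parts;
  extractParts(Wide128, LLT::scalar(32), /*NumParts=*/4, Parts, B, MRI);
  // Parts now holds four s32 registers, least-significant piece first.
}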
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1835
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
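As a quick reference, the alignment helpers above applied to a few concrete values; this is only an illustrative sketch:

#include "llvm/Support/Alignment.h"
using namespace llvm;

static void alignmentExamples() {
  Align A(8);
  (void)Log2(A);                          // 3
  (void)alignTo(13, A);                   // 16: next multiple of 8
  (void)commonAlignment(A, /*Offset=*/4); // Align(4): 8-aligned base plus 4
  (void)assumeAligned(0);                 // Align(1): zero is treated as 1
}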
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1276
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)