LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
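// Illustrative trace of the breakdown arithmetic above (example values chosen
// for exposition, not taken from the surrounding code):
//   Scalar case:  OrigTy = s70, NarrowTy = s32
//     NumParts = 70 / 32 = 2, LeftoverSize = 70 - 64 = 6
//     LeftoverTy = s6, NumLeftover = 1            =>  returns {2, 1}
//   Vector case:  OrigTy = v5s32, NarrowTy = v2s32
//     NumParts = 160 / 64 = 2, LeftoverSize = 32, EltSize = 32
//     LeftoverTy = scalarOrVector(1, s32) = s32   =>  returns {2, 1}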
83
84Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
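// A few illustrative mappings (not from the original source):
//   getFloatTypeForLLT(Ctx, LLT::scalar(32))          -> float
//   getFloatTypeForLLT(Ctx, LLT::scalar(80))          -> x86_fp80
//   getFloatTypeForLLT(Ctx, LLT::scalar(8))           -> nullptr (no FP type)
//   getFloatTypeForLLT(Ctx, LLT::fixed_vector(4, 32)) -> nullptr (not a scalar)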
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106                                 GISelChangeObserver &Observer,
107                                 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113                                 GISelChangeObserver &Observer,
114                                 MachineIRBuilder &B, GISelValueTracking *VT)
115    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120                                   LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156                                                     : UnableToLegalize;
157  default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183  // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
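// Illustrative trace of the scalar leftover path (example values, not from the
// source): DstReg is s12, PartRegs holds one s8 piece, LeftoverTy = s4.
//   GCDTy = gcd(gcd(s12, s4), s8) = s4
//   extractGCDType unmerges the s8 part into two s4 pieces; the s4 leftover is
//   taken as-is, giving three s4 GCD registers.
//   buildLCMMergePieces merges them toward the LCM type s12, and
//   buildWidenedRemergeToDst emits the final G_MERGE_VALUES into DstReg.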
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202  SmallVector<Register, 8> RegElts;
203  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211  SmallVector<Register, 32> AllElts;
212  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226                              const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259                                         SmallVectorImpl<Register> &VRegs,
260                                         unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
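// Example of the padding logic (illustrative values, not from the source):
//   DstTy = s64, NarrowTy = s32, GCDTy = s16, VRegs = {a, b, c} (three s16
//   pieces of a 48-bit value), PadStrategy = G_ZEXT.
//   NumParts = 2, NumSubParts = 2, NumOrigSrc = 3 < 4, so PadReg is an s16 zero.
//     Remerge[0] = G_MERGE_VALUES a, b        (s32)
//     Remerge[1] = G_MERGE_VALUES c, PadReg   (s32)
//   Returns LCMTy = s64 with VRegs replaced by {Remerge[0], Remerge[1]}.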
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FMODF:
475 RTLIBCASE(MODF_F);
476 case TargetOpcode::G_FLOG10:
477 RTLIBCASE(LOG10_F);
478 case TargetOpcode::G_FLOG:
479 RTLIBCASE(LOG_F);
480 case TargetOpcode::G_FLOG2:
481 RTLIBCASE(LOG2_F);
482 case TargetOpcode::G_FLDEXP:
483 RTLIBCASE(LDEXP_F);
484 case TargetOpcode::G_FCEIL:
485 RTLIBCASE(CEIL_F);
486 case TargetOpcode::G_FFLOOR:
487 RTLIBCASE(FLOOR_F);
488 case TargetOpcode::G_FMINNUM:
489 RTLIBCASE(FMIN_F);
490 case TargetOpcode::G_FMAXNUM:
491 RTLIBCASE(FMAX_F);
492 case TargetOpcode::G_FMINIMUMNUM:
493 RTLIBCASE(FMINIMUM_NUM_F);
494 case TargetOpcode::G_FMAXIMUMNUM:
495 RTLIBCASE(FMAXIMUM_NUM_F);
496 case TargetOpcode::G_FSQRT:
497 RTLIBCASE(SQRT_F);
498 case TargetOpcode::G_FRINT:
499 RTLIBCASE(RINT_F);
500 case TargetOpcode::G_FNEARBYINT:
501 RTLIBCASE(NEARBYINT_F);
502 case TargetOpcode::G_INTRINSIC_TRUNC:
503 RTLIBCASE(TRUNC_F);
504 case TargetOpcode::G_INTRINSIC_ROUND:
505 RTLIBCASE(ROUND_F);
506 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
507 RTLIBCASE(ROUNDEVEN_F);
508 case TargetOpcode::G_INTRINSIC_LRINT:
509 RTLIBCASE(LRINT_F);
510 case TargetOpcode::G_INTRINSIC_LLRINT:
511 RTLIBCASE(LLRINT_F);
512 }
513 llvm_unreachable("Unknown libcall function");
514#undef RTLIBCASE_INT
515#undef RTLIBCASE
516}
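// For instance (illustrative): getRTLibDesc(TargetOpcode::G_FADD, 64) yields
// RTLIB::ADD_F64, which typically resolves to the compiler-rt/libgcc routine
// __adddf3, while getRTLibDesc(TargetOpcode::G_SREM, 32) yields RTLIB::SREM_I32.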
517
518/// True if an instruction is in tail position in its caller. Intended for
519/// legalizing libcalls as tail calls when possible.
520static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
521                                    MachineInstr &MI,
522                                    const TargetInstrInfo &TII,
523                                    MachineRegisterInfo &MRI) {
524  MachineBasicBlock &MBB = *MI.getParent();
525 const Function &F = MBB.getParent()->getFunction();
526
527 // Conservatively require the attributes of the call to match those of
528 // the return. Ignore NoAlias and NonNull because they don't affect the
529 // call sequence.
530 AttributeList CallerAttrs = F.getAttributes();
531 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
532 .removeAttribute(Attribute::NoAlias)
533 .removeAttribute(Attribute::NonNull)
534 .hasAttributes())
535 return false;
536
537 // It's not safe to eliminate the sign / zero extension of the return value.
538 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
539 CallerAttrs.hasRetAttr(Attribute::SExt))
540 return false;
541
542 // Only tail call if the following instruction is a standard return or if we
543 // have a `thisreturn` callee, and a sequence like:
544 //
545 // G_MEMCPY %0, %1, %2
546 // $x0 = COPY %0
547 // RET_ReallyLR implicit $x0
548 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
549 if (Next != MBB.instr_end() && Next->isCopy()) {
550 if (MI.getOpcode() == TargetOpcode::G_BZERO)
551 return false;
552
553    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
554    // memcpy/etc routines return the same parameter. For others it will be the
555    // returned value.
556 Register VReg = MI.getOperand(0).getReg();
557 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
558 return false;
559
560 Register PReg = Next->getOperand(0).getReg();
561 if (!PReg.isPhysical())
562 return false;
563
564 auto Ret = next_nodbg(Next, MBB.instr_end());
565 if (Ret == MBB.instr_end() || !Ret->isReturn())
566 return false;
567
568 if (Ret->getNumImplicitOperands() != 1)
569 return false;
570
571 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
572 return false;
573
574 // Skip over the COPY that we just validated.
575 Next = Ret;
576 }
577
578 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
579 return false;
580
581 return true;
582}
583
584LegalizerHelper::LegalizeResult
585llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
586                    const CallLowering::ArgInfo &Result,
587                    ArrayRef<CallLowering::ArgInfo> Args,
588                    const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
589 MachineInstr *MI) {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605    return LegalizerHelper::UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628  return LegalizerHelper::Legalized;
629}
630
631LegalizerHelper::LegalizeResult
632llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
633                    const CallLowering::ArgInfo &Result,
634                    ArrayRef<CallLowering::ArgInfo> Args,
635                    LostDebugLocObserver &LocObserver, MachineInstr *MI) {
636 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
637 const char *Name = TLI.getLibcallName(Libcall);
638 if (!Name)
639    return LegalizerHelper::UnableToLegalize;
640  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
641 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
642}
643
644// Useful for libcalls where all operands have the same type.
645static LegalizerHelper::LegalizeResult
646simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
647              Type *OpType, LostDebugLocObserver &LocObserver) {
648 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
649
650 // FIXME: What does the original arg index mean here?
651  SmallVector<CallLowering::ArgInfo, 3> Args;
652  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
653 Args.push_back({MO.getReg(), OpType, 0});
654 return createLibcall(MIRBuilder, Libcall,
655 {MI.getOperand(0).getReg(), OpType, 0}, Args,
656 LocObserver, &MI);
657}
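// Illustrative use (not from the source): for `%d:_(s64) = G_FREM %a, %b`,
// simpleLibcall requests REM_F64 (normally "fmod"), passes both operands as
// double, and writes the call result back to %d; the original G_FREM is erased
// later by the caller, LegalizerHelper::libcall().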
658
659LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
660 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
661 LostDebugLocObserver &LocObserver) {
662 MachineFunction &MF = *MI.getMF();
663 MachineRegisterInfo &MRI = MF.getRegInfo();
664
665 Register DstSin = MI.getOperand(0).getReg();
666 Register DstCos = MI.getOperand(1).getReg();
667 Register Src = MI.getOperand(2).getReg();
668 LLT DstTy = MRI.getType(DstSin);
669
670 int MemSize = DstTy.getSizeInBytes();
671 Align Alignment = getStackTemporaryAlignment(DstTy);
672 const DataLayout &DL = MIRBuilder.getDataLayout();
673 unsigned AddrSpace = DL.getAllocaAddrSpace();
674 MachinePointerInfo PtrInfo;
675
676 Register StackPtrSin =
677 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
678 .getReg(0);
679 Register StackPtrCos =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682
683 auto &Ctx = MF.getFunction().getContext();
684 auto LibcallResult =
685      createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
686                    {{0}, Type::getVoidTy(Ctx), 0},
687 {{Src, OpType, 0},
688 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
689 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
690 LocObserver, &MI);
691
692 if (LibcallResult != LegalizeResult::Legalized)
693    return LegalizerHelper::UnableToLegalize;
694
695  MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
696      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697  MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
698      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
699
700 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
701 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
702 MI.eraseFromParent();
703
704  return Legalized;
705}
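// Roughly, for `%s:_(s32), %c:_(s32) = G_FSINCOS %x`, this emits (sketch; the
// callee name depends on the target's SINCOS_F32 libcall):
//   %p0:_(p0) = G_FRAME_INDEX %stack.0   ; sin slot
//   %p1:_(p0) = G_FRAME_INDEX %stack.1   ; cos slot
//   call sincosf(%x, %p0, %p1)
//   %s:_(s32) = G_LOAD %p0
//   %c:_(s32) = G_LOAD %p1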
706
707LegalizerHelper::LegalizeResult
708LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
709 unsigned Size, Type *OpType,
710 LostDebugLocObserver &LocObserver) {
711 MachineFunction &MF = MIRBuilder.getMF();
712 MachineRegisterInfo &MRI = MF.getRegInfo();
713
714 Register DstFrac = MI.getOperand(0).getReg();
715 Register DstInt = MI.getOperand(1).getReg();
716 Register Src = MI.getOperand(2).getReg();
717 LLT DstTy = MRI.getType(DstFrac);
718
719 int MemSize = DstTy.getSizeInBytes();
720 Align Alignment = getStackTemporaryAlignment(DstTy);
721 const DataLayout &DL = MIRBuilder.getDataLayout();
722 unsigned AddrSpace = DL.getAllocaAddrSpace();
723 MachinePointerInfo PtrInfo;
724
725 Register StackPtrInt =
726 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
727 .getReg(0);
728
729 auto &Ctx = MF.getFunction().getContext();
730 auto LibcallResult = createLibcall(
731 MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
732 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
733 LocObserver, &MI);
734
735 if (LibcallResult != LegalizeResult::Legalized)
736    return LegalizerHelper::UnableToLegalize;
737
738  MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
739      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
740
741 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
742 MI.eraseFromParent();
743
744  return Legalized;
745}
746
747LegalizerHelper::LegalizeResult
748llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
749                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
750 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
751
752  SmallVector<CallLowering::ArgInfo, 3> Args;
753  // Add all the args, except for the last which is an imm denoting 'tail'.
754 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
755 Register Reg = MI.getOperand(i).getReg();
756
757    // Need to derive an IR type for call lowering.
758 LLT OpLLT = MRI.getType(Reg);
759 Type *OpTy = nullptr;
760 if (OpLLT.isPointer())
761 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
762 else
763 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
764 Args.push_back({Reg, OpTy, 0});
765 }
766
767 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
768 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
769 RTLIB::Libcall RTLibcall;
770 unsigned Opc = MI.getOpcode();
771 const char *Name;
772 switch (Opc) {
773 case TargetOpcode::G_BZERO:
774 RTLibcall = RTLIB::BZERO;
775 Name = TLI.getLibcallName(RTLibcall);
776 break;
777 case TargetOpcode::G_MEMCPY:
778 RTLibcall = RTLIB::MEMCPY;
779 Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data();
780 Args[0].Flags[0].setReturned();
781 break;
782 case TargetOpcode::G_MEMMOVE:
783 RTLibcall = RTLIB::MEMMOVE;
784 Name = TLI.getLibcallName(RTLibcall);
785 Args[0].Flags[0].setReturned();
786 break;
787 case TargetOpcode::G_MEMSET:
788 RTLibcall = RTLIB::MEMSET;
789 Name = TLI.getLibcallName(RTLibcall);
790 Args[0].Flags[0].setReturned();
791 break;
792 default:
793 llvm_unreachable("unsupported opcode");
794 }
795
796 // Unsupported libcall on the target.
797 if (!Name) {
798 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
799 << MIRBuilder.getTII().getName(Opc) << "\n");
800    return LegalizerHelper::UnableToLegalize;
801  }
802
803  CallLowering::CallLoweringInfo Info;
804  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
805 Info.Callee = MachineOperand::CreateES(Name);
806 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
807 Info.IsTailCall =
808 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
809 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
810
811 llvm::append_range(Info.OrigArgs, Args);
812 if (!CLI.lowerCall(MIRBuilder, Info))
813    return LegalizerHelper::UnableToLegalize;
814
815 if (Info.LoweredTailCall) {
816 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
817
818 // Check debug locations before removing the return.
819 LocObserver.checkpoint(true);
820
821 // We must have a return following the call (or debug insts) to get past
822 // isLibCallInTailPosition.
823 do {
824 MachineInstr *Next = MI.getNextNode();
825 assert(Next &&
826 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
827 "Expected instr following MI to be return or debug inst?");
828 // We lowered a tail call, so the call is now the return from the block.
829 // Delete the old return.
830 Next->eraseFromParent();
831 } while (MI.getNextNode());
832
833 // We expect to lose the debug location from the return.
834 LocObserver.checkpoint(false);
835 }
836
837  return LegalizerHelper::Legalized;
838}
839
840static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
841 unsigned Opc = MI.getOpcode();
842 auto &AtomicMI = cast<GMemOperation>(MI);
843 auto &MMO = AtomicMI.getMMO();
844 auto Ordering = MMO.getMergedOrdering();
845 LLT MemType = MMO.getMemoryType();
846 uint64_t MemSize = MemType.getSizeInBytes();
847 if (MemType.isVector())
848 return RTLIB::UNKNOWN_LIBCALL;
849
850#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
851#define LCALL5(A) \
852 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
857 return getOutlineAtomicHelper(LC, Ordering, MemSize);
858 }
859 case TargetOpcode::G_ATOMICRMW_XCHG: {
860 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
861 return getOutlineAtomicHelper(LC, Ordering, MemSize);
862 }
863 case TargetOpcode::G_ATOMICRMW_ADD:
864 case TargetOpcode::G_ATOMICRMW_SUB: {
865 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
866 return getOutlineAtomicHelper(LC, Ordering, MemSize);
867 }
868 case TargetOpcode::G_ATOMICRMW_AND: {
869 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
870 return getOutlineAtomicHelper(LC, Ordering, MemSize);
871 }
872 case TargetOpcode::G_ATOMICRMW_OR: {
873 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
874 return getOutlineAtomicHelper(LC, Ordering, MemSize);
875 }
876 case TargetOpcode::G_ATOMICRMW_XOR: {
877 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
878 return getOutlineAtomicHelper(LC, Ordering, MemSize);
879 }
880 default:
881 return RTLIB::UNKNOWN_LIBCALL;
882 }
883#undef LCALLS
884#undef LCALL5
885}
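// Example mapping (illustrative): a 4-byte G_ATOMICRMW_ADD whose MMO carries
// acquire ordering maps to RTLIB::OUTLINE_ATOMIC_LDADD4_ACQ, and a relaxed
// 8-byte G_ATOMIC_CMPXCHG maps to RTLIB::OUTLINE_ATOMIC_CAS8_RELAX.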
886
887static LegalizerHelper::LegalizeResult
888createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
889  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
890
891 Type *RetTy;
892 SmallVector<Register> RetRegs;
893  SmallVector<CallLowering::ArgInfo, 3> Args;
894  unsigned Opc = MI.getOpcode();
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898    Register Success;
899    LLT SuccessLLT;
900 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
901 MI.getFirst4RegLLTs();
902 RetRegs.push_back(Ret);
903 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
904 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
905 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
906 NewLLT) = MI.getFirst5RegLLTs();
907 RetRegs.push_back(Success);
908 RetTy = StructType::get(
909 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
910 }
911 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
912 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
913 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
914 break;
915 }
916 case TargetOpcode::G_ATOMICRMW_XCHG:
917 case TargetOpcode::G_ATOMICRMW_ADD:
918 case TargetOpcode::G_ATOMICRMW_SUB:
919 case TargetOpcode::G_ATOMICRMW_AND:
920 case TargetOpcode::G_ATOMICRMW_OR:
921 case TargetOpcode::G_ATOMICRMW_XOR: {
922 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
923 RetRegs.push_back(Ret);
924 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
925 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
926 Val =
927 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
928 .getReg(0);
929 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
930 Val =
931 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
932 .getReg(0);
933 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
934 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
935 break;
936 }
937 default:
938 llvm_unreachable("unsupported opcode");
939 }
940
941 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
942 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
943 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
944 const char *Name = TLI.getLibcallName(RTLibcall);
945
946 // Unsupported libcall on the target.
947 if (!Name) {
948 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
949 << MIRBuilder.getTII().getName(Opc) << "\n");
950    return LegalizerHelper::UnableToLegalize;
951  }
952
953  CallLowering::CallLoweringInfo Info;
954  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
955 Info.Callee = MachineOperand::CreateES(Name);
956 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
957
958 llvm::append_range(Info.OrigArgs, Args);
959 if (!CLI.lowerCall(MIRBuilder, Info))
960    return LegalizerHelper::UnableToLegalize;
961
962  return LegalizerHelper::Legalized;
963}
964
965static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
966 Type *FromType) {
967 auto ToMVT = MVT::getVT(ToType);
968 auto FromMVT = MVT::getVT(FromType);
969
970 switch (Opcode) {
971 case TargetOpcode::G_FPEXT:
972 return RTLIB::getFPEXT(FromMVT, ToMVT);
973 case TargetOpcode::G_FPTRUNC:
974 return RTLIB::getFPROUND(FromMVT, ToMVT);
975 case TargetOpcode::G_FPTOSI:
976 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
977 case TargetOpcode::G_FPTOUI:
978 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
979 case TargetOpcode::G_SITOFP:
980 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
981 case TargetOpcode::G_UITOFP:
982 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
983 }
984 llvm_unreachable("Unsupported libcall function");
985}
986
987static LegalizerHelper::LegalizeResult
988conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
989                  Type *FromType, LostDebugLocObserver &LocObserver,
990 const TargetLowering &TLI, bool IsSigned = false) {
991 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
992 if (FromType->isIntegerTy()) {
993 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
994 Arg.Flags[0].setSExt();
995 else
996 Arg.Flags[0].setZExt();
997 }
998
999 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
1000 return createLibcall(MIRBuilder, Libcall,
1001 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
1002 &MI);
1003}
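// For example (illustrative): legalizing `%d:_(s64) = G_FPEXT %x:_(s32)` via
// this path requests RTLIB::getFPEXT(MVT::f32, MVT::f64), i.e. the
// __extendsfdf2 routine, with the f32 operand passed through unmodified (no
// sign/zero-extension flags, since it is not an integer).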
1004
1005static RTLIB::Libcall
1006getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1007  RTLIB::Libcall RTLibcall;
1008 switch (MI.getOpcode()) {
1009 case TargetOpcode::G_GET_FPENV:
1010 RTLibcall = RTLIB::FEGETENV;
1011 break;
1012 case TargetOpcode::G_SET_FPENV:
1013 case TargetOpcode::G_RESET_FPENV:
1014 RTLibcall = RTLIB::FESETENV;
1015 break;
1016 case TargetOpcode::G_GET_FPMODE:
1017 RTLibcall = RTLIB::FEGETMODE;
1018 break;
1019 case TargetOpcode::G_SET_FPMODE:
1020 case TargetOpcode::G_RESET_FPMODE:
1021 RTLibcall = RTLIB::FESETMODE;
1022 break;
1023 default:
1024 llvm_unreachable("Unexpected opcode");
1025 }
1026 return RTLibcall;
1027}
1028
1029// Some library functions that read FP state (fegetmode, fegetenv) write the
1030// state into a region in memory. IR intrinsics that do the same operations
1031// (get_fpmode, get_fpenv) return the state as an integer value. To implement
1032// these intrinsics via the library functions, we need to use a temporary
1033// variable, for example:
1034//
1035// %0:_(s32) = G_GET_FPMODE
1036//
1037// is transformed to:
1038//
1039// %1:_(p0) = G_FRAME_INDEX %stack.0
1040// BL &fegetmode
1041// %0:_(s32) = G_LOAD % 1
1042//
1043LegalizerHelper::LegalizeResult
1044LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1045                                       MachineInstr &MI,
1046                                       LostDebugLocObserver &LocObserver) {
1047 const DataLayout &DL = MIRBuilder.getDataLayout();
1048 auto &MF = MIRBuilder.getMF();
1049 auto &MRI = *MIRBuilder.getMRI();
1050 auto &Ctx = MF.getFunction().getContext();
1051
1052 // Create temporary, where library function will put the read state.
1053 Register Dst = MI.getOperand(0).getReg();
1054 LLT StateTy = MRI.getType(Dst);
1055 TypeSize StateSize = StateTy.getSizeInBytes();
1056 Align TempAlign = getStackTemporaryAlignment(StateTy);
1057 MachinePointerInfo TempPtrInfo;
1058 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1059
1060 // Create a call to library function, with the temporary as an argument.
1061 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1062 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1063 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1064 auto Res =
1065 createLibcall(MIRBuilder, RTLibcall,
1066 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1067 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1068 LocObserver, nullptr);
1069 if (Res != LegalizerHelper::Legalized)
1070 return Res;
1071
1072 // Create a load from the temporary.
1073 MachineMemOperand *MMO = MF.getMachineMemOperand(
1074 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1075 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1076
1078}
1079
1080// Similar to `createGetStateLibcall`, this function calls a library function
1081// using transient space on the stack. In this case the library function reads
1082// the content of the memory region.
1083LegalizerHelper::LegalizeResult
1084LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1085                                       MachineInstr &MI,
1086                                       LostDebugLocObserver &LocObserver) {
1087 const DataLayout &DL = MIRBuilder.getDataLayout();
1088 auto &MF = MIRBuilder.getMF();
1089 auto &MRI = *MIRBuilder.getMRI();
1090 auto &Ctx = MF.getFunction().getContext();
1091
1092 // Create temporary, where library function will get the new state.
1093 Register Src = MI.getOperand(0).getReg();
1094 LLT StateTy = MRI.getType(Src);
1095 TypeSize StateSize = StateTy.getSizeInBytes();
1096 Align TempAlign = getStackTemporaryAlignment(StateTy);
1097 MachinePointerInfo TempPtrInfo;
1098 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1099
1100 // Put the new state into the temporary.
1101 MachineMemOperand *MMO = MF.getMachineMemOperand(
1102 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1103 MIRBuilder.buildStore(Src, Temp, *MMO);
1104
1105 // Create a call to library function, with the temporary as an argument.
1106 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1107 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1108 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1109 return createLibcall(MIRBuilder, RTLibcall,
1110 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1111 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1112 LocObserver, nullptr);
1113}
1114
1115/// Returns the corresponding libcall for the given Pred and
1116/// the ICMP predicate that should be generated to compare with #0
1117/// after the libcall.
1118static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1119getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1120#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                \
1121 do { \
1122 switch (Size) { \
1123 case 32: \
1124 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1125 case 64: \
1126 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1127 case 128: \
1128 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1129 default: \
1130 llvm_unreachable("unexpected size"); \
1131 } \
1132 } while (0)
1133
1134 switch (Pred) {
1135  case CmpInst::FCMP_OEQ:
1136    RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1137  case CmpInst::FCMP_UNE:
1138    RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1139  case CmpInst::FCMP_OGE:
1140    RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1141  case CmpInst::FCMP_OLT:
1142    RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1143  case CmpInst::FCMP_OLE:
1144    RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1145  case CmpInst::FCMP_OGT:
1146    RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1147  case CmpInst::FCMP_UNO:
1148    RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1149 default:
1150 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1151 }
1152}
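// For example (illustrative): FCMP_OEQ on 64-bit operands maps to
// {RTLIB::OEQ_F64, ICMP_EQ}, i.e. call __eqdf2(a, b) and then compare the i32
// result against 0 with ICMP_EQ, following the soft-float comparison-routine
// convention referenced in createFCMPLibcall below.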
1153
1154LegalizerHelper::LegalizeResult
1155LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1156                                   MachineInstr &MI,
1157                                   LostDebugLocObserver &LocObserver) {
1158 auto &MF = MIRBuilder.getMF();
1159 auto &Ctx = MF.getFunction().getContext();
1160 const GFCmp *Cmp = cast<GFCmp>(&MI);
1161
1162 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1163 unsigned Size = OpLLT.getSizeInBits();
1164 if ((Size != 32 && Size != 64 && Size != 128) ||
1165 OpLLT != MRI.getType(Cmp->getRHSReg()))
1166 return UnableToLegalize;
1167
1168 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1169
1170 // DstReg type is s32
1171 const Register DstReg = Cmp->getReg(0);
1172 LLT DstTy = MRI.getType(DstReg);
1173 const auto Cond = Cmp->getCond();
1174
1175 // Reference:
1176 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1177 // Generates a libcall followed by ICMP.
1178 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1179 const CmpInst::Predicate ICmpPred,
1180 const DstOp &Res) -> Register {
1181 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1182 constexpr LLT TempLLT = LLT::scalar(32);
1183 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1184 // Generate libcall, holding result in Temp
1185 const auto Status = createLibcall(
1186 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1187 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1188 LocObserver, &MI);
1189 if (!Status)
1190 return {};
1191
1192 // Compare temp with #0 to get the final result.
1193 return MIRBuilder
1194 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1195 .getReg(0);
1196 };
1197
1198 // Simple case if we have a direct mapping from predicate to libcall
1199 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1200 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1201 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1202 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1203 return Legalized;
1204 }
1205 return UnableToLegalize;
1206 }
1207
1208 // No direct mapping found, should be generated as combination of libcalls.
1209
1210 switch (Cond) {
1211 case CmpInst::FCMP_UEQ: {
1212 // FCMP_UEQ: unordered or equal
1213 // Convert into (FCMP_OEQ || FCMP_UNO).
1214
1215 const auto [OeqLibcall, OeqPred] =
1216        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1217    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1218
1219 const auto [UnoLibcall, UnoPred] =
1220        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1221    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1222 if (Oeq && Uno)
1223 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1224 else
1225 return UnableToLegalize;
1226
1227 break;
1228 }
1229 case CmpInst::FCMP_ONE: {
1230 // FCMP_ONE: ordered and operands are unequal
1231 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1232
1233    // We invert the predicate instead of generating a NOT
1234    // to save one instruction.
1235    // On AArch64, isel can even select two cmps into a single ccmp.
1236 const auto [OeqLibcall, OeqPred] =
1237        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1238    const auto NotOeq =
1239 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1240
1241 const auto [UnoLibcall, UnoPred] =
1242        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1243    const auto NotUno =
1244 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1245
1246 if (NotOeq && NotUno)
1247 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1248 else
1249 return UnableToLegalize;
1250
1251 break;
1252 }
1253 case CmpInst::FCMP_ULT:
1254 case CmpInst::FCMP_UGE:
1255 case CmpInst::FCMP_UGT:
1256 case CmpInst::FCMP_ULE:
1257 case CmpInst::FCMP_ORD: {
1258 // Convert into: !(inverse(Pred))
1259 // E.g. FCMP_ULT becomes !FCMP_OGE
1260 // This is equivalent to the following, but saves some instructions.
1261 // MIRBuilder.buildNot(
1262 // PredTy,
1263 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1264 // Op1, Op2));
1265 const auto [InversedLibcall, InversedPred] =
1266        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1267    if (!BuildLibcall(InversedLibcall,
1268 CmpInst::getInversePredicate(InversedPred), DstReg))
1269 return UnableToLegalize;
1270 break;
1271 }
1272 default:
1273 return UnableToLegalize;
1274 }
1275
1276 return Legalized;
1277}
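// As a sketch (illustrative, not from the source), an FCMP_UEQ on two s64
// values expands roughly to:
//   %t0:_(s32)  = call __eqdf2(%a, %b)
//   %oeq        = G_ICMP intpred(eq), %t0, 0
//   %t1:_(s32)  = call __unorddf2(%a, %b)
//   %uno        = G_ICMP intpred(ne), %t1, 0
//   %dst        = G_OR %oeq, %uno
// (the ICMP result type follows DstTy of the original G_FCMP).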
1278
1279// This function is used to legalize operations that set the default
1280// environment state. In the C library, a call like `fesetmode(FE_DFL_MODE)` is
1281// used for that. On most targets supported in glibc, FE_DFL_MODE is defined as
1282// `((const femode_t *) -1)`. That assumption is used here. If it does not hold
1283// for some target, the target must provide custom lowering.
1284LegalizerHelper::LegalizeResult
1285LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1286                                         MachineInstr &MI,
1287                                         LostDebugLocObserver &LocObserver) {
1288 const DataLayout &DL = MIRBuilder.getDataLayout();
1289 auto &MF = MIRBuilder.getMF();
1290 auto &Ctx = MF.getFunction().getContext();
1291
1292 // Create an argument for the library function.
1293 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1294 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1295 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1296 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1297 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1298 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1299 MIRBuilder.buildIntToPtr(Dest, DefValue);
1300
1301 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1302 return createLibcall(MIRBuilder, RTLibcall,
1303 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1304 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1305 LocObserver, &MI);
1306}
1307
1308LegalizerHelper::LegalizeResult
1309LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1310  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1311
1312 switch (MI.getOpcode()) {
1313 default:
1314 return UnableToLegalize;
1315 case TargetOpcode::G_MUL:
1316 case TargetOpcode::G_SDIV:
1317 case TargetOpcode::G_UDIV:
1318 case TargetOpcode::G_SREM:
1319 case TargetOpcode::G_UREM:
1320 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1321 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1322 unsigned Size = LLTy.getSizeInBits();
1323 Type *HLTy = IntegerType::get(Ctx, Size);
1324 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1325 if (Status != Legalized)
1326 return Status;
1327 break;
1328 }
1329 case TargetOpcode::G_FADD:
1330 case TargetOpcode::G_FSUB:
1331 case TargetOpcode::G_FMUL:
1332 case TargetOpcode::G_FDIV:
1333 case TargetOpcode::G_FMA:
1334 case TargetOpcode::G_FPOW:
1335 case TargetOpcode::G_FREM:
1336 case TargetOpcode::G_FCOS:
1337 case TargetOpcode::G_FSIN:
1338 case TargetOpcode::G_FTAN:
1339 case TargetOpcode::G_FACOS:
1340 case TargetOpcode::G_FASIN:
1341 case TargetOpcode::G_FATAN:
1342 case TargetOpcode::G_FATAN2:
1343 case TargetOpcode::G_FCOSH:
1344 case TargetOpcode::G_FSINH:
1345 case TargetOpcode::G_FTANH:
1346 case TargetOpcode::G_FLOG10:
1347 case TargetOpcode::G_FLOG:
1348 case TargetOpcode::G_FLOG2:
1349 case TargetOpcode::G_FEXP:
1350 case TargetOpcode::G_FEXP2:
1351 case TargetOpcode::G_FEXP10:
1352 case TargetOpcode::G_FCEIL:
1353 case TargetOpcode::G_FFLOOR:
1354 case TargetOpcode::G_FMINNUM:
1355 case TargetOpcode::G_FMAXNUM:
1356 case TargetOpcode::G_FMINIMUMNUM:
1357 case TargetOpcode::G_FMAXIMUMNUM:
1358 case TargetOpcode::G_FSQRT:
1359 case TargetOpcode::G_FRINT:
1360 case TargetOpcode::G_FNEARBYINT:
1361 case TargetOpcode::G_INTRINSIC_TRUNC:
1362 case TargetOpcode::G_INTRINSIC_ROUND:
1363 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1364 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1365 unsigned Size = LLTy.getSizeInBits();
1366 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1372 if (Status != Legalized)
1373 return Status;
1374 break;
1375 }
1376 case TargetOpcode::G_FSINCOS: {
1377 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1378 unsigned Size = LLTy.getSizeInBits();
1379 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1380 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1381 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1382 return UnableToLegalize;
1383 }
1384 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1385 }
1386 case TargetOpcode::G_FMODF: {
1387 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1388 unsigned Size = LLTy.getSizeInBits();
1389 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1390 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1391 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1392 return UnableToLegalize;
1393 }
1394 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1395 }
1396 case TargetOpcode::G_LROUND:
1397 case TargetOpcode::G_LLROUND:
1398 case TargetOpcode::G_INTRINSIC_LRINT:
1399 case TargetOpcode::G_INTRINSIC_LLRINT: {
1400 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1401 unsigned Size = LLTy.getSizeInBits();
1402 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1403 Type *ITy = IntegerType::get(
1404 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1405 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1406 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1407 return UnableToLegalize;
1408 }
1409 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1410    LegalizeResult Status =
1411        createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1412 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1413 if (Status != Legalized)
1414 return Status;
1415 MI.eraseFromParent();
1416 return Legalized;
1417 }
1418 case TargetOpcode::G_FPOWI:
1419 case TargetOpcode::G_FLDEXP: {
1420 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1421 unsigned Size = LLTy.getSizeInBits();
1422 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1423 Type *ITy = IntegerType::get(
1424 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1425 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1426 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1427 return UnableToLegalize;
1428 }
1429 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1430    SmallVector<CallLowering::ArgInfo, 2> Args = {
1431        {MI.getOperand(1).getReg(), HLTy, 0},
1432 {MI.getOperand(2).getReg(), ITy, 1}};
1433 Args[1].Flags[0].setSExt();
1434    LegalizeResult Status =
1435        createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1436 Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447    LegalizeResult Status =
1448        conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1465 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1468    LegalizeResult Status = conversionLibcall(
1469        MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482    LegalizeResult Status =
1483        conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1484 LocObserver, TLI, IsSigned);
1485 if (Status != Legalized)
1486 return Status;
1487 break;
1488 }
1489 case TargetOpcode::G_ATOMICRMW_XCHG:
1490 case TargetOpcode::G_ATOMICRMW_ADD:
1491 case TargetOpcode::G_ATOMICRMW_SUB:
1492 case TargetOpcode::G_ATOMICRMW_AND:
1493 case TargetOpcode::G_ATOMICRMW_OR:
1494 case TargetOpcode::G_ATOMICRMW_XOR:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG:
1496 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1497    auto Status = createAtomicLibcall(MIRBuilder, MI);
1498    if (Status != Legalized)
1499 return Status;
1500 break;
1501 }
1502 case TargetOpcode::G_BZERO:
1503 case TargetOpcode::G_MEMCPY:
1504 case TargetOpcode::G_MEMMOVE:
1505 case TargetOpcode::G_MEMSET: {
1506 LegalizeResult Result =
1507 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1508 if (Result != Legalized)
1509 return Result;
1510 MI.eraseFromParent();
1511 return Result;
1512 }
1513 case TargetOpcode::G_GET_FPENV:
1514 case TargetOpcode::G_GET_FPMODE: {
1515 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1516 if (Result != Legalized)
1517 return Result;
1518 break;
1519 }
1520 case TargetOpcode::G_SET_FPENV:
1521 case TargetOpcode::G_SET_FPMODE: {
1522 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1523 if (Result != Legalized)
1524 return Result;
1525 break;
1526 }
1527 case TargetOpcode::G_RESET_FPENV:
1528 case TargetOpcode::G_RESET_FPMODE: {
1529 LegalizeResult Result =
1530 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1531 if (Result != Legalized)
1532 return Result;
1533 break;
1534 }
1535 }
1536
1537 MI.eraseFromParent();
1538 return Legalized;
1539}
1540
1541LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1542                                                              unsigned TypeIdx,
1543 LLT NarrowTy) {
1544 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1545 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1546
1547 switch (MI.getOpcode()) {
1548 default:
1549 return UnableToLegalize;
1550 case TargetOpcode::G_IMPLICIT_DEF: {
1551 Register DstReg = MI.getOperand(0).getReg();
1552 LLT DstTy = MRI.getType(DstReg);
1553
1554 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1555 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1556 // FIXME: Although this would also be legal for the general case, it causes
1557 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1558 // combines not being hit). This seems to be a problem related to the
1559 // artifact combiner.
1560 if (SizeOp0 % NarrowSize != 0) {
1561 LLT ImplicitTy = NarrowTy;
1562 if (DstTy.isVector())
1563 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1564
1565 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1566 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1567
1568 MI.eraseFromParent();
1569 return Legalized;
1570 }
1571
1572 int NumParts = SizeOp0 / NarrowSize;
1573
1574    SmallVector<Register, 2> DstRegs;
1575    for (int i = 0; i < NumParts; ++i)
1576 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1577
1578 if (DstTy.isVector())
1579 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1580 else
1581 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_CONSTANT: {
1586 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1587 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1588 unsigned TotalSize = Ty.getSizeInBits();
1589 unsigned NarrowSize = NarrowTy.getSizeInBits();
1590 int NumParts = TotalSize / NarrowSize;
1591
1592 SmallVector<Register, 4> PartRegs;
1593 for (int I = 0; I != NumParts; ++I) {
1594 unsigned Offset = I * NarrowSize;
1595 auto K = MIRBuilder.buildConstant(NarrowTy,
1596 Val.lshr(Offset).trunc(NarrowSize));
1597 PartRegs.push_back(K.getReg(0));
1598 }
1599
1600 LLT LeftoverTy;
1601 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1602 SmallVector<Register, 1> LeftoverRegs;
1603 if (LeftoverBits != 0) {
1604 LeftoverTy = LLT::scalar(LeftoverBits);
1605 auto K = MIRBuilder.buildConstant(
1606 LeftoverTy,
1607 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1608 LeftoverRegs.push_back(K.getReg(0));
1609 }
1610
1611 insertParts(MI.getOperand(0).getReg(),
1612 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1613
1614 MI.eraseFromParent();
1615 return Legalized;
1616 }
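  // Illustrative example (not from the source): narrowing
  // `%c:_(s96) = G_CONSTANT i96 K` with NarrowTy = s32 builds three s32
  // G_CONSTANTs holding bits [31:0], [63:32] and [95:64] of K and remerges
  // them; if the width were s70 instead, the two full s32 pieces would be
  // followed by an s6 leftover constant handled via insertParts.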
1617 case TargetOpcode::G_SEXT:
1618 case TargetOpcode::G_ZEXT:
1619 case TargetOpcode::G_ANYEXT:
1620 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1621 case TargetOpcode::G_TRUNC: {
1622 if (TypeIdx != 1)
1623 return UnableToLegalize;
1624
1625 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1626 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1627 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1628 return UnableToLegalize;
1629 }
1630
1631 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1632 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1633 MI.eraseFromParent();
1634 return Legalized;
1635 }
1636 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1637 case TargetOpcode::G_FREEZE: {
1638 if (TypeIdx != 0)
1639 return UnableToLegalize;
1640
1641 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1642 // Should widen scalar first
1643 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1644 return UnableToLegalize;
1645
1646 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1647    SmallVector<Register, 8> Parts;
1648    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1649 Parts.push_back(
1650 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1651 .getReg(0));
1652 }
1653
1654 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1655 MI.eraseFromParent();
1656 return Legalized;
1657 }
1658 case TargetOpcode::G_ADD:
1659 case TargetOpcode::G_SUB:
1660 case TargetOpcode::G_SADDO:
1661 case TargetOpcode::G_SSUBO:
1662 case TargetOpcode::G_SADDE:
1663 case TargetOpcode::G_SSUBE:
1664 case TargetOpcode::G_UADDO:
1665 case TargetOpcode::G_USUBO:
1666 case TargetOpcode::G_UADDE:
1667 case TargetOpcode::G_USUBE:
1668 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1669 case TargetOpcode::G_MUL:
1670 case TargetOpcode::G_UMULH:
1671 return narrowScalarMul(MI, NarrowTy);
1672 case TargetOpcode::G_EXTRACT:
1673 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1674 case TargetOpcode::G_INSERT:
1675 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1676 case TargetOpcode::G_LOAD: {
1677 auto &LoadMI = cast<GLoad>(MI);
1678 Register DstReg = LoadMI.getDstReg();
1679 LLT DstTy = MRI.getType(DstReg);
1680 if (DstTy.isVector())
1681 return UnableToLegalize;
1682
1683 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1684 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1685 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1686 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1687 LoadMI.eraseFromParent();
1688 return Legalized;
1689 }
1690
1691 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1692 }
1693 case TargetOpcode::G_ZEXTLOAD:
1694 case TargetOpcode::G_SEXTLOAD: {
1695 auto &LoadMI = cast<GExtLoad>(MI);
1696 Register DstReg = LoadMI.getDstReg();
1697 Register PtrReg = LoadMI.getPointerReg();
1698
1699 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1700 auto &MMO = LoadMI.getMMO();
1701 unsigned MemSize = MMO.getSizeInBits().getValue();
1702
1703 if (MemSize == NarrowSize) {
1704 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1705 } else if (MemSize < NarrowSize) {
1706 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1707 } else if (MemSize > NarrowSize) {
1708 // FIXME: Need to split the load.
1709 return UnableToLegalize;
1710 }
1711
1712 if (isa<GZExtLoad>(LoadMI))
1713 MIRBuilder.buildZExt(DstReg, TmpReg);
1714 else
1715 MIRBuilder.buildSExt(DstReg, TmpReg);
1716
1717 LoadMI.eraseFromParent();
1718 return Legalized;
1719 }
1720 case TargetOpcode::G_STORE: {
1721 auto &StoreMI = cast<GStore>(MI);
1722
1723 Register SrcReg = StoreMI.getValueReg();
1724 LLT SrcTy = MRI.getType(SrcReg);
1725 if (SrcTy.isVector())
1726 return UnableToLegalize;
1727
1728 int NumParts = SizeOp0 / NarrowSize;
1729 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1730 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1731 if (SrcTy.isVector() && LeftoverBits != 0)
1732 return UnableToLegalize;
1733
1734 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1735 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1736 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1737 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1738 StoreMI.eraseFromParent();
1739 return Legalized;
1740 }
1741
1742 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1743 }
1744 case TargetOpcode::G_SELECT:
1745 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1746 case TargetOpcode::G_AND:
1747 case TargetOpcode::G_OR:
1748 case TargetOpcode::G_XOR: {
1749 // Legalize bitwise operation:
1750 // A = BinOp<Ty> B, C
1751 // into:
1752 // B1, ..., BN = G_UNMERGE_VALUES B
1753 // C1, ..., CN = G_UNMERGE_VALUES C
1754 // A1 = BinOp<Ty/N> B1, C2
1755 // ...
1756 // AN = BinOp<Ty/N> BN, CN
1757 // A = G_MERGE_VALUES A1, ..., AN
1758 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1759 }
1760 case TargetOpcode::G_SHL:
1761 case TargetOpcode::G_LSHR:
1762 case TargetOpcode::G_ASHR:
1763 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1764 case TargetOpcode::G_CTLZ:
1765 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1766 case TargetOpcode::G_CTTZ:
1767 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1768 case TargetOpcode::G_CTPOP:
1769 if (TypeIdx == 1)
1770 switch (MI.getOpcode()) {
1771 case TargetOpcode::G_CTLZ:
1772 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1773 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTTZ:
1775 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1776 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1777 case TargetOpcode::G_CTPOP:
1778 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1779 default:
1780 return UnableToLegalize;
1781 }
1782
1783 Observer.changingInstr(MI);
1784 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_INTTOPTR:
1788 if (TypeIdx != 1)
1789 return UnableToLegalize;
1790
1791 Observer.changingInstr(MI);
1792 narrowScalarSrc(MI, NarrowTy, 1);
1793 Observer.changedInstr(MI);
1794 return Legalized;
1795 case TargetOpcode::G_PTRTOINT:
1796 if (TypeIdx != 0)
1797 return UnableToLegalize;
1798
1799 Observer.changingInstr(MI);
1800 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1801 Observer.changedInstr(MI);
1802 return Legalized;
1803 case TargetOpcode::G_PHI: {
1804 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1805 // NarrowSize.
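// Illustrative sketch (types and vreg names are examples only): narrowing
// %r:_(s64) = G_PHI %a(s64), %bb.1, %b(s64), %bb.2 with NarrowTy = s32 gives
//   in %bb.1: %a0:_(s32), %a1:_(s32) = G_UNMERGE_VALUES %a
//   in %bb.2: %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b
//   %p0:_(s32) = G_PHI %a0(s32), %bb.1, %b0(s32), %bb.2
//   %p1:_(s32) = G_PHI %a1(s32), %bb.1, %b1(s32), %bb.2
//   %r:_(s64) = G_MERGE_VALUES %p0(s32), %p1(s32)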
1806 if (SizeOp0 % NarrowSize != 0)
1807 return UnableToLegalize;
1808
1809 unsigned NumParts = SizeOp0 / NarrowSize;
1810 SmallVector<Register, 2> DstRegs(NumParts);
1811 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1812 Observer.changingInstr(MI);
1813 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1814 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1815 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1816 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1817 SrcRegs[i / 2], MIRBuilder, MRI);
1818 }
1819 MachineBasicBlock &MBB = *MI.getParent();
1820 MIRBuilder.setInsertPt(MBB, MI);
1821 for (unsigned i = 0; i < NumParts; ++i) {
1822 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1823 MachineInstrBuilder MIB =
1824 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1825 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1826 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1827 }
1828 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1829 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1830 Observer.changedInstr(MI);
1831 MI.eraseFromParent();
1832 return Legalized;
1833 }
1834 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1835 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 if (TypeIdx != 2)
1837 return UnableToLegalize;
1838
1839 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1840 Observer.changingInstr(MI);
1841 narrowScalarSrc(MI, NarrowTy, OpIdx);
1842 Observer.changedInstr(MI);
1843 return Legalized;
1844 }
1845 case TargetOpcode::G_ICMP: {
1846 Register LHS = MI.getOperand(2).getReg();
1847 LLT SrcTy = MRI.getType(LHS);
1848 CmpInst::Predicate Pred =
1849 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1850
1851 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1852 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1853 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1854 LHSLeftoverRegs, MIRBuilder, MRI))
1855 return UnableToLegalize;
1856
1857 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1858 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1859 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1860 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1861 return UnableToLegalize;
1862
1863 // We now have the LHS and RHS of the compare split into narrow-type
1864 // registers, plus potentially some leftover type.
1865 Register Dst = MI.getOperand(0).getReg();
1866 LLT ResTy = MRI.getType(Dst);
1867 if (ICmpInst::isEquality(Pred)) {
1868 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1869 // them together. For each equal part, the result should be all 0s. For
1870 // each non-equal part, we'll get at least one 1.
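// Illustrative sketch (s64 compare split into s32 parts; vregs are examples):
//   %x0:_(s32) = G_XOR %lhs0, %rhs0
//   %x1:_(s32) = G_XOR %lhs1, %rhs1
//   %or:_(s32) = G_OR %x0, %x1
//   %res:_(s1) = G_ICMP eq/ne, %or(s32), 0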
1871 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1872 SmallVector<Register, 4> Xors;
1873 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1874 auto LHS = std::get<0>(LHSAndRHS);
1875 auto RHS = std::get<1>(LHSAndRHS);
1876 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1877 Xors.push_back(Xor);
1878 }
1879
1880 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1881 // to the desired narrow type so that we can OR them together later.
1882 SmallVector<Register, 4> WidenedXors;
1883 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1884 auto LHS = std::get<0>(LHSAndRHS);
1885 auto RHS = std::get<1>(LHSAndRHS);
1886 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1887 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1888 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1889 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1890 llvm::append_range(Xors, WidenedXors);
1891 }
1892
1893 // Now, for each part we broke up, we know if they are equal/not equal
1894 // based off the G_XOR. We can OR these all together and compare against
1895 // 0 to get the result.
1896 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1897 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1898 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1899 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1900 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1901 } else {
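// Compare the parts from least to most significant; all but the final
// (most significant) part use the unsigned form of the predicate, and a
// higher part only defers to the lower parts when its two halves are equal.
// Illustrative sketch for a signed-less-than on s64 split into s32 parts
// (vreg names are examples):
//   %c_lo:_(s1) = G_ICMP ult, %lhs0(s32), %rhs0
//   %c_hi:_(s1) = G_ICMP slt, %lhs1(s32), %rhs1
//   %eq_hi:_(s1) = G_ICMP eq, %lhs1(s32), %rhs1
//   %res:_(s1) = G_SELECT %eq_hi, %c_lo, %c_hi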
1902 Register CmpIn;
1903 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1904 Register CmpOut;
1905 CmpInst::Predicate PartPred;
1906
1907 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1908 PartPred = Pred;
1909 CmpOut = Dst;
1910 } else {
1911 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1912 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 }
1914
1915 if (!CmpIn) {
1916 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 } else {
1919 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1920 RHSPartRegs[I]);
1921 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1922 LHSPartRegs[I], RHSPartRegs[I]);
1923 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1924 }
1925
1926 CmpIn = CmpOut;
1927 }
1928
1929 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1930 Register CmpOut;
1931 CmpInst::Predicate PartPred;
1932
1933 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1934 PartPred = Pred;
1935 CmpOut = Dst;
1936 } else {
1937 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1938 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 }
1940
1941 if (!CmpIn) {
1942 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 } else {
1945 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1946 RHSLeftoverRegs[I]);
1947 auto CmpEq =
1948 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1949 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1950 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1951 }
1952
1953 CmpIn = CmpOut;
1954 }
1955 }
1956 MI.eraseFromParent();
1957 return Legalized;
1958 }
1959 case TargetOpcode::G_FCMP:
1960 if (TypeIdx != 0)
1961 return UnableToLegalize;
1962
1963 Observer.changingInstr(MI);
1964 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1965 Observer.changedInstr(MI);
1966 return Legalized;
1967
1968 case TargetOpcode::G_SEXT_INREG: {
1969 if (TypeIdx != 0)
1970 return UnableToLegalize;
1971
1972 int64_t SizeInBits = MI.getOperand(2).getImm();
1973
1974 // So long as the new type has more bits than the bits we're extending, we
1975 // don't need to break it apart.
1976 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1977 Observer.changingInstr(MI);
1978 // We don't lose any non-extension bits by truncating the src and
1979 // sign-extending the dst.
1980 MachineOperand &MO1 = MI.getOperand(1);
1981 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1982 MO1.setReg(TruncMIB.getReg(0));
1983
1984 MachineOperand &MO2 = MI.getOperand(0);
1985 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1986 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1987 MIRBuilder.buildSExt(MO2, DstExt);
1988 MO2.setReg(DstExt);
1989 Observer.changedInstr(MI);
1990 return Legalized;
1991 }
1992
1993 // Break it apart. Components below the extension point are unmodified. The
1994 // component containing the extension point becomes a narrower SEXT_INREG.
1995 // Components above it are ashr'd from the component containing the
1996 // extension point.
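// Illustrative sketch (types are examples): G_SEXT_INREG %x(s96), 40 with
// NarrowTy = s32 splits %x into three s32 parts:
//   part0 (bits 0-31)  : copied through unchanged
//   part1 (bits 32-63) : G_SEXT_INREG part1, 8   (40 % 32)
//   part2 (bits 64-95) : G_ASHR extended_part1, 31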
1997 if (SizeOp0 % NarrowSize != 0)
1998 return UnableToLegalize;
1999 int NumParts = SizeOp0 / NarrowSize;
2000
2001 // List the registers where the destination will be scattered.
2002 SmallVector<Register, 2> DstRegs;
2003 // List the registers where the source will be split.
2004 SmallVector<Register, 2> SrcRegs;
2005
2006 // Create all the temporary registers.
2007 for (int i = 0; i < NumParts; ++i) {
2008 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2009
2010 SrcRegs.push_back(SrcReg);
2011 }
2012
2013 // Explode the big arguments into smaller chunks.
2014 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2015
2016 Register AshrCstReg =
2017 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2018 .getReg(0);
2019 Register FullExtensionReg;
2020 Register PartialExtensionReg;
2021
2022 // Do the operation on each small part.
2023 for (int i = 0; i < NumParts; ++i) {
2024 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2025 DstRegs.push_back(SrcRegs[i]);
2026 PartialExtensionReg = DstRegs.back();
2027 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2028 assert(PartialExtensionReg &&
2029 "Expected to visit partial extension before full");
2030 if (FullExtensionReg) {
2031 DstRegs.push_back(FullExtensionReg);
2032 continue;
2033 }
2034 DstRegs.push_back(
2035 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2036 .getReg(0));
2037 FullExtensionReg = DstRegs.back();
2038 } else {
2039 DstRegs.push_back(
2040 MIRBuilder
2041 .buildInstr(
2042 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2043 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2044 .getReg(0));
2045 PartialExtensionReg = DstRegs.back();
2046 }
2047 }
2048
2049 // Gather the destination registers into the final destination.
2050 Register DstReg = MI.getOperand(0).getReg();
2051 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2052 MI.eraseFromParent();
2053 return Legalized;
2054 }
2055 case TargetOpcode::G_BSWAP:
2056 case TargetOpcode::G_BITREVERSE: {
2057 if (SizeOp0 % NarrowSize != 0)
2058 return UnableToLegalize;
2059
2060 Observer.changingInstr(MI);
2061 SmallVector<Register, 2> SrcRegs, DstRegs;
2062 unsigned NumParts = SizeOp0 / NarrowSize;
2063 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2064 MIRBuilder, MRI);
2065
2066 for (unsigned i = 0; i < NumParts; ++i) {
2067 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2068 {SrcRegs[NumParts - 1 - i]});
2069 DstRegs.push_back(DstPart.getReg(0));
2070 }
2071
2072 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2073
2074 Observer.changedInstr(MI);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 case TargetOpcode::G_PTRMASK: {
2080 if (TypeIdx != 1)
2081 return UnableToLegalize;
2082 Observer.changingInstr(MI);
2083 narrowScalarSrc(MI, NarrowTy, 2);
2084 Observer.changedInstr(MI);
2085 return Legalized;
2086 }
2087 case TargetOpcode::G_FPTOUI:
2088 case TargetOpcode::G_FPTOSI:
2089 case TargetOpcode::G_FPTOUI_SAT:
2090 case TargetOpcode::G_FPTOSI_SAT:
2091 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2092 case TargetOpcode::G_FPEXT:
2093 if (TypeIdx != 0)
2094 return UnableToLegalize;
2095 Observer.changingInstr(MI);
2096 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2097 Observer.changedInstr(MI);
2098 return Legalized;
2099 case TargetOpcode::G_FLDEXP:
2100 case TargetOpcode::G_STRICT_FLDEXP:
2101 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2102 case TargetOpcode::G_VSCALE: {
2103 Register Dst = MI.getOperand(0).getReg();
2104 LLT Ty = MRI.getType(Dst);
2105
2106 // Assume VSCALE(1) fits into a legal integer
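// i.e. G_VSCALE C in the wide type becomes zext(G_VSCALE 1 : NarrowTy) * C.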
2107 const APInt One(NarrowTy.getSizeInBits(), 1);
2108 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2109 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2110 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2111 MIRBuilder.buildMul(Dst, ZExt, C);
2112
2113 MI.eraseFromParent();
2114 return Legalized;
2115 }
2116 }
2117}
2118
2119 Register LegalizerHelper::coerceToScalar(Register Val) {
2120 LLT Ty = MRI.getType(Val);
2121 if (Ty.isScalar())
2122 return Val;
2123
2124 const DataLayout &DL = MIRBuilder.getDataLayout();
2125 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2126 if (Ty.isPointer()) {
2127 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2128 return Register();
2129 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2130 }
2131
2132 Register NewVal = Val;
2133
2134 assert(Ty.isVector());
2135 if (Ty.isPointerVector())
2136 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2137 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138}
2139
2140 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2141 unsigned OpIdx, unsigned ExtOpcode) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2144 MO.setReg(ExtB.getReg(0));
2145}
2146
2147 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2148 unsigned OpIdx) {
2149 MachineOperand &MO = MI.getOperand(OpIdx);
2150 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2151 MO.setReg(ExtB.getReg(0));
2152}
2153
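// Re-define operand OpIdx of MI with a fresh WideTy register and emit
// TruncOpcode (typically G_TRUNC) right after MI to recover the original
// narrow value.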
2154 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2155 unsigned OpIdx, unsigned TruncOpcode) {
2156 MachineOperand &MO = MI.getOperand(OpIdx);
2157 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2158 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2159 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2160 MO.setReg(DstExt);
2161}
2162
2163 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2164 unsigned OpIdx, unsigned ExtOpcode) {
2165 MachineOperand &MO = MI.getOperand(OpIdx);
2166 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2167 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2168 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2169 MO.setReg(DstTrunc);
2170}
2171
2172 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2173 unsigned OpIdx) {
2174 MachineOperand &MO = MI.getOperand(OpIdx);
2175 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2176 Register Dst = MO.getReg();
2177 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2178 MO.setReg(DstExt);
2179 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2180}
2181
2182 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2183 unsigned OpIdx) {
2184 MachineOperand &MO = MI.getOperand(OpIdx);
2185 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2186}
2187
2188 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2189 MachineOperand &Op = MI.getOperand(OpIdx);
2190 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2191}
2192
2193 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2194 MachineOperand &MO = MI.getOperand(OpIdx);
2195 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2196 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2197 MIRBuilder.buildBitcast(MO, CastDst);
2198 MO.setReg(CastDst);
2199}
2200
2201 LegalizerHelper::LegalizeResult
2202 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2203 LLT WideTy) {
2204 if (TypeIdx != 1)
2205 return UnableToLegalize;
2206
2207 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2208 if (DstTy.isVector())
2209 return UnableToLegalize;
2210
2211 LLT SrcTy = MRI.getType(Src1Reg);
2212 const int DstSize = DstTy.getSizeInBits();
2213 const int SrcSize = SrcTy.getSizeInBits();
2214 const int WideSize = WideTy.getSizeInBits();
2215 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2216
2217 unsigned NumOps = MI.getNumOperands();
2218 unsigned NumSrc = MI.getNumOperands() - 1;
2219 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2220
2221 if (WideSize >= DstSize) {
2222 // Directly pack the bits in the target type.
2223 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2224
2225 for (unsigned I = 2; I != NumOps; ++I) {
2226 const unsigned Offset = (I - 1) * PartSize;
2227
2228 Register SrcReg = MI.getOperand(I).getReg();
2229 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2230
2231 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2232
2233 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2234 MRI.createGenericVirtualRegister(WideTy);
2235
2236 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2237 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2238 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2239 ResultReg = NextResult;
2240 }
2241
2242 if (WideSize > DstSize)
2243 MIRBuilder.buildTrunc(DstReg, ResultReg);
2244 else if (DstTy.isPointer())
2245 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2246
2247 MI.eraseFromParent();
2248 return Legalized;
2249 }
2250
2251 // Unmerge the original values to the GCD type, and recombine to the next
2252 // multiple greater than the original type.
2253 //
2254 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2255 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2256 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2257 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2258 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2259 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2260 // %12:_(s12) = G_MERGE_VALUES %10, %11
2261 //
2262 // Padding with undef if necessary:
2263 //
2264 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2265 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2266 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2267 // %7:_(s2) = G_IMPLICIT_DEF
2268 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2269 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2270 // %10:_(s12) = G_MERGE_VALUES %8, %9
2271
2272 const int GCD = std::gcd(SrcSize, WideSize);
2273 LLT GCDTy = LLT::scalar(GCD);
2274
2275 SmallVector<Register, 8> NewMergeRegs;
2276 SmallVector<Register, 8> Unmerges;
2277 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2278
2279 // Decompose the original operands if they don't evenly divide.
2280 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2281 Register SrcReg = MO.getReg();
2282 if (GCD == SrcSize) {
2283 Unmerges.push_back(SrcReg);
2284 } else {
2285 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2286 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2287 Unmerges.push_back(Unmerge.getReg(J));
2288 }
2289 }
2290
2291 // Pad with undef to the next size that is a multiple of the requested size.
2292 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2293 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2294 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2295 Unmerges.push_back(UndefReg);
2296 }
2297
2298 const int PartsPerGCD = WideSize / GCD;
2299
2300 // Build merges of each piece.
2301 ArrayRef<Register> Slicer(Unmerges);
2302 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2303 auto Merge =
2304 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2305 NewMergeRegs.push_back(Merge.getReg(0));
2306 }
2307
2308 // A truncate may be necessary if the requested type doesn't evenly divide the
2309 // original result type.
2310 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2311 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2312 } else {
2313 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2314 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2315 }
2316
2317 MI.eraseFromParent();
2318 return Legalized;
2319}
2320
2321 LegalizerHelper::LegalizeResult
2322 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2323 LLT WideTy) {
2324 if (TypeIdx != 0)
2325 return UnableToLegalize;
2326
2327 int NumDst = MI.getNumOperands() - 1;
2328 Register SrcReg = MI.getOperand(NumDst).getReg();
2329 LLT SrcTy = MRI.getType(SrcReg);
2330 if (SrcTy.isVector())
2331 return UnableToLegalize;
2332
2333 Register Dst0Reg = MI.getOperand(0).getReg();
2334 LLT DstTy = MRI.getType(Dst0Reg);
2335 if (!DstTy.isScalar())
2336 return UnableToLegalize;
2337
2338 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2339 if (SrcTy.isPointer()) {
2340 const DataLayout &DL = MIRBuilder.getDataLayout();
2341 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2342 LLVM_DEBUG(
2343 dbgs() << "Not casting non-integral address space integer\n");
2344 return UnableToLegalize;
2345 }
2346
2347 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2348 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2349 }
2350
2351 // Widen SrcTy to WideTy. This does not affect the result, but since the
2352 // user requested this size, it is probably better handled than SrcTy and
2353 // should reduce the total number of legalization artifacts.
2354 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2355 SrcTy = WideTy;
2356 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2357 }
2358
2359 // There's no unmerge type to target. Directly extract the bits from the
2360 // source type.
2361 unsigned DstSize = DstTy.getSizeInBits();
2362
2363 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2364 for (int I = 1; I != NumDst; ++I) {
2365 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2366 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2368 }
2369
2370 MI.eraseFromParent();
2371 return Legalized;
2372 }
2373
2374 // Extend the source to a wider type.
2375 LLT LCMTy = getLCMType(SrcTy, WideTy);
2376
2377 Register WideSrc = SrcReg;
2378 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2379 // TODO: If this is an integral address space, cast to integer and anyext.
2380 if (SrcTy.isPointer()) {
2381 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2382 return UnableToLegalize;
2383 }
2384
2385 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2386 }
2387
2388 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2389
2390 // Create a sequence of unmerges and merges to the original results. Since we
2391 // may have widened the source, we will need to pad the results with dead defs
2392 // to cover the source register.
2393 // e.g. widen s48 to s64:
2394 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2395 //
2396 // =>
2397 // %4:_(s192) = G_ANYEXT %0:_(s96)
2398 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2399 // ; unpack to GCD type, with extra dead defs
2400 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2401 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2402 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2403 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2404 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2405 const LLT GCDTy = getGCDType(WideTy, DstTy);
2406 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2407 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2408
2409 // Directly unmerge to the destination without going through a GCD type
2410 // if possible
2411 if (PartsPerRemerge == 1) {
2412 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2413
2414 for (int I = 0; I != NumUnmerge; ++I) {
2415 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2416
2417 for (int J = 0; J != PartsPerUnmerge; ++J) {
2418 int Idx = I * PartsPerUnmerge + J;
2419 if (Idx < NumDst)
2420 MIB.addDef(MI.getOperand(Idx).getReg());
2421 else {
2422 // Create dead def for excess components.
2423 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 }
2425 }
2426
2427 MIB.addUse(Unmerge.getReg(I));
2428 }
2429 } else {
2430 SmallVector<Register, 16> Parts;
2431 for (int J = 0; J != NumUnmerge; ++J)
2432 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2433
2434 SmallVector<Register, 8> RemergeParts;
2435 for (int I = 0; I != NumDst; ++I) {
2436 for (int J = 0; J < PartsPerRemerge; ++J) {
2437 const int Idx = I * PartsPerRemerge + J;
2438 RemergeParts.emplace_back(Parts[Idx]);
2439 }
2440
2441 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2442 RemergeParts.clear();
2443 }
2444 }
2445
2446 MI.eraseFromParent();
2447 return Legalized;
2448}
2449
2450 LegalizerHelper::LegalizeResult
2451 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2452 LLT WideTy) {
2453 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2454 unsigned Offset = MI.getOperand(2).getImm();
2455
2456 if (TypeIdx == 0) {
2457 if (SrcTy.isVector() || DstTy.isVector())
2458 return UnableToLegalize;
2459
2460 SrcOp Src(SrcReg);
2461 if (SrcTy.isPointer()) {
2462 // Extracts from pointers can be handled only if they are really just
2463 // simple integers.
2464 const DataLayout &DL = MIRBuilder.getDataLayout();
2465 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2466 return UnableToLegalize;
2467
2468 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2469 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 SrcTy = SrcAsIntTy;
2471 }
2472
2473 if (DstTy.isPointer())
2474 return UnableToLegalize;
2475
2476 if (Offset == 0) {
2477 // Avoid a shift in the degenerate case.
2478 MIRBuilder.buildTrunc(DstReg,
2479 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2480 MI.eraseFromParent();
2481 return Legalized;
2482 }
2483
2484 // Do a shift in the source type.
2485 LLT ShiftTy = SrcTy;
2486 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2487 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2488 ShiftTy = WideTy;
2489 }
2490
2491 auto LShr = MIRBuilder.buildLShr(
2492 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2493 MIRBuilder.buildTrunc(DstReg, LShr);
2494 MI.eraseFromParent();
2495 return Legalized;
2496 }
2497
2498 if (SrcTy.isScalar()) {
2499 Observer.changingInstr(MI);
2500 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2501 Observer.changedInstr(MI);
2502 return Legalized;
2503 }
2504
2505 if (!SrcTy.isVector())
2506 return UnableToLegalize;
2507
2508 if (DstTy != SrcTy.getElementType())
2509 return UnableToLegalize;
2510
2511 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2512 return UnableToLegalize;
2513
2514 Observer.changingInstr(MI);
2515 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2516
2517 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2518 Offset);
2519 widenScalarDst(MI, WideTy.getScalarType(), 0);
2520 Observer.changedInstr(MI);
2521 return Legalized;
2522}
2523
2524 LegalizerHelper::LegalizeResult
2525 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2526 LLT WideTy) {
2527 if (TypeIdx != 0 || WideTy.isVector())
2528 return UnableToLegalize;
2529 Observer.changingInstr(MI);
2530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2531 widenScalarDst(MI, WideTy);
2532 Observer.changedInstr(MI);
2533 return Legalized;
2534}
2535
2536 LegalizerHelper::LegalizeResult
2537 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2538 LLT WideTy) {
2539 unsigned Opcode;
2540 unsigned ExtOpcode;
2541 std::optional<Register> CarryIn;
2542 switch (MI.getOpcode()) {
2543 default:
2544 llvm_unreachable("Unexpected opcode!");
2545 case TargetOpcode::G_SADDO:
2546 Opcode = TargetOpcode::G_ADD;
2547 ExtOpcode = TargetOpcode::G_SEXT;
2548 break;
2549 case TargetOpcode::G_SSUBO:
2550 Opcode = TargetOpcode::G_SUB;
2551 ExtOpcode = TargetOpcode::G_SEXT;
2552 break;
2553 case TargetOpcode::G_UADDO:
2554 Opcode = TargetOpcode::G_ADD;
2555 ExtOpcode = TargetOpcode::G_ZEXT;
2556 break;
2557 case TargetOpcode::G_USUBO:
2558 Opcode = TargetOpcode::G_SUB;
2559 ExtOpcode = TargetOpcode::G_ZEXT;
2560 break;
2561 case TargetOpcode::G_SADDE:
2562 Opcode = TargetOpcode::G_UADDE;
2563 ExtOpcode = TargetOpcode::G_SEXT;
2564 CarryIn = MI.getOperand(4).getReg();
2565 break;
2566 case TargetOpcode::G_SSUBE:
2567 Opcode = TargetOpcode::G_USUBE;
2568 ExtOpcode = TargetOpcode::G_SEXT;
2569 CarryIn = MI.getOperand(4).getReg();
2570 break;
2571 case TargetOpcode::G_UADDE:
2572 Opcode = TargetOpcode::G_UADDE;
2573 ExtOpcode = TargetOpcode::G_ZEXT;
2574 CarryIn = MI.getOperand(4).getReg();
2575 break;
2576 case TargetOpcode::G_USUBE:
2577 Opcode = TargetOpcode::G_USUBE;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 CarryIn = MI.getOperand(4).getReg();
2580 break;
2581 }
2582
2583 if (TypeIdx == 1) {
2584 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2585
2586 Observer.changingInstr(MI);
2587 if (CarryIn)
2588 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2589 widenScalarDst(MI, WideTy, 1);
2590
2591 Observer.changedInstr(MI);
2592 return Legalized;
2593 }
2594
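// Illustrative sketch (types are examples): G_SADDO on s8 widened to s32:
//   %a:_(s32) = G_SEXT %lhs(s8); %b:_(s32) = G_SEXT %rhs(s8)
//   %sum:_(s32) = G_ADD %a, %b
//   %res:_(s8) = G_TRUNC %sum
//   overflow is set iff G_SEXT(%res) != %sum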
2595 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2596 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2597 // Do the arithmetic in the larger type.
2598 Register NewOp;
2599 if (CarryIn) {
2600 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2601 NewOp = MIRBuilder
2602 .buildInstr(Opcode, {WideTy, CarryOutTy},
2603 {LHSExt, RHSExt, *CarryIn})
2604 .getReg(0);
2605 } else {
2606 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2607 }
2608 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2609 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2610 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2611 // There is no overflow if the ExtOp is the same as NewOp.
2612 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2613 // Now trunc the NewOp to the original result.
2614 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2615 MI.eraseFromParent();
2616 return Legalized;
2617}
2618
2619 LegalizerHelper::LegalizeResult
2620 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2621 LLT WideTy) {
2622 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2623 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2624 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2625 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2626 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2627 // We can convert this to:
2628 // 1. Any extend iN to iM
2629 // 2. SHL by M-N
2630 // 3. [US][ADD|SUB|SHL]SAT
2631 // 4. L/ASHR by M-N
2632 //
2633 // It may be more efficient to lower this to a min and a max operation in
2634 // the higher precision arithmetic if the promoted operation isn't legal,
2635 // but this decision is up to the target's lowering request.
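// Illustrative sketch (types are examples): G_UADDSAT on s8 widened to s32:
// shift both any-extended operands left by 24, do G_UADDSAT in s32, shift
// the result right (logically) by 24, then truncate back to s8. Placing the
// operands in the top bits makes the s32 saturation boundary coincide with
// the s8 one.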
2636 Register DstReg = MI.getOperand(0).getReg();
2637
2638 unsigned NewBits = WideTy.getScalarSizeInBits();
2639 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2640
2641 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2642 // must not left shift the RHS to preserve the shift amount.
2643 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2644 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2645 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2646 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2648 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2649
2650 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2651 {ShiftL, ShiftR}, MI.getFlags());
2652
2653 // Use a shift that will preserve the number of sign bits when the trunc is
2654 // folded away.
2655 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2656 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2657
2658 MIRBuilder.buildTrunc(DstReg, Result);
2659 MI.eraseFromParent();
2660 return Legalized;
2661}
2662
2663 LegalizerHelper::LegalizeResult
2664 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2665 LLT WideTy) {
2666 if (TypeIdx == 1) {
2667 Observer.changingInstr(MI);
2668 widenScalarDst(MI, WideTy, 1);
2669 Observer.changedInstr(MI);
2670 return Legalized;
2671 }
2672
2673 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2674 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2675 LLT SrcTy = MRI.getType(LHS);
2676 LLT OverflowTy = MRI.getType(OriginalOverflow);
2677 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2678
2679 // To determine if the result overflowed in the larger type, we extend the
2680 // input to the larger type, do the multiply (checking if it overflows),
2681 // then also check the high bits of the result to see if overflow happened
2682 // there.
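// Illustrative sketch (types are examples): G_UMULO on s8 widened to s16:
// s16 is at least twice as wide, so a plain G_MUL of the zero-extended
// operands cannot overflow; the result is the truncated product, and the
// overflow flag is set when the product does not zero-extend from 8 bits
// (i.e. the high byte is non-zero).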
2683 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2684 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2685 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2686
2687 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2688 // so we don't need to check the overflow result of larger type Mulo.
2689 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2690
2691 unsigned MulOpc =
2692 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2693
2694 MachineInstrBuilder Mulo;
2695 if (WideMulCanOverflow)
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2697 {LeftOperand, RightOperand});
2698 else
2699 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2700
2701 auto Mul = Mulo->getOperand(0);
2702 MIRBuilder.buildTrunc(Result, Mul);
2703
2704 MachineInstrBuilder ExtResult;
2705 // Overflow occurred if it occurred in the larger type, or if the high part
2706 // of the result does not zero/sign-extend the low part. Check this second
2707 // possibility first.
2708 if (IsSigned) {
2709 // For signed, overflow occurred when the high part does not sign-extend
2710 // the low part.
2711 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2712 } else {
2713 // Unsigned overflow occurred when the high part does not zero-extend the
2714 // low part.
2715 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2716 }
2717
2718 if (WideMulCanOverflow) {
2719 auto Overflow =
2720 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2721 // Finally check if the multiplication in the larger type itself overflowed.
2722 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2723 } else {
2724 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2725 }
2726 MI.eraseFromParent();
2727 return Legalized;
2728}
2729
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2732 unsigned Opcode = MI.getOpcode();
2733 switch (Opcode) {
2734 default:
2735 return UnableToLegalize;
2736 case TargetOpcode::G_ATOMICRMW_XCHG:
2737 case TargetOpcode::G_ATOMICRMW_ADD:
2738 case TargetOpcode::G_ATOMICRMW_SUB:
2739 case TargetOpcode::G_ATOMICRMW_AND:
2740 case TargetOpcode::G_ATOMICRMW_OR:
2741 case TargetOpcode::G_ATOMICRMW_XOR:
2742 case TargetOpcode::G_ATOMICRMW_MIN:
2743 case TargetOpcode::G_ATOMICRMW_MAX:
2744 case TargetOpcode::G_ATOMICRMW_UMIN:
2745 case TargetOpcode::G_ATOMICRMW_UMAX:
2746 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2747 Observer.changingInstr(MI);
2748 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2749 widenScalarDst(MI, WideTy, 0);
2750 Observer.changedInstr(MI);
2751 return Legalized;
2752 case TargetOpcode::G_ATOMIC_CMPXCHG:
2753 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2754 Observer.changingInstr(MI);
2755 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2756 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2761 if (TypeIdx == 0) {
2762 Observer.changingInstr(MI);
2763 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2764 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2765 widenScalarDst(MI, WideTy, 0);
2766 Observer.changedInstr(MI);
2767 return Legalized;
2768 }
2769 assert(TypeIdx == 1 &&
2770 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2771 Observer.changingInstr(MI);
2772 widenScalarDst(MI, WideTy, 1);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_EXTRACT:
2776 return widenScalarExtract(MI, TypeIdx, WideTy);
2777 case TargetOpcode::G_INSERT:
2778 return widenScalarInsert(MI, TypeIdx, WideTy);
2779 case TargetOpcode::G_MERGE_VALUES:
2780 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2781 case TargetOpcode::G_UNMERGE_VALUES:
2782 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2783 case TargetOpcode::G_SADDO:
2784 case TargetOpcode::G_SSUBO:
2785 case TargetOpcode::G_UADDO:
2786 case TargetOpcode::G_USUBO:
2787 case TargetOpcode::G_SADDE:
2788 case TargetOpcode::G_SSUBE:
2789 case TargetOpcode::G_UADDE:
2790 case TargetOpcode::G_USUBE:
2791 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_UMULO:
2793 case TargetOpcode::G_SMULO:
2794 return widenScalarMulo(MI, TypeIdx, WideTy);
2795 case TargetOpcode::G_SADDSAT:
2796 case TargetOpcode::G_SSUBSAT:
2797 case TargetOpcode::G_SSHLSAT:
2798 case TargetOpcode::G_UADDSAT:
2799 case TargetOpcode::G_USUBSAT:
2800 case TargetOpcode::G_USHLSAT:
2801 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_CTTZ:
2803 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2804 case TargetOpcode::G_CTLZ:
2805 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2806 case TargetOpcode::G_CTPOP: {
2807 if (TypeIdx == 0) {
2808 Observer.changingInstr(MI);
2809 widenScalarDst(MI, WideTy, 0);
2810 Observer.changedInstr(MI);
2811 return Legalized;
2812 }
2813
2814 Register SrcReg = MI.getOperand(1).getReg();
2815
2816 // First extend the input.
2817 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2818 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2819 ? TargetOpcode::G_ANYEXT
2820 : TargetOpcode::G_ZEXT;
2821 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2822 LLT CurTy = MRI.getType(SrcReg);
2823 unsigned NewOpc = Opcode;
2824 if (NewOpc == TargetOpcode::G_CTTZ) {
2825 // The count is the same in the larger type except if the original
2826 // value was zero. This can be handled by setting the bit just off
2827 // the top of the original type.
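// e.g. widening G_CTTZ from s8 to s32 (illustrative types): OR the extended
// source with 1 << 8 so a zero input still yields 8 trailing zeros, which
// lets us use G_CTTZ_ZERO_UNDEF below.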
2828 auto TopBit =
2829 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2830 MIBSrc = MIRBuilder.buildOr(
2831 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2832 // Now we know the operand is non-zero, use the more relaxed opcode.
2833 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2834 }
2835
2836 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2837
2838 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2839 // An optimization where the result is the CTLZ after the left shift by
2840 // (difference in size between WideTy and CurTy), that is,
2841 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2842 // Result = ctlz MIBSrc
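// After the shift the original MSB sits at the top of WideTy, so the leading
// zero count in the wide type equals the count in the original type and no
// subtraction is needed (contrast with the plain G_CTLZ path below).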
2843 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2844 MIRBuilder.buildConstant(WideTy, SizeDiff));
2845 }
2846
2847 // Perform the operation at the larger size.
2848 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2849 // This is already the correct result for CTPOP and CTTZs
2850 if (Opcode == TargetOpcode::G_CTLZ) {
2851 // The correct result is NewOp - (difference in size between WideTy and CurTy).
2852 MIBNewOp = MIRBuilder.buildSub(
2853 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2854 }
2855
2856 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2857 MI.eraseFromParent();
2858 return Legalized;
2859 }
2860 case TargetOpcode::G_BSWAP: {
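// Byte-swap the any-extended value in WideTy; the original bytes then occupy
// the high end of the wide result, so a logical shift right by the width
// difference followed by a truncate yields the narrow byte-swap.
// Illustrative: bswap of s16 via s32 = trunc(lshr(bswap32(anyext(x)), 16)).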
2861 Observer.changingInstr(MI);
2862 Register DstReg = MI.getOperand(0).getReg();
2863
2864 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2865 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2866 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2867 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2868
2869 MI.getOperand(0).setReg(DstExt);
2870
2871 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2872
2873 LLT Ty = MRI.getType(DstReg);
2874 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2875 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2876 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2877
2878 MIRBuilder.buildTrunc(DstReg, ShrReg);
2879 Observer.changedInstr(MI);
2880 return Legalized;
2881 }
2882 case TargetOpcode::G_BITREVERSE: {
2883 Observer.changingInstr(MI);
2884
2885 Register DstReg = MI.getOperand(0).getReg();
2886 LLT Ty = MRI.getType(DstReg);
2887 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2888
2889 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2890 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2891 MI.getOperand(0).setReg(DstExt);
2892 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2893
2894 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2895 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2896 MIRBuilder.buildTrunc(DstReg, Shift);
2897 Observer.changedInstr(MI);
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_FREEZE:
2901 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2902 Observer.changingInstr(MI);
2903 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2904 widenScalarDst(MI, WideTy);
2905 Observer.changedInstr(MI);
2906 return Legalized;
2907
2908 case TargetOpcode::G_ABS:
2909 Observer.changingInstr(MI);
2910 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2911 widenScalarDst(MI, WideTy);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_ADD:
2916 case TargetOpcode::G_AND:
2917 case TargetOpcode::G_MUL:
2918 case TargetOpcode::G_OR:
2919 case TargetOpcode::G_XOR:
2920 case TargetOpcode::G_SUB:
2921 case TargetOpcode::G_SHUFFLE_VECTOR:
2922 // Perform operation at larger width (any extension is fine here, high bits
2923 // don't affect the result) and then truncate the result back to the
2924 // original type.
2925 Observer.changingInstr(MI);
2926 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarDst(MI, WideTy);
2929 Observer.changedInstr(MI);
2930 return Legalized;
2931
2932 case TargetOpcode::G_SBFX:
2933 case TargetOpcode::G_UBFX:
2934 Observer.changingInstr(MI);
2935
2936 if (TypeIdx == 0) {
2937 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2938 widenScalarDst(MI, WideTy);
2939 } else {
2940 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2941 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2942 }
2943
2944 Observer.changedInstr(MI);
2945 return Legalized;
2946
2947 case TargetOpcode::G_SHL:
2948 Observer.changingInstr(MI);
2949
2950 if (TypeIdx == 0) {
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 assert(TypeIdx == 1);
2955 // The "number of bits to shift" operand must preserve its value as an
2956 // unsigned integer:
2957 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2958 }
2959
2960 Observer.changedInstr(MI);
2961 return Legalized;
2962
2963 case TargetOpcode::G_ROTR:
2964 case TargetOpcode::G_ROTL:
2965 if (TypeIdx != 1)
2966 return UnableToLegalize;
2967
2968 Observer.changingInstr(MI);
2969 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2970 Observer.changedInstr(MI);
2971 return Legalized;
2972
2973 case TargetOpcode::G_SDIV:
2974 case TargetOpcode::G_SREM:
2975 case TargetOpcode::G_SMIN:
2976 case TargetOpcode::G_SMAX:
2977 case TargetOpcode::G_ABDS:
2978 Observer.changingInstr(MI);
2979 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2980 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2981 widenScalarDst(MI, WideTy);
2982 Observer.changedInstr(MI);
2983 return Legalized;
2984
2985 case TargetOpcode::G_SDIVREM:
2986 Observer.changingInstr(MI);
2987 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2988 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2989 widenScalarDst(MI, WideTy);
2990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2991 widenScalarDst(MI, WideTy, 1);
2992 Observer.changedInstr(MI);
2993 return Legalized;
2994
2995 case TargetOpcode::G_ASHR:
2996 case TargetOpcode::G_LSHR:
2997 Observer.changingInstr(MI);
2998
2999 if (TypeIdx == 0) {
3000 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3001 : TargetOpcode::G_ZEXT;
3002
3003 widenScalarSrc(MI, WideTy, 1, CvtOp);
3004 widenScalarDst(MI, WideTy);
3005 } else {
3006 assert(TypeIdx == 1);
3007 // The "number of bits to shift" operand must preserve its value as an
3008 // unsigned integer:
3009 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3010 }
3011
3012 Observer.changedInstr(MI);
3013 return Legalized;
3014 case TargetOpcode::G_UDIV:
3015 case TargetOpcode::G_UREM:
3016 case TargetOpcode::G_ABDU:
3017 Observer.changingInstr(MI);
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3019 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3020 widenScalarDst(MI, WideTy);
3021 Observer.changedInstr(MI);
3022 return Legalized;
3023 case TargetOpcode::G_UDIVREM:
3024 Observer.changingInstr(MI);
3025 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3026 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3027 widenScalarDst(MI, WideTy);
3028 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3029 widenScalarDst(MI, WideTy, 1);
3030 Observer.changedInstr(MI);
3031 return Legalized;
3032 case TargetOpcode::G_UMIN:
3033 case TargetOpcode::G_UMAX: {
3034 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3035
3036 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3037 unsigned ExtOpc =
3038 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3039 getApproximateEVTForLLT(WideTy, Ctx))
3040 ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 Observer.changingInstr(MI);
3044 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3045 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3046 widenScalarDst(MI, WideTy);
3047 Observer.changedInstr(MI);
3048 return Legalized;
3049 }
3050
3051 case TargetOpcode::G_SELECT:
3052 Observer.changingInstr(MI);
3053 if (TypeIdx == 0) {
3054 // Perform operation at larger width (any extension is fine here, high
3055 // bits don't affect the result) and then truncate the result back to the
3056 // original type.
3057 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3058 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3059 widenScalarDst(MI, WideTy);
3060 } else {
3061 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3062 // Explicit extension is required here since high bits affect the result.
3063 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3064 }
3065 Observer.changedInstr(MI);
3066 return Legalized;
3067
3068 case TargetOpcode::G_FPEXT:
3069 if (TypeIdx != 1)
3070 return UnableToLegalize;
3071
3072 Observer.changingInstr(MI);
3073 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3074 Observer.changedInstr(MI);
3075 return Legalized;
3076 case TargetOpcode::G_FPTOSI:
3077 case TargetOpcode::G_FPTOUI:
3078 case TargetOpcode::G_INTRINSIC_LRINT:
3079 case TargetOpcode::G_INTRINSIC_LLRINT:
3080 case TargetOpcode::G_IS_FPCLASS:
3081 Observer.changingInstr(MI);
3082
3083 if (TypeIdx == 0)
3084 widenScalarDst(MI, WideTy);
3085 else
3086 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3087
3088 Observer.changedInstr(MI);
3089 return Legalized;
3090 case TargetOpcode::G_SITOFP:
3091 Observer.changingInstr(MI);
3092
3093 if (TypeIdx == 0)
3094 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3095 else
3096 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3097
3098 Observer.changedInstr(MI);
3099 return Legalized;
3100 case TargetOpcode::G_UITOFP:
3101 Observer.changingInstr(MI);
3102
3103 if (TypeIdx == 0)
3104 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3105 else
3106 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3107
3108 Observer.changedInstr(MI);
3109 return Legalized;
3110 case TargetOpcode::G_FPTOSI_SAT:
3111 case TargetOpcode::G_FPTOUI_SAT:
3112 Observer.changingInstr(MI);
3113
3114 if (TypeIdx == 0) {
3115 Register OldDst = MI.getOperand(0).getReg();
3116 LLT Ty = MRI.getType(OldDst);
3117 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3118 Register NewDst;
3119 MI.getOperand(0).setReg(ExtReg);
3120 uint64_t ShortBits = Ty.getScalarSizeInBits();
3121 uint64_t WideBits = WideTy.getScalarSizeInBits();
3122 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3123 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3124 // z = i16 fptosi_sat(a)
3125 // ->
3126 // x = i32 fptosi_sat(a)
3127 // y = smin(x, 32767)
3128 // z = smax(y, -32768)
3129 auto MaxVal = MIRBuilder.buildConstant(
3130 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3131 auto MinVal = MIRBuilder.buildConstant(
3132 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3133 Register MidReg =
3134 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3135 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3136 } else {
3137 // z = i16 fptoui_sat(a)
3138 // ->
3139 // x = i32 fptoui_sat(a)
3140 // y = smin(x, 65535)
3141 auto MaxVal = MIRBuilder.buildConstant(
3142 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3143 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3144 }
3145 MIRBuilder.buildTrunc(OldDst, NewDst);
3146 } else
3147 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3148
3149 Observer.changedInstr(MI);
3150 return Legalized;
3151 case TargetOpcode::G_LOAD:
3152 case TargetOpcode::G_SEXTLOAD:
3153 case TargetOpcode::G_ZEXTLOAD:
3154 Observer.changingInstr(MI);
3155 widenScalarDst(MI, WideTy);
3156 Observer.changedInstr(MI);
3157 return Legalized;
3158
3159 case TargetOpcode::G_STORE: {
3160 if (TypeIdx != 0)
3161 return UnableToLegalize;
3162
3163 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3164 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3165 if (!Ty.isScalar()) {
3166 // We need to widen the vector element type.
3167 Observer.changingInstr(MI);
3168 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3169 // We also need to adjust the MMO to turn this into a truncating store.
3170 MachineMemOperand &MMO = **MI.memoperands_begin();
3171 MachineFunction &MF = MIRBuilder.getMF();
3172 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3173 MI.setMemRefs(MF, {NewMMO});
3174 Observer.changedInstr(MI);
3175 return Legalized;
3176 }
3177
3178 Observer.changingInstr(MI);
3179
3180 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3181 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3182 widenScalarSrc(MI, WideTy, 0, ExtType);
3183
3184 Observer.changedInstr(MI);
3185 return Legalized;
3186 }
3187 case TargetOpcode::G_CONSTANT: {
3188 MachineOperand &SrcMO = MI.getOperand(1);
3189 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3190 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3191 MRI.getType(MI.getOperand(0).getReg()));
3192 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3193 ExtOpc == TargetOpcode::G_ANYEXT) &&
3194 "Illegal Extend");
3195 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3196 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3197 ? SrcVal.sext(WideTy.getSizeInBits())
3198 : SrcVal.zext(WideTy.getSizeInBits());
3199 Observer.changingInstr(MI);
3200 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3201
3202 widenScalarDst(MI, WideTy);
3203 Observer.changedInstr(MI);
3204 return Legalized;
3205 }
3206 case TargetOpcode::G_FCONSTANT: {
3207 // To avoid changing the bits of the constant due to extension to a larger
3208 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3209 MachineOperand &SrcMO = MI.getOperand(1);
3210 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3211 MIRBuilder.setInstrAndDebugLoc(MI);
3212 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3213 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3214 MI.eraseFromParent();
3215 return Legalized;
3216 }
3217 case TargetOpcode::G_IMPLICIT_DEF: {
3218 Observer.changingInstr(MI);
3219 widenScalarDst(MI, WideTy);
3220 Observer.changedInstr(MI);
3221 return Legalized;
3222 }
3223 case TargetOpcode::G_BRCOND:
3224 Observer.changingInstr(MI);
3225 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3226 Observer.changedInstr(MI);
3227 return Legalized;
3228
3229 case TargetOpcode::G_FCMP:
3230 Observer.changingInstr(MI);
3231 if (TypeIdx == 0)
3232 widenScalarDst(MI, WideTy);
3233 else {
3234 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3235 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3236 }
3237 Observer.changedInstr(MI);
3238 return Legalized;
3239
3240 case TargetOpcode::G_ICMP:
3241 Observer.changingInstr(MI);
3242 if (TypeIdx == 0)
3243 widenScalarDst(MI, WideTy);
3244 else {
3245 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3246 CmpInst::Predicate Pred =
3247 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3248
3249 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3250 unsigned ExtOpcode =
3251 (CmpInst::isSigned(Pred) ||
3252 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3253 getApproximateEVTForLLT(WideTy, Ctx)))
3254 ? TargetOpcode::G_SEXT
3255 : TargetOpcode::G_ZEXT;
3256 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3257 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3258 }
3259 Observer.changedInstr(MI);
3260 return Legalized;
3261
3262 case TargetOpcode::G_PTR_ADD:
3263 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3264 Observer.changingInstr(MI);
3265 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3266 Observer.changedInstr(MI);
3267 return Legalized;
3268
3269 case TargetOpcode::G_PHI: {
3270 assert(TypeIdx == 0 && "Expecting only Idx 0");
3271
3272 Observer.changingInstr(MI);
3273 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3274 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3275 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3276 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3277 }
3278
3279 MachineBasicBlock &MBB = *MI.getParent();
3280 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3281 widenScalarDst(MI, WideTy);
3282 Observer.changedInstr(MI);
3283 return Legalized;
3284 }
3285 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3286 if (TypeIdx == 0) {
3287 Register VecReg = MI.getOperand(1).getReg();
3288 LLT VecTy = MRI.getType(VecReg);
3289 Observer.changingInstr(MI);
3290
3291 widenScalarSrc(
3292 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3293 TargetOpcode::G_ANYEXT);
3294
3295 widenScalarDst(MI, WideTy, 0);
3296 Observer.changedInstr(MI);
3297 return Legalized;
3298 }
3299
3300 if (TypeIdx != 2)
3301 return UnableToLegalize;
3302 Observer.changingInstr(MI);
3303 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3304 Observer.changedInstr(MI);
3305 return Legalized;
3306 }
3307 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3308 if (TypeIdx == 0) {
3309 Observer.changingInstr(MI);
3310 const LLT WideEltTy = WideTy.getElementType();
3311
3312 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3313 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3314 widenScalarDst(MI, WideTy, 0);
3315 Observer.changedInstr(MI);
3316 return Legalized;
3317 }
3318
3319 if (TypeIdx == 1) {
3320 Observer.changingInstr(MI);
3321
3322 Register VecReg = MI.getOperand(1).getReg();
3323 LLT VecTy = MRI.getType(VecReg);
3324 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3325
3326 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3327 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3328 widenScalarDst(MI, WideVecTy, 0);
3329 Observer.changedInstr(MI);
3330 return Legalized;
3331 }
3332
3333 if (TypeIdx == 2) {
3334 Observer.changingInstr(MI);
3335 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3336 Observer.changedInstr(MI);
3337 return Legalized;
3338 }
3339
3340 return UnableToLegalize;
3341 }
3342 case TargetOpcode::G_FADD:
3343 case TargetOpcode::G_FMUL:
3344 case TargetOpcode::G_FSUB:
3345 case TargetOpcode::G_FMA:
3346 case TargetOpcode::G_FMAD:
3347 case TargetOpcode::G_FNEG:
3348 case TargetOpcode::G_FABS:
3349 case TargetOpcode::G_FCANONICALIZE:
3350 case TargetOpcode::G_FMINNUM:
3351 case TargetOpcode::G_FMAXNUM:
3352 case TargetOpcode::G_FMINNUM_IEEE:
3353 case TargetOpcode::G_FMAXNUM_IEEE:
3354 case TargetOpcode::G_FMINIMUM:
3355 case TargetOpcode::G_FMAXIMUM:
3356 case TargetOpcode::G_FMINIMUMNUM:
3357 case TargetOpcode::G_FMAXIMUMNUM:
3358 case TargetOpcode::G_FDIV:
3359 case TargetOpcode::G_FREM:
3360 case TargetOpcode::G_FCEIL:
3361 case TargetOpcode::G_FFLOOR:
3362 case TargetOpcode::G_FCOS:
3363 case TargetOpcode::G_FSIN:
3364 case TargetOpcode::G_FTAN:
3365 case TargetOpcode::G_FACOS:
3366 case TargetOpcode::G_FASIN:
3367 case TargetOpcode::G_FATAN:
3368 case TargetOpcode::G_FATAN2:
3369 case TargetOpcode::G_FCOSH:
3370 case TargetOpcode::G_FSINH:
3371 case TargetOpcode::G_FTANH:
3372 case TargetOpcode::G_FLOG10:
3373 case TargetOpcode::G_FLOG:
3374 case TargetOpcode::G_FLOG2:
3375 case TargetOpcode::G_FRINT:
3376 case TargetOpcode::G_FNEARBYINT:
3377 case TargetOpcode::G_FSQRT:
3378 case TargetOpcode::G_FEXP:
3379 case TargetOpcode::G_FEXP2:
3380 case TargetOpcode::G_FEXP10:
3381 case TargetOpcode::G_FPOW:
3382 case TargetOpcode::G_INTRINSIC_TRUNC:
3383 case TargetOpcode::G_INTRINSIC_ROUND:
3384 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3385 assert(TypeIdx == 0);
3386 Observer.changingInstr(MI);
3387
3388 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3389 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3390
3391 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3392 Observer.changedInstr(MI);
3393 return Legalized;
3394 case TargetOpcode::G_FMODF: {
3395 Observer.changingInstr(MI);
3396 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3397
3398 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3399 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3400 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3401 Observer.changedInstr(MI);
3402 return Legalized;
3403 }
3404 case TargetOpcode::G_FPOWI:
3405 case TargetOpcode::G_FLDEXP:
3406 case TargetOpcode::G_STRICT_FLDEXP: {
3407 if (TypeIdx == 0) {
3408 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3409 return UnableToLegalize;
3410
3411 Observer.changingInstr(MI);
3412 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3413 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3414 Observer.changedInstr(MI);
3415 return Legalized;
3416 }
3417
3418 if (TypeIdx == 1) {
3419 // For some reason SelectionDAG tries to promote to a libcall without
3420 // actually changing the integer type for promotion.
3421 Observer.changingInstr(MI);
3422 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3423 Observer.changedInstr(MI);
3424 return Legalized;
3425 }
3426
3427 return UnableToLegalize;
3428 }
3429 case TargetOpcode::G_FFREXP: {
3430 Observer.changingInstr(MI);
3431
3432 if (TypeIdx == 0) {
3433 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3434 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3435 } else {
3436 widenScalarDst(MI, WideTy, 1);
3437 }
3438
3439 Observer.changedInstr(MI);
3440 return Legalized;
3441 }
3442 case TargetOpcode::G_LROUND:
3443 case TargetOpcode::G_LLROUND:
3444 Observer.changingInstr(MI);
3445
3446 if (TypeIdx == 0)
3447 widenScalarDst(MI, WideTy);
3448 else
3449 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3450
3451 Observer.changedInstr(MI);
3452 return Legalized;
3453
3454 case TargetOpcode::G_INTTOPTR:
3455 if (TypeIdx != 1)
3456 return UnableToLegalize;
3457
3458 Observer.changingInstr(MI);
3459 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3460 Observer.changedInstr(MI);
3461 return Legalized;
3462 case TargetOpcode::G_PTRTOINT:
3463 if (TypeIdx != 0)
3464 return UnableToLegalize;
3465
3466 Observer.changingInstr(MI);
3467 widenScalarDst(MI, WideTy, 0);
3468 Observer.changedInstr(MI);
3469 return Legalized;
3470 case TargetOpcode::G_BUILD_VECTOR: {
3471 Observer.changingInstr(MI);
3472
3473 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3474 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3475 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3476
3477 // Avoid changing the result vector type if the source element type was
3478 // requested.
3479 if (TypeIdx == 1) {
3480 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3481 } else {
3482 widenScalarDst(MI, WideTy, 0);
3483 }
3484
3485 Observer.changedInstr(MI);
3486 return Legalized;
3487 }
3488 case TargetOpcode::G_SEXT_INREG:
3489 if (TypeIdx != 0)
3490 return UnableToLegalize;
3491
3492 Observer.changingInstr(MI);
3493 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3494 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3495 Observer.changedInstr(MI);
3496 return Legalized;
3497 case TargetOpcode::G_PTRMASK: {
3498 if (TypeIdx != 1)
3499 return UnableToLegalize;
3500 Observer.changingInstr(MI);
3501 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3502 Observer.changedInstr(MI);
3503 return Legalized;
3504 }
3505 case TargetOpcode::G_VECREDUCE_ADD: {
3506 if (TypeIdx != 1)
3507 return UnableToLegalize;
3508 Observer.changingInstr(MI);
3509 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3510 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3511 Observer.changedInstr(MI);
3512 return Legalized;
3513 }
3514 case TargetOpcode::G_VECREDUCE_FADD:
3515 case TargetOpcode::G_VECREDUCE_FMUL:
3516 case TargetOpcode::G_VECREDUCE_FMIN:
3517 case TargetOpcode::G_VECREDUCE_FMAX:
3518 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3519 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3520 if (TypeIdx != 0)
3521 return UnableToLegalize;
3522 Observer.changingInstr(MI);
3523 Register VecReg = MI.getOperand(1).getReg();
3524 LLT VecTy = MRI.getType(VecReg);
3525 LLT WideVecTy = VecTy.isVector()
3526 ? LLT::vector(VecTy.getElementCount(), WideTy)
3527 : WideTy;
3528 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3529 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3530 Observer.changedInstr(MI);
3531 return Legalized;
3532 }
3533 case TargetOpcode::G_VSCALE: {
3534 MachineOperand &SrcMO = MI.getOperand(1);
3535 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3536 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3537 // The CImm is always a signed value
3538 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3539 Observer.changingInstr(MI);
3540 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3541 widenScalarDst(MI, WideTy);
3542 Observer.changedInstr(MI);
3543 return Legalized;
3544 }
3545 case TargetOpcode::G_SPLAT_VECTOR: {
3546 if (TypeIdx != 1)
3547 return UnableToLegalize;
3548
3549 Observer.changingInstr(MI);
3550 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3551 Observer.changedInstr(MI);
3552 return Legalized;
3553 }
3554 case TargetOpcode::G_INSERT_SUBVECTOR: {
3555 if (TypeIdx != 0)
3556 return UnableToLegalize;
3557
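 // Handle this by zero-extending both vectors to the wide element type (e.g.
 // s1 elements widened to s8), performing the insert in the wide type, and
 // comparing the result against zero to recover the original narrow elements.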
3558 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3559 Register BigVec = IS.getBigVec();
3560 Register SubVec = IS.getSubVec();
3561
3562 LLT SubVecTy = MRI.getType(SubVec);
3563 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3564
3565 // Widen the G_INSERT_SUBVECTOR
3566 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3567 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3568 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3569 IS.getIndexImm());
3570
3571 // Truncate back down
3572 auto SplatZero = MIRBuilder.buildSplatVector(
3573 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3574 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3575 SplatZero);
3576
3577 MI.eraseFromParent();
3578
3579 return Legalized;
3580 }
3581 }
3582}
3583
3584static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3585 MachineIRBuilder &B, Register Src, LLT Ty) {
3586 auto Unmerge = B.buildUnmerge(Ty, Src);
3587 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3588 Pieces.push_back(Unmerge.getReg(I));
3589}
3590
3591static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3592 MachineIRBuilder &MIRBuilder) {
3593 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3594 MachineFunction &MF = MIRBuilder.getMF();
3595 const DataLayout &DL = MIRBuilder.getDataLayout();
3596 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3597 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3598 LLT DstLLT = MRI.getType(DstReg);
3599
3600 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3601
3602 auto Addr = MIRBuilder.buildConstantPool(
3603 AddrPtrTy,
3604 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3605
3606 MachineMemOperand *MMO =
3607 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3608 MachineMemOperand::MOLoad, DstLLT, Alignment);
3609
3610 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3611}
3612
3613LegalizerHelper::LegalizeResult
3614LegalizerHelper::lowerConstant(MachineInstr &MI) {
3615 const MachineOperand &ConstOperand = MI.getOperand(1);
3616 const Constant *ConstantVal = ConstOperand.getCImm();
3617
3618 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3619 MI.eraseFromParent();
3620
3621 return Legalized;
3622}
3623
3624LegalizerHelper::LegalizeResult
3625LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3626 const MachineOperand &ConstOperand = MI.getOperand(1);
3627 const Constant *ConstantVal = ConstOperand.getFPImm();
3628
3629 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3630 MI.eraseFromParent();
3631
3632 return Legalized;
3633}
3634
3635LegalizerHelper::LegalizeResult
3636LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3637 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3638 if (SrcTy.isVector()) {
3639 LLT SrcEltTy = SrcTy.getElementType();
3640 SmallVector<Register, 8> SrcRegs;
3641
3642 if (DstTy.isVector()) {
3643 int NumDstElt = DstTy.getNumElements();
3644 int NumSrcElt = SrcTy.getNumElements();
3645
3646 LLT DstEltTy = DstTy.getElementType();
3647 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3648 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3649
3650 // If there's an element size mismatch, insert intermediate casts to match
3651 // the result element type.
3652 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3653 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3654 //
3655 // =>
3656 //
3657 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3658 // %4:_(<2 x s8>) = G_BITCAST %2
3659 // %5:_(<2 x s8>) = G_BITCAST %3
3660 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3661 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3662 SrcPartTy = SrcEltTy;
3663 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3664 //
3665 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3666 //
3667 // =>
3668 //
3669 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3670 // %4:_(s16) = G_BITCAST %2
3671 // %5:_(s16) = G_BITCAST %3
3672 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3673 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3674 DstCastTy = DstEltTy;
3675 }
3676
3677 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3678 for (Register &SrcReg : SrcRegs)
3679 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3680 } else
3681 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3682
3683 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3684 MI.eraseFromParent();
3685 return Legalized;
3686 }
3687
3688 if (DstTy.isVector()) {
3689 SmallVector<Register, 8> SrcRegs;
3690 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3691 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3692 MI.eraseFromParent();
3693 return Legalized;
3694 }
3695
3696 return UnableToLegalize;
3697}
3698
3699/// Figure out the bit offset into a register when coercing a vector index for
3700/// the wide element type. This is only for the case when promoting a vector to
3701/// one with larger elements.
3702//
3703///
3704/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3705/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
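///
/// For example (a purely illustrative walk-through of the formula above):
/// indexing an <8 x s8> source through a <2 x s32> cast gives
/// DstEltSize / SrcEltSize = 4, so for %idx = 5 this produces
/// %offset_idx = 5 & 3 = 1 and %offset_bits = 1 << 3 = 8, i.e. the element
/// starts 8 bits into its containing s32 lane.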
3706static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3707 Register Idx,
3708 unsigned NewEltSize,
3709 unsigned OldEltSize) {
3710 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3711 LLT IdxTy = B.getMRI()->getType(Idx);
3712
3713 // Now figure out the amount we need to shift to get the target bits.
3714 auto OffsetMask = B.buildConstant(
3715 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3716 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3717 return B.buildShl(IdxTy, OffsetIdx,
3718 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3719}
3720
3721/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3722/// is casting to a vector with a smaller element size, perform multiple element
3723/// extracts and merge the results. If this is coercing to a vector with larger
3724/// elements, index the bitcasted vector and extract the target element with bit
3725/// operations. This is intended to force the indexing in the native register
3726/// size for architectures that can dynamically index the register file.
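///
/// A rough sketch of the smaller-element case: extracting an s64 from
/// <2 x s64> through a <4 x s32> cast becomes two s32 extracts at indices
/// 2 * %idx and 2 * %idx + 1 whose results are merged back into an s64.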
3727LegalizerHelper::LegalizeResult
3728LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3729 LLT CastTy) {
3730 if (TypeIdx != 1)
3731 return UnableToLegalize;
3732
3733 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3734
3735 LLT SrcEltTy = SrcVecTy.getElementType();
3736 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3737 unsigned OldNumElts = SrcVecTy.getNumElements();
3738
3739 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3740 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3741
3742 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3743 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3744 if (NewNumElts > OldNumElts) {
3745 // Decreasing the vector element size
3746 //
3747 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3748 // =>
3749 // v4i32:castx = bitcast x:v2i64
3750 //
3751 // i64 = bitcast
3752 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3753 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3754 //
3755 if (NewNumElts % OldNumElts != 0)
3756 return UnableToLegalize;
3757
3758 // Type of the intermediate result vector.
3759 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3760 LLT MidTy =
3761 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3762
3763 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3764
3765 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3766 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3767
3768 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3769 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3770 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3771 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3772 NewOps[I] = Elt.getReg(0);
3773 }
3774
3775 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3776 MIRBuilder.buildBitcast(Dst, NewVec);
3777 MI.eraseFromParent();
3778 return Legalized;
3779 }
3780
3781 if (NewNumElts < OldNumElts) {
3782 if (NewEltSize % OldEltSize != 0)
3783 return UnableToLegalize;
3784
3785 // This only depends on powers of 2 because we use bit tricks to figure out
3786 // the bit offset we need to shift to get the target element. A general
3787 // expansion could emit division/multiply.
3788 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3789 return UnableToLegalize;
3790
3791 // Increasing the vector element size.
3792 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3793 //
3794 // =>
3795 //
3796 // %cast = G_BITCAST %vec
3797 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3798 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3799 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3800 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3801 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3802 // %elt = G_TRUNC %elt_bits
3803
3804 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3805 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3806
3807 // Divide to get the index in the wider element type.
3808 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3809
3810 Register WideElt = CastVec;
3811 if (CastTy.isVector()) {
3812 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3813 ScaledIdx).getReg(0);
3814 }
3815
3816 // Compute the bit offset into the register of the target element.
3817 Register OffsetBits = getBitcastWiderVectorElementOffset(
3818 MIRBuilder, Idx, NewEltSize, OldEltSize);
3819
3820 // Shift the wide element to get the target element.
3821 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3822 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3823 MI.eraseFromParent();
3824 return Legalized;
3825 }
3826
3827 return UnableToLegalize;
3828}
3829
3830/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
3831/// preserving the other bits in \p TargetReg.
3832///
3833/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
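///
/// For instance (values chosen only for illustration), inserting an s8 at bit
/// offset 16 of an s32 target clears bits [16, 24) of TargetReg and ORs in the
/// zero-extended insert value shifted left by 16.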
3834static Register buildBitFieldInsert(MachineIRBuilder &B,
3835 Register TargetReg, Register InsertReg,
3836 Register OffsetBits) {
3837 LLT TargetTy = B.getMRI()->getType(TargetReg);
3838 LLT InsertTy = B.getMRI()->getType(InsertReg);
3839 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3840 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3841
3842 // Produce a bitmask of the value to insert
3843 auto EltMask = B.buildConstant(
3844 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3845 InsertTy.getSizeInBits()));
3846 // Shift it into position
3847 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3848 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3849
3850 // Clear out the bits in the wide element
3851 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3852
3853 // The value to insert has all zeros already, so stick it into the masked
3854 // wide element.
3855 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3856}
3857
3858/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3859/// is increasing the element size, perform the indexing in the target element
3860/// type, and use bit operations to insert at the element position. This is
3861/// intended for architectures that can dynamically index the register file and
3862/// want to force indexing in the native register size.
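///
/// A minimal sketch: inserting an s8 into <16 x s8> through a <4 x s32> cast
/// extracts the containing s32 at index %idx / 4, rewrites the addressed byte
/// with shift/mask/or, and re-inserts the patched s32 at the same scaled index.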
3863LegalizerHelper::LegalizeResult
3864LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3865 LLT CastTy) {
3866 if (TypeIdx != 0)
3867 return UnableToLegalize;
3868
3869 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3870 MI.getFirst4RegLLTs();
3871 LLT VecTy = DstTy;
3872
3873 LLT VecEltTy = VecTy.getElementType();
3874 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3875 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3876 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3877
3878 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3879 unsigned OldNumElts = VecTy.getNumElements();
3880
3881 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3882 if (NewNumElts < OldNumElts) {
3883 if (NewEltSize % OldEltSize != 0)
3884 return UnableToLegalize;
3885
3886 // This only depends on powers of 2 because we use bit tricks to figure out
3887 // the bit offset we need to shift to get the target element. A general
3888 // expansion could emit division/multiply.
3889 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3890 return UnableToLegalize;
3891
3892 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3893 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3894
3895 // Divide to get the index in the wider element type.
3896 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3897
3898 Register ExtractedElt = CastVec;
3899 if (CastTy.isVector()) {
3900 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3901 ScaledIdx).getReg(0);
3902 }
3903
3904 // Compute the bit offset into the register of the target element.
3905 Register OffsetBits = getBitcastWiderVectorElementOffset(
3906 MIRBuilder, Idx, NewEltSize, OldEltSize);
3907
3908 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3909 Val, OffsetBits);
3910 if (CastTy.isVector()) {
3911 InsertedElt = MIRBuilder.buildInsertVectorElement(
3912 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3913 }
3914
3915 MIRBuilder.buildBitcast(Dst, InsertedElt);
3916 MI.eraseFromParent();
3917 return Legalized;
3918 }
3919
3920 return UnableToLegalize;
3921}
3922
3923// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3924// those that have smaller than legal operands.
3925//
3926// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3927//
3928// ===>
3929//
3930// s32 = G_BITCAST <4 x s8>
3931// s32 = G_BITCAST <4 x s8>
3932// s32 = G_BITCAST <4 x s8>
3933// s32 = G_BITCAST <4 x s8>
3934// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3935// <16 x s8> = G_BITCAST <4 x s32>
3936LegalizerHelper::LegalizeResult
3937LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3938 LLT CastTy) {
3939 // Only G_CONCAT_VECTORS is handled here.
3940 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3941 if (!ConcatMI) {
3942 return UnableToLegalize;
3943 }
3944
3945 // Check if bitcast is Legal
3946 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3947 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3948
3949 // Check if the build vector is Legal
3950 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3951 return UnableToLegalize;
3952 }
3953
3954 // Bitcast the sources
3955 SmallVector<Register> BitcastRegs;
3956 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3957 BitcastRegs.push_back(
3958 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3959 .getReg(0));
3960 }
3961
3962 // Build the scalar values into a vector
3963 Register BuildReg =
3964 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3965 MIRBuilder.buildBitcast(DstReg, BuildReg);
3966
3967 MI.eraseFromParent();
3968 return Legalized;
3969}
3970
3971// This bitcasts a shuffle vector to a different type currently of the same
3972// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3973// will be used instead.
3974//
3975// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3976// ===>
3977// <4 x s64> = G_PTRTOINT <4 x p0>
3978// <4 x s64> = G_PTRTOINT <4 x p0>
3979// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3980// <16 x p0> = G_INTTOPTR <16 x s64>
3981LegalizerHelper::LegalizeResult
3982LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3983 LLT CastTy) {
3984 auto ShuffleMI = cast<GShuffleVector>(&MI);
3985 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3986 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3987
3988 // We currently only handle vectors of the same size.
3989 if (TypeIdx != 0 ||
3990 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3991 CastTy.getElementCount() != DstTy.getElementCount())
3992 return UnableToLegalize;
3993
3994 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3995
3996 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3997 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3998 auto Shuf =
3999 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4000 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4001
4002 MI.eraseFromParent();
4003 return Legalized;
4004}
4005
4006/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4007///
4008/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4009///
4010/// ===>
4011///
4012/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4013/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4014/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4015LegalizerHelper::LegalizeResult
4016LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4017 LLT CastTy) {
4018 auto ES = cast<GExtractSubvector>(&MI);
4019
4020 if (!CastTy.isVector())
4021 return UnableToLegalize;
4022
4023 if (TypeIdx != 0)
4024 return UnableToLegalize;
4025
4026 Register Dst = ES->getReg(0);
4027 Register Src = ES->getSrcVec();
4028 uint64_t Idx = ES->getIndexImm();
4029
4030 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4031
4032 LLT DstTy = MRI.getType(Dst);
4033 LLT SrcTy = MRI.getType(Src);
4034 ElementCount DstTyEC = DstTy.getElementCount();
4035 ElementCount SrcTyEC = SrcTy.getElementCount();
4036 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4037 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4038
4039 if (DstTy == CastTy)
4040 return Legalized;
4041
4042 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4043 return UnableToLegalize;
4044
4045 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4046 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4047 if (CastEltSize < DstEltSize)
4048 return UnableToLegalize;
4049
4050 auto AdjustAmt = CastEltSize / DstEltSize;
4051 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4052 SrcTyMinElts % AdjustAmt != 0)
4053 return UnableToLegalize;
4054
4055 Idx /= AdjustAmt;
4056 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4057 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4058 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4059 MIRBuilder.buildBitcast(Dst, PromotedES);
4060
4061 ES->eraseFromParent();
4062 return Legalized;
4063}
4064
4065/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4066///
4067/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4068/// <vscale x 8 x i1>,
4069/// N
4070///
4071/// ===>
4072///
4073/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4074/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4075/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4076/// <vscale x 1 x i8>, N / 8
4077/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4078LegalizerHelper::LegalizeResult
4079LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4080 LLT CastTy) {
4081 auto ES = cast<GInsertSubvector>(&MI);
4082
4083 if (!CastTy.isVector())
4084 return UnableToLegalize;
4085
4086 if (TypeIdx != 0)
4087 return UnableToLegalize;
4088
4089 Register Dst = ES->getReg(0);
4090 Register BigVec = ES->getBigVec();
4091 Register SubVec = ES->getSubVec();
4092 uint64_t Idx = ES->getIndexImm();
4093
4094 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4095
4096 LLT DstTy = MRI.getType(Dst);
4097 LLT BigVecTy = MRI.getType(BigVec);
4098 LLT SubVecTy = MRI.getType(SubVec);
4099
4100 if (DstTy == CastTy)
4101 return Legalized;
4102
4103 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4104 return UnableToLegalize;
4105
4106 ElementCount DstTyEC = DstTy.getElementCount();
4107 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4108 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4109 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4110 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4111 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4112
4113 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4114 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4115 if (CastEltSize < DstEltSize)
4116 return UnableToLegalize;
4117
4118 auto AdjustAmt = CastEltSize / DstEltSize;
4119 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4120 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4121 return UnableToLegalize;
4122
4123 Idx /= AdjustAmt;
4124 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4125 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4126 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4127 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4128 auto PromotedIS =
4129 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4130 MIRBuilder.buildBitcast(Dst, PromotedIS);
4131
4132 ES->eraseFromParent();
4133 return Legalized;
4134}
4135
4136LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4137 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4138 Register DstReg = LoadMI.getDstReg();
4139 Register PtrReg = LoadMI.getPointerReg();
4140 LLT DstTy = MRI.getType(DstReg);
4141 MachineMemOperand &MMO = LoadMI.getMMO();
4142 LLT MemTy = MMO.getMemoryType();
4143 MachineFunction &MF = MIRBuilder.getMF();
4144
4145 unsigned MemSizeInBits = MemTy.getSizeInBits();
4146 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4147
4148 if (MemSizeInBits != MemStoreSizeInBits) {
4149 if (MemTy.isVector())
4150 return UnableToLegalize;
4151
4152 // Promote to a byte-sized load if not loading an integral number of
4153 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4154 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4155 MachineMemOperand *NewMMO =
4156 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4157
4158 Register LoadReg = DstReg;
4159 LLT LoadTy = DstTy;
4160
4161 // If this wasn't already an extending load, we need to widen the result
4162 // register to avoid creating a load with a narrower result than the source.
4163 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4164 LoadTy = WideMemTy;
4165 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4166 }
4167
4168 if (isa<GSExtLoad>(LoadMI)) {
4169 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4170 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4171 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4172 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4173 // The extra bits are guaranteed to be zero, since we stored them that
4174 // way. A zext load from Wide thus automatically gives zext from MemVT.
4175 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4176 } else {
4177 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4178 }
4179
4180 if (DstTy != LoadTy)
4181 MIRBuilder.buildTrunc(DstReg, LoadReg);
4182
4183 LoadMI.eraseFromParent();
4184 return Legalized;
4185 }
4186
4187 // Big endian lowering not implemented.
4188 if (MIRBuilder.getDataLayout().isBigEndian())
4189 return UnableToLegalize;
4190
4191 // This load needs splitting into power of 2 sized loads.
4192 //
4193 // Our strategy here is to generate anyextending loads for the smaller
4194 // types up to next power-2 result type, and then combine the two larger
4195 // result values together, before truncating back down to the non-pow-2
4196 // type.
4197 // E.g. v1 = i24 load =>
4198 // v2 = i32 zextload (2 byte)
4199 // v3 = i32 load (1 byte)
4200 // v4 = i32 shl v3, 16
4201 // v5 = i32 or v4, v2
4202 // v1 = i24 trunc v5
4203 // By doing this we generate the correct truncate which should get
4204 // combined away as an artifact with a matching extend.
4205
4206 uint64_t LargeSplitSize, SmallSplitSize;
4207
4208 if (!isPowerOf2_32(MemSizeInBits)) {
4209 // This load needs splitting into power of 2 sized loads.
4210 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4211 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4212 } else {
4213 // This is already a power of 2, but we still need to split this in half.
4214 //
4215 // Assume we're being asked to decompose an unaligned load.
4216 // TODO: If this requires multiple splits, handle them all at once.
4217 auto &Ctx = MF.getFunction().getContext();
4218 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4219 return UnableToLegalize;
4220
4221 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4222 }
4223
4224 if (MemTy.isVector()) {
4225 // TODO: Handle vector extloads
4226 if (MemTy != DstTy)
4227 return UnableToLegalize;
4228
4229 Align Alignment = LoadMI.getAlign();
4230 // Given an alignment larger than the size of the memory, we can increase
4231 // the size of the load without needing to scalarize it.
4232 if (Alignment.value() * 8 > MemSizeInBits &&
4235 DstTy.getElementType());
4236 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4237 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4238 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4239 NewLoad.getReg(0));
4240 LoadMI.eraseFromParent();
4241 return Legalized;
4242 }
4243
4244 // TODO: We can do better than scalarizing the vector and at least split it
4245 // in half.
4246 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4247 }
4248
4249 MachineMemOperand *LargeMMO =
4250 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4251 MachineMemOperand *SmallMMO =
4252 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4253
4254 LLT PtrTy = MRI.getType(PtrReg);
4255 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4256 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4257 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4258 PtrReg, *LargeMMO);
4259
4260 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4261 LargeSplitSize / 8);
4262 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4263 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4264 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4265 SmallPtr, *SmallMMO);
4266
4267 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4268 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4269
4270 if (AnyExtTy == DstTy)
4271 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4272 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4273 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4274 MIRBuilder.buildTrunc(DstReg, {Or});
4275 } else {
4276 assert(DstTy.isPointer() && "expected pointer");
4277 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4278
4279 // FIXME: We currently consider this to be illegal for non-integral address
4280 // spaces, but we still need a way to reinterpret the bits.
4281 MIRBuilder.buildIntToPtr(DstReg, Or);
4282 }
4283
4284 LoadMI.eraseFromParent();
4285 return Legalized;
4286}
4287
4288LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4289 // Lower a non-power of 2 store into multiple pow-2 stores.
4290 // E.g. split an i24 store into an i16 store + i8 store.
4291 // We do this by first extending the stored value to the next largest power
4292 // of 2 type, and then using truncating stores to store the components.
4293 // By doing this, as with G_LOAD, we generate an extend that can be
4294 // artifact-combined away instead of leaving behind extracts.
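 // A rough sketch of the i24 case mentioned above:
 //   %ext:_(s32) = G_ANYEXT %val(s24)
 //   G_STORE %ext, %ptr            ; 2-byte truncating store
 //   %hi:_(s32)  = G_LSHR %ext, 16
 //   G_STORE %hi, %ptr + 2         ; 1-byte truncating store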
4295 Register SrcReg = StoreMI.getValueReg();
4296 Register PtrReg = StoreMI.getPointerReg();
4297 LLT SrcTy = MRI.getType(SrcReg);
4298 MachineFunction &MF = MIRBuilder.getMF();
4299 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4300 LLT MemTy = MMO.getMemoryType();
4301
4302 unsigned StoreWidth = MemTy.getSizeInBits();
4303 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4304
4305 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4306 // Promote to a byte-sized store with upper bits zero if not
4307 // storing an integral number of bytes. For example, promote
4308 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4309 LLT WideTy = LLT::scalar(StoreSizeInBits);
4310
4311 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4312 // Avoid creating a store with a narrower source than result.
4313 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4314 SrcTy = WideTy;
4315 }
4316
4317 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4318
4319 MachineMemOperand *NewMMO =
4320 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4321 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4322 StoreMI.eraseFromParent();
4323 return Legalized;
4324 }
4325
4326 if (MemTy.isVector()) {
4327 if (MemTy != SrcTy)
4328 return scalarizeVectorBooleanStore(StoreMI);
4329
4330 // TODO: We can do better than scalarizing the vector and at least split it
4331 // in half.
4332 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4333 }
4334
4335 unsigned MemSizeInBits = MemTy.getSizeInBits();
4336 uint64_t LargeSplitSize, SmallSplitSize;
4337
4338 if (!isPowerOf2_32(MemSizeInBits)) {
4339 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4340 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4341 } else {
4342 auto &Ctx = MF.getFunction().getContext();
4343 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4344 return UnableToLegalize; // Don't know what we're being asked to do.
4345
4346 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4347 }
4348
4349 // Extend to the next pow-2. If this store was itself the result of lowering,
4350 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4351 // that's wider than the stored size.
4352 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4353 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4354
4355 if (SrcTy.isPointer()) {
4356 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4357 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4358 }
4359
4360 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4361
4362 // Obtain the smaller value by shifting away the larger value.
4363 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4364 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4365
4366 // Generate the PtrAdd and truncating stores.
4367 LLT PtrTy = MRI.getType(PtrReg);
4368 auto OffsetCst = MIRBuilder.buildConstant(
4369 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4370 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4371
4372 MachineMemOperand *LargeMMO =
4373 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4374 MachineMemOperand *SmallMMO =
4375 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4376 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4377 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4378 StoreMI.eraseFromParent();
4379 return Legalized;
4380}
4381
4382LegalizerHelper::LegalizeResult
4383LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4384 Register SrcReg = StoreMI.getValueReg();
4385 Register PtrReg = StoreMI.getPointerReg();
4386 LLT SrcTy = MRI.getType(SrcReg);
4387 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4388 LLT MemTy = MMO.getMemoryType();
4389 LLT MemScalarTy = MemTy.getElementType();
4390 MachineFunction &MF = MIRBuilder.getMF();
4391
4392 assert(SrcTy.isVector() && "Expect a vector store type");
4393
4394 if (!MemScalarTy.isByteSized()) {
4395 // We need to build an integer scalar of the vector bit pattern.
4396 // It's not legal for us to add padding when storing a vector.
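 // E.g. (sketch) an <8 x s1> store becomes a single s8 built by truncating
 // each lane, zero-extending it, shifting it into its bit position (reversed
 // on big-endian targets), and OR-ing the lanes together.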
4397 unsigned NumBits = MemTy.getSizeInBits();
4398 LLT IntTy = LLT::scalar(NumBits);
4399 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4400 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4401
4402 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4403 auto Elt = MIRBuilder.buildExtractVectorElement(
4404 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4405 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4406 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4407 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4408 ? (MemTy.getNumElements() - 1) - I
4409 : I;
4410 auto ShiftAmt = MIRBuilder.buildConstant(
4411 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4412 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4413 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4414 }
4415 auto PtrInfo = MMO.getPointerInfo();
4416 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4417 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4418 StoreMI.eraseFromParent();
4419 return Legalized;
4420 }
4421
4422 // TODO: implement simple scalarization.
4423 return UnableToLegalize;
4424}
4425
4426LegalizerHelper::LegalizeResult
4427LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4428 switch (MI.getOpcode()) {
4429 case TargetOpcode::G_LOAD: {
4430 if (TypeIdx != 0)
4431 return UnableToLegalize;
4432 MachineMemOperand &MMO = **MI.memoperands_begin();
4433
4434 // Not sure how to interpret a bitcast of an extending load.
4435 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4436 return UnableToLegalize;
4437
4438 Observer.changingInstr(MI);
4439 bitcastDst(MI, CastTy, 0);
4440 MMO.setType(CastTy);
4441 // The range metadata is no longer valid when reinterpreted as a different
4442 // type.
4443 MMO.clearRanges();
4444 Observer.changedInstr(MI);
4445 return Legalized;
4446 }
4447 case TargetOpcode::G_STORE: {
4448 if (TypeIdx != 0)
4449 return UnableToLegalize;
4450
4451 MachineMemOperand &MMO = **MI.memoperands_begin();
4452
4453 // Not sure how to interpret a bitcast of a truncating store.
4454 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4455 return UnableToLegalize;
4456
4457 Observer.changingInstr(MI);
4458 bitcastSrc(MI, CastTy, 0);
4459 MMO.setType(CastTy);
4460 Observer.changedInstr(MI);
4461 return Legalized;
4462 }
4463 case TargetOpcode::G_SELECT: {
4464 if (TypeIdx != 0)
4465 return UnableToLegalize;
4466
4467 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4468 LLVM_DEBUG(
4469 dbgs() << "bitcast action not implemented for vector select\n");
4470 return UnableToLegalize;
4471 }
4472
4473 Observer.changingInstr(MI);
4474 bitcastSrc(MI, CastTy, 2);
4475 bitcastSrc(MI, CastTy, 3);
4476 bitcastDst(MI, CastTy, 0);
4477 Observer.changedInstr(MI);
4478 return Legalized;
4479 }
4480 case TargetOpcode::G_AND:
4481 case TargetOpcode::G_OR:
4482 case TargetOpcode::G_XOR: {
4483 Observer.changingInstr(MI);
4484 bitcastSrc(MI, CastTy, 1);
4485 bitcastSrc(MI, CastTy, 2);
4486 bitcastDst(MI, CastTy, 0);
4487 Observer.changedInstr(MI);
4488 return Legalized;
4489 }
4490 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4491 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4492 case TargetOpcode::G_INSERT_VECTOR_ELT:
4493 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4494 case TargetOpcode::G_CONCAT_VECTORS:
4495 return bitcastConcatVector(MI, TypeIdx, CastTy);
4496 case TargetOpcode::G_SHUFFLE_VECTOR:
4497 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4498 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4499 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4500 case TargetOpcode::G_INSERT_SUBVECTOR:
4501 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4502 default:
4503 return UnableToLegalize;
4504 }
4505}
4506
4507// Legalize an instruction by changing the opcode in place.
4508void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4509 Observer.changingInstr(MI);
4510 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4511 Observer.changedInstr(MI);
4512}
4513
4514LegalizerHelper::LegalizeResult
4515LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4516 using namespace TargetOpcode;
4517
4518 switch(MI.getOpcode()) {
4519 default:
4520 return UnableToLegalize;
4521 case TargetOpcode::G_FCONSTANT:
4522 return lowerFConstant(MI);
4523 case TargetOpcode::G_BITCAST:
4524 return lowerBitcast(MI);
4525 case TargetOpcode::G_SREM:
4526 case TargetOpcode::G_UREM: {
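 // Lower the remainder as LHS - (LHS / RHS) * RHS, using the matching
 // signed/unsigned division opcode.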
4527 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4528 auto Quot =
4529 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4530 {MI.getOperand(1), MI.getOperand(2)});
4531
4532 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4533 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4534 MI.eraseFromParent();
4535 return Legalized;
4536 }
4537 case TargetOpcode::G_SADDO:
4538 case TargetOpcode::G_SSUBO:
4539 return lowerSADDO_SSUBO(MI);
4540 case TargetOpcode::G_SADDE:
4541 return lowerSADDE(MI);
4542 case TargetOpcode::G_SSUBE:
4543 return lowerSSUBE(MI);
4544 case TargetOpcode::G_UMULH:
4545 case TargetOpcode::G_SMULH:
4546 return lowerSMULH_UMULH(MI);
4547 case TargetOpcode::G_SMULO:
4548 case TargetOpcode::G_UMULO: {
4549 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4550 // result.
4551 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4552 LLT Ty = MRI.getType(Res);
4553
4554 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4555 ? TargetOpcode::G_SMULH
4556 : TargetOpcode::G_UMULH;
4557
4558 Observer.changingInstr(MI);
4559 const auto &TII = MIRBuilder.getTII();
4560 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4561 MI.removeOperand(1);
4562 Observer.changedInstr(MI);
4563
4564 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4565 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4566
4567 // Move insert point forward so we can use the Res register if needed.
4568 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4569
4570 // For *signed* multiply, overflow is detected by checking:
4571 // (hi != (lo >> bitwidth-1))
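 // That is, for signed multiply the high half must equal the sign-extension
 // of the low half; for unsigned multiply (the else branch) it must be zero.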
4572 if (Opcode == TargetOpcode::G_SMULH) {
4573 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4574 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4575 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4576 } else {
4577 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4578 }
4579 return Legalized;
4580 }
4581 case TargetOpcode::G_FNEG: {
4582 auto [Res, SubByReg] = MI.getFirst2Regs();
4583 LLT Ty = MRI.getType(Res);
4584
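 // fneg(x) is lowered as x ^ sign-mask: flipping the sign bit negates the
 // value (including zeros and NaNs) without changing any other bits.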
4585 auto SignMask = MIRBuilder.buildConstant(
4586 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4587 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4588 MI.eraseFromParent();
4589 return Legalized;
4590 }
4591 case TargetOpcode::G_FSUB:
4592 case TargetOpcode::G_STRICT_FSUB: {
4593 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4594 LLT Ty = MRI.getType(Res);
4595
4596 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4597 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4598
4599 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4600 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4601 else
4602 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4603
4604 MI.eraseFromParent();
4605 return Legalized;
4606 }
4607 case TargetOpcode::G_FMAD:
4608 return lowerFMad(MI);
4609 case TargetOpcode::G_FFLOOR:
4610 return lowerFFloor(MI);
4611 case TargetOpcode::G_LROUND:
4612 case TargetOpcode::G_LLROUND: {
4613 Register DstReg = MI.getOperand(0).getReg();
4614 Register SrcReg = MI.getOperand(1).getReg();
4615 LLT SrcTy = MRI.getType(SrcReg);
4616 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4617 {SrcReg});
4618 MIRBuilder.buildFPTOSI(DstReg, Round);
4619 MI.eraseFromParent();
4620 return Legalized;
4621 }
4622 case TargetOpcode::G_INTRINSIC_ROUND:
4623 return lowerIntrinsicRound(MI);
4624 case TargetOpcode::G_FRINT: {
4625 // Since round even is the assumed rounding mode for unconstrained FP
4626 // operations, rint and roundeven are the same operation.
4627 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4628 return Legalized;
4629 }
4630 case TargetOpcode::G_INTRINSIC_LRINT:
4631 case TargetOpcode::G_INTRINSIC_LLRINT: {
4632 Register DstReg = MI.getOperand(0).getReg();
4633 Register SrcReg = MI.getOperand(1).getReg();
4634 LLT SrcTy = MRI.getType(SrcReg);
4635 auto Round =
4636 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4637 MIRBuilder.buildFPTOSI(DstReg, Round);
4638 MI.eraseFromParent();
4639 return Legalized;
4640 }
4641 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4642 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4643 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4644 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4645 **MI.memoperands_begin());
4646 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4647 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4648 MI.eraseFromParent();
4649 return Legalized;
4650 }
4651 case TargetOpcode::G_LOAD:
4652 case TargetOpcode::G_SEXTLOAD:
4653 case TargetOpcode::G_ZEXTLOAD:
4654 return lowerLoad(cast<GAnyLoad>(MI));
4655 case TargetOpcode::G_STORE:
4656 return lowerStore(cast<GStore>(MI));
4657 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4658 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4659 case TargetOpcode::G_CTLZ:
4660 case TargetOpcode::G_CTTZ:
4661 case TargetOpcode::G_CTPOP:
4662 return lowerBitCount(MI);
4663 case G_UADDO: {
4664 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4665
4666 Register NewRes = MRI.cloneVirtualRegister(Res);
4667
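 // The unsigned add overflows iff the result wraps, i.e. iff the computed sum
 // is (unsigned) less than one of the operands.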
4668 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4669 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4670
4671 MIRBuilder.buildCopy(Res, NewRes);
4672
4673 MI.eraseFromParent();
4674 return Legalized;
4675 }
4676 case G_UADDE: {
4677 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4678 const LLT CondTy = MRI.getType(CarryOut);
4679 const LLT Ty = MRI.getType(Res);
4680
4681 Register NewRes = MRI.cloneVirtualRegister(Res);
4682
4683 // Initial add of the two operands.
4684 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4685
4686 // Initial check for carry.
4687 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4688
4689 // Add the sum and the carry.
4690 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4691 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4692
4693 // Second check for carry. We can only carry if the initial sum is all 1s
4694 // and the carry is set, resulting in a new sum of 0.
4695 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4696 auto ResEqZero =
4697 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4698 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4699 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4700
4701 MIRBuilder.buildCopy(Res, NewRes);
4702
4703 MI.eraseFromParent();
4704 return Legalized;
4705 }
4706 case G_USUBO: {
4707 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4708
4709 MIRBuilder.buildSub(Res, LHS, RHS);
4710 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4711
4712 MI.eraseFromParent();
4713 return Legalized;
4714 }
4715 case G_USUBE: {
4716 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4717 const LLT CondTy = MRI.getType(BorrowOut);
4718 const LLT Ty = MRI.getType(Res);
4719
4720 // Initial subtract of the two operands.
4721 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4722
4723 // Initial check for borrow.
4724 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4725
4726 // Subtract the borrow from the first subtract.
4727 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4728 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4729
4730 // Second check for borrow. We can only borrow if the initial difference is
4731 // 0 and the borrow is set, resulting in a new difference of all 1s.
4732 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4733 auto TmpResEqZero =
4734 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4735 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4736 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4737
4738 MI.eraseFromParent();
4739 return Legalized;
4740 }
4741 case G_UITOFP:
4742 return lowerUITOFP(MI);
4743 case G_SITOFP:
4744 return lowerSITOFP(MI);
4745 case G_FPTOUI:
4746 return lowerFPTOUI(MI);
4747 case G_FPTOSI:
4748 return lowerFPTOSI(MI);
4749 case G_FPTOUI_SAT:
4750 case G_FPTOSI_SAT:
4751 return lowerFPTOINT_SAT(MI);
4752 case G_FPTRUNC:
4753 return lowerFPTRUNC(MI);
4754 case G_FPOWI:
4755 return lowerFPOWI(MI);
4756 case G_SMIN:
4757 case G_SMAX:
4758 case G_UMIN:
4759 case G_UMAX:
4760 return lowerMinMax(MI);
4761 case G_SCMP:
4762 case G_UCMP:
4763 return lowerThreewayCompare(MI);
4764 case G_FCOPYSIGN:
4765 return lowerFCopySign(MI);
4766 case G_FMINNUM:
4767 case G_FMAXNUM:
4768 case G_FMINIMUMNUM:
4769 case G_FMAXIMUMNUM:
4770 return lowerFMinNumMaxNum(MI);
4771 case G_FMINIMUM:
4772 case G_FMAXIMUM:
4773 return lowerFMinimumMaximum(MI);
4774 case G_MERGE_VALUES:
4775 return lowerMergeValues(MI);
4776 case G_UNMERGE_VALUES:
4777 return lowerUnmergeValues(MI);
4778 case TargetOpcode::G_SEXT_INREG: {
4779 assert(MI.getOperand(2).isImm() && "Expected immediate");
4780 int64_t SizeInBits = MI.getOperand(2).getImm();
4781
4782 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4783 LLT DstTy = MRI.getType(DstReg);
4784 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4785
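 // Lower as shl + ashr by (scalar width - SizeInBits): the left shift moves
 // the SizeInBits-wide field to the top, and the arithmetic right shift
 // brings it back down sign-extended.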
4786 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4787 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4788 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4789 MI.eraseFromParent();
4790 return Legalized;
4791 }
4792 case G_EXTRACT_VECTOR_ELT:
4793 case G_INSERT_VECTOR_ELT:
4794 return lowerExtractInsertVectorElt(MI);
4795 case G_SHUFFLE_VECTOR:
4796 return lowerShuffleVector(MI);
4797 case G_VECTOR_COMPRESS:
4798 return lowerVECTOR_COMPRESS(MI);
4799 case G_DYN_STACKALLOC:
4800 return lowerDynStackAlloc(MI);
4801 case G_STACKSAVE:
4802 return lowerStackSave(MI);
4803 case G_STACKRESTORE:
4804 return lowerStackRestore(MI);
4805 case G_EXTRACT:
4806 return lowerExtract(MI);
4807 case G_INSERT:
4808 return lowerInsert(MI);
4809 case G_BSWAP:
4810 return lowerBswap(MI);
4811 case G_BITREVERSE:
4812 return lowerBitreverse(MI);
4813 case G_READ_REGISTER:
4814 case G_WRITE_REGISTER:
4815 return lowerReadWriteRegister(MI);
4816 case G_UADDSAT:
4817 case G_USUBSAT: {
4818 // Try to make a reasonable guess about which lowering strategy to use. The
4819 // target can override this with custom lowering and calling the
4820 // implementation functions.
4821 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4822 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4823 return lowerAddSubSatToMinMax(MI);
4824 return lowerAddSubSatToAddoSubo(MI);
4825 }
4826 case G_SADDSAT:
4827 case G_SSUBSAT: {
4828 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4829
4830 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4831 // since it's a shorter expansion. However, we would need to figure out the
4832 // preferred boolean type for the carry out for the query.
4833 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4834 return lowerAddSubSatToMinMax(MI);
4835 return lowerAddSubSatToAddoSubo(MI);
4836 }
4837 case G_SSHLSAT:
4838 case G_USHLSAT:
4839 return lowerShlSat(MI);
4840 case G_ABS:
4841 return lowerAbsToAddXor(MI);
4842 case G_ABDS:
4843 case G_ABDU: {
4844 bool IsSigned = MI.getOpcode() == G_ABDS;
4845 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4846 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4847 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4848 return lowerAbsDiffToMinMax(MI);
4849 }
4850 return lowerAbsDiffToSelect(MI);
4851 }
4852 case G_FABS:
4853 return lowerFAbs(MI);
4854 case G_SELECT:
4855 return lowerSelect(MI);
4856 case G_IS_FPCLASS:
4857 return lowerISFPCLASS(MI);
4858 case G_SDIVREM:
4859 case G_UDIVREM:
4860 return lowerDIVREM(MI);
4861 case G_FSHL:
4862 case G_FSHR:
4863 return lowerFunnelShift(MI);
4864 case G_ROTL:
4865 case G_ROTR:
4866 return lowerRotate(MI);
4867 case G_MEMSET:
4868 case G_MEMCPY:
4869 case G_MEMMOVE:
4870 return lowerMemCpyFamily(MI);
4871 case G_MEMCPY_INLINE:
4872 return lowerMemcpyInline(MI);
4873 case G_ZEXT:
4874 case G_SEXT:
4875 case G_ANYEXT:
4876 return lowerEXT(MI);
4877 case G_TRUNC:
4878 return lowerTRUNC(MI);
4879 GISEL_VECREDUCE_CASES_NONSEQ
4880 return lowerVectorReduction(MI);
4881 case G_VAARG:
4882 return lowerVAArg(MI);
4883 case G_ATOMICRMW_SUB: {
4884 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4885 const LLT ValTy = MRI.getType(Val);
4886 MachineMemOperand *MMO = *MI.memoperands_begin();
4887
4888 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4889 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4890 MI.eraseFromParent();
4891 return Legalized;
4892 }
4893 }
4894}
4895
4896Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4897 Align MinAlign) const {
4898 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4899 // datalayout for the preferred alignment. Also there should be a target hook
4900 // for this to allow targets to reduce the alignment and ignore the
4901 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4902 // the type.
4903 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4904}
4905
4906MachineInstrBuilder
4907LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4908 MachinePointerInfo &PtrInfo) {
4909 MachineFunction &MF = MIRBuilder.getMF();
4910 const DataLayout &DL = MIRBuilder.getDataLayout();
4911 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4912
4913 unsigned AddrSpace = DL.getAllocaAddrSpace();
4914 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4915
4916 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4917 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4918}
4919
4920MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4921 const SrcOp &Val) {
4922 LLT SrcTy = Val.getLLTTy(MRI);
4923 Align StackTypeAlign =
4924 std::max(getStackTemporaryAlignment(SrcTy),
4926 MachinePointerInfo PtrInfo;
4927 auto StackTemp =
4928 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4929
4930 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4931 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4932}
4933
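/// Clamp a (possibly dynamic) vector index into the valid range for \p VecTy.
/// In-bounds constant indices are returned unchanged; otherwise the index is
/// masked when the element count is a power of two, or umin-clamped if not.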
4934static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4935 LLT VecTy) {
4936 LLT IdxTy = B.getMRI()->getType(IdxReg);
4937 unsigned NElts = VecTy.getNumElements();
4938
4939 int64_t IdxVal;
4940 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4941 if (IdxVal < VecTy.getNumElements())
4942 return IdxReg;
4943 // If a constant index would be out of bounds, clamp it as well.
4944 }
4945
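// For a power-of-2 element count the clamp is a cheap mask: e.g. with
// NElts == 8, IdxReg & 7 keeps the index within [0, 7].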
4946 if (isPowerOf2_32(NElts)) {
4947 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4948 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4949 }
4950
4951 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4952 .getReg(0);
4953}
4954
4956 Register Index) {
4957 LLT EltTy = VecTy.getElementType();
4958
4959 // Calculate the element offset and add it to the pointer.
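// For example, element 3 of a <4 x s32> vector is at byte offset 3 * 4 = 12
// from VecPtr.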
4960 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4961 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4962 "Converting bits to bytes lost precision");
4963
4964 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4965
4966 // Convert index to the correct size for the address space.
4967 const DataLayout &DL = MIRBuilder.getDataLayout();
4968 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4969 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4970 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4971 if (IdxTy != MRI.getType(Index))
4972 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4973
4974 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4975 MIRBuilder.buildConstant(IdxTy, EltSize));
4976
4977 LLT PtrTy = MRI.getType(VecPtr);
4978 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4979}
4980
4981#ifndef NDEBUG
4982 /// Check that all vector operands have the same number of elements. Operands
4983 /// that are not vectors must be listed in \p NonVecOpIndices.
4986 std::initializer_list<unsigned> NonVecOpIndices) {
4987 if (MI.getNumMemOperands() != 0)
4988 return false;
4989
4990 LLT VecTy = MRI.getType(MI.getReg(0));
4991 if (!VecTy.isVector())
4992 return false;
4993 unsigned NumElts = VecTy.getNumElements();
4994
4995 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4996 MachineOperand &Op = MI.getOperand(OpIdx);
4997 if (!Op.isReg()) {
4998 if (!is_contained(NonVecOpIndices, OpIdx))
4999 return false;
5000 continue;
5001 }
5002
5003 LLT Ty = MRI.getType(Op.getReg());
5004 if (!Ty.isVector()) {
5005 if (!is_contained(NonVecOpIndices, OpIdx))
5006 return false;
5007 continue;
5008 }
5009
5010 if (Ty.getNumElements() != NumElts)
5011 return false;
5012 }
5013
5014 return true;
5015}
5016#endif
5017
5018 /// Fill \p DstOps with destination operands that, combined, cover the same number
5019 /// of elements as \p Ty. Each DstOp is a scalar when \p NumElts == 1, otherwise a
5020 /// vector with \p NumElts elements. When Ty.getNumElements() is not a multiple of
5021 /// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
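/// For example, Ty = <7 x s16> with \p NumElts = 4 produces
/// DstOps = { <4 x s16>, <3 x s16> }.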
5022static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5023 unsigned NumElts) {
5024 LLT LeftoverTy;
5025 assert(Ty.isVector() && "Expected vector type");
5026 LLT EltTy = Ty.getElementType();
5027 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
5028 int NumParts, NumLeftover;
5029 std::tie(NumParts, NumLeftover) =
5030 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5031
5032 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5033 for (int i = 0; i < NumParts; ++i) {
5034 DstOps.push_back(NarrowTy);
5035 }
5036
5037 if (LeftoverTy.isValid()) {
5038 assert(NumLeftover == 1 && "expected exactly one leftover");
5039 DstOps.push_back(LeftoverTy);
5040 }
5041}
5042
5043 /// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5044 /// made from \p Op, depending on the operand type.
5046 MachineOperand &Op) {
5047 for (unsigned i = 0; i < N; ++i) {
5048 if (Op.isReg())
5049 Ops.push_back(Op.getReg());
5050 else if (Op.isImm())
5051 Ops.push_back(Op.getImm());
5052 else if (Op.isPredicate())
5053 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5054 else
5055 llvm_unreachable("Unsupported type");
5056 }
5057}
5058
5059// Handle splitting vector operations which need to have the same number of
5060// elements in each type index, but each type index may have a different element
5061// type.
5062//
5063// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5064// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5065// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5066//
5067// Also handles some irregular breakdown cases, e.g.
5068// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5069// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5070// s64 = G_SHL s64, s32
5073 GenericMachineInstr &MI, unsigned NumElts,
5074 std::initializer_list<unsigned> NonVecOpIndices) {
5075 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5076 "Non-compatible opcode or not specified non-vector operands");
5077 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5078
5079 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5080 unsigned NumDefs = MI.getNumDefs();
5081
5082 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5083 // Build instructions with DstOps so an instruction found by CSE is used
5084 // directly; when building with a vreg destination, CSE copies it into that vreg.
5085 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5086 // Output registers will be taken from created instructions.
5087 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5088 for (unsigned i = 0; i < NumDefs; ++i) {
5089 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5090 }
5091
5092 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5093 // Operands listed in NonVecOpIndices will be used as is without splitting;
5094 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5095 // scalar condition (op 1), immediate in sext_inreg (op 2).
5096 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5097 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5098 ++UseIdx, ++UseNo) {
5099 if (is_contained(NonVecOpIndices, UseIdx)) {
5100 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5101 MI.getOperand(UseIdx));
5102 } else {
5103 SmallVector<Register, 8> SplitPieces;
5104 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5105 MRI);
5106 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5107 }
5108 }
5109
5110 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5111
5112 // Take i-th piece of each input operand split and build sub-vector/scalar
5113 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5114 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5116 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5117 Defs.push_back(OutputOpsPieces[DstNo][i]);
5118
5120 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5121 Uses.push_back(InputOpsPieces[InputNo][i]);
5122
5123 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5124 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5125 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5126 }
5127
5128 // Merge small outputs into MI's output for each def operand.
5129 if (NumLeftovers) {
5130 for (unsigned i = 0; i < NumDefs; ++i)
5131 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5132 } else {
5133 for (unsigned i = 0; i < NumDefs; ++i)
5134 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5135 }
5136
5137 MI.eraseFromParent();
5138 return Legalized;
5139}
5140
5143 unsigned NumElts) {
5144 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5145
5146 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5147 unsigned NumDefs = MI.getNumDefs();
5148
5149 SmallVector<DstOp, 8> OutputOpsPieces;
5150 SmallVector<Register, 8> OutputRegs;
5151 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5152
5153 // Instructions that perform the register split are inserted in the basic block
5154 // where the register is defined (that basic block is given by the next operand).
5155 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5156 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5157 UseIdx += 2, ++UseNo) {
5158 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5159 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5160 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5161 MIRBuilder, MRI);
5162 }
5163
5164 // Build PHIs with fewer elements.
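// For example, a <4 x s32> G_PHI with NumElts == 2 becomes two <2 x s32>
// G_PHIs, each taking the matching half of every incoming value.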
5165 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5166 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5167 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5168 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5169 Phi.addDef(
5170 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5171 OutputRegs.push_back(Phi.getReg(0));
5172
5173 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5174 Phi.addUse(InputOpsPieces[j][i]);
5175 Phi.add(MI.getOperand(1 + j * 2 + 1));
5176 }
5177 }
5178
5179 // Set the insert point after the existing PHIs
5180 MachineBasicBlock &MBB = *MI.getParent();
5181 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5182
5183 // Merge small outputs into MI's def.
5184 if (NumLeftovers) {
5185 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5186 } else {
5187 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5188 }
5189
5190 MI.eraseFromParent();
5191 return Legalized;
5192}
5193
5196 unsigned TypeIdx,
5197 LLT NarrowTy) {
5198 const int NumDst = MI.getNumOperands() - 1;
5199 const Register SrcReg = MI.getOperand(NumDst).getReg();
5200 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5201 LLT SrcTy = MRI.getType(SrcReg);
5202
5203 if (TypeIdx != 1 || NarrowTy == DstTy)
5204 return UnableToLegalize;
5205
5206 // Requires compatible types. Otherwise SrcReg should have been defined by a
5207 // merge-like instruction that would have been artifact-combined. Most likely the
5208 // instruction that defines SrcReg has to perform a more/fewer-elements
5209 // legalization compatible with NarrowTy.
5210 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5211 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5212
5213 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5214 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5215 return UnableToLegalize;
5216
5217 // This is most likely DstTy (smaller than register size) packed in SrcTy
5218 // (larger than register size), and since the unmerge was not combined it will
5219 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5220 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5221
5222 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5223 //
5224 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5225 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5226 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5227 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5228 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5229 const int PartsPerUnmerge = NumDst / NumUnmerge;
5230
5231 for (int I = 0; I != NumUnmerge; ++I) {
5232 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5233
5234 for (int J = 0; J != PartsPerUnmerge; ++J)
5235 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5236 MIB.addUse(Unmerge.getReg(I));
5237 }
5238
5239 MI.eraseFromParent();
5240 return Legalized;
5241}
5242
5245 LLT NarrowTy) {
5246 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5247 // Requires compatible types. Otherwise the user of DstReg did not perform the
5248 // unmerge that should have been artifact-combined. Most likely the instruction
5249 // that uses DstReg has to do a more/fewer-elements legalization compatible with NarrowTy.
5250 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5251 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5252 if (NarrowTy == SrcTy)
5253 return UnableToLegalize;
5254
5255 // This attempts to lower part of an LCMTy merge/unmerge sequence. It is intended
5256 // for old MIR tests. Since the change to more/fewer-elements legalization it
5257 // should no longer be possible to generate MIR like this when starting from
5258 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5259 if (TypeIdx == 1) {
5260 assert(SrcTy.isVector() && "Expected vector types");
5261 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5262 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5263 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5264 return UnableToLegalize;
5265 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5266 //
5267 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5268 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5269 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5270 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5271 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5272 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5273
5274 SmallVector<Register, 8> Elts;
5275 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5276 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5277 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5278 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5279 Elts.push_back(Unmerge.getReg(j));
5280 }
5281
5282 SmallVector<Register, 8> NarrowTyElts;
5283 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5284 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5285 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5286 ++i, Offset += NumNarrowTyElts) {
5287 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5288 NarrowTyElts.push_back(
5289 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5290 }
5291
5292 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5293 MI.eraseFromParent();
5294 return Legalized;
5295 }
5296
5297 assert(TypeIdx == 0 && "Bad type index");
5298 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5299 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5300 return UnableToLegalize;
5301
5302 // This is most likely SrcTy (smaller than register size) packed in DstTy
5303 // (larger than register size), and since the merge was not combined it will be
5304 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5305 // (register size) pieces first, then merge the NarrowTy pieces to DstTy.
5306
5307 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5308 //
5309 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5310 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5311 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5312 SmallVector<Register, 8> NarrowTyElts;
5313 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5314 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5315 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5316 for (unsigned i = 0; i < NumParts; ++i) {
5317 SmallVector<Register, 8> Sources;
5318 for (unsigned j = 0; j < NumElts; ++j)
5319 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5320 NarrowTyElts.push_back(
5321 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5322 }
5323
5324 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5325 MI.eraseFromParent();
5326 return Legalized;
5327}
5328
5331 unsigned TypeIdx,
5332 LLT NarrowVecTy) {
5333 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5334 Register InsertVal;
5335 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5336
5337 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5338 if (IsInsert)
5339 InsertVal = MI.getOperand(2).getReg();
5340
5341 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5342 LLT VecTy = MRI.getType(SrcVec);
5343
5344 // If the index is a constant, we can really break this down as you would
5345 // expect, and index into the target size pieces.
5346 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5347 if (MaybeCst) {
5348 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5349 // Avoid out of bounds indexing the pieces.
5350 if (IdxVal >= VecTy.getNumElements()) {
5351 MIRBuilder.buildUndef(DstReg);
5352 MI.eraseFromParent();
5353 return Legalized;
5354 }
5355
5356 if (!NarrowVecTy.isVector()) {
5357 SmallVector<Register, 8> SplitPieces;
5358 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5359 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5360 if (IsInsert) {
5361 SplitPieces[IdxVal] = InsertVal;
5362 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5363 } else {
5364 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5365 }
5366 } else {
5367 SmallVector<Register, 8> VecParts;
5368 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5369
5370 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5371 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5372 TargetOpcode::G_ANYEXT);
5373
5374 unsigned NewNumElts = NarrowVecTy.getNumElements();
5375
5376 LLT IdxTy = MRI.getType(Idx);
5377 int64_t PartIdx = IdxVal / NewNumElts;
5378 auto NewIdx =
5379 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5380
5381 if (IsInsert) {
5382 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5383
5384 // Use the adjusted index to insert into one of the subvectors.
5385 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5386 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5387 VecParts[PartIdx] = InsertPart.getReg(0);
5388
5389 // Recombine the inserted subvector with the others to reform the result
5390 // vector.
5391 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5392 } else {
5393 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5394 }
5395 }
5396
5397 MI.eraseFromParent();
5398 return Legalized;
5399 }
5400
5401 // With a variable index, we can't perform the operation in a smaller type, so
5402 // we're forced to expand this.
5403 //
5404 // TODO: We could emit a chain of compare/select to figure out which piece to
5405 // index.
5406 return UnableToLegalize;
5407}
5408
5411 LLT NarrowTy) {
5412 // FIXME: Don't know how to handle secondary types yet.
5413 if (TypeIdx != 0)
5414 return UnableToLegalize;
5415
5416 if (!NarrowTy.isByteSized()) {
5417 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5418 return UnableToLegalize;
5419 }
5420
5421 // This implementation doesn't work for atomics. Give up instead of doing
5422 // something invalid.
5423 if (LdStMI.isAtomic())
5424 return UnableToLegalize;
5425
5426 bool IsLoad = isa<GLoad>(LdStMI);
5427 Register ValReg = LdStMI.getReg(0);
5428 Register AddrReg = LdStMI.getPointerReg();
5429 LLT ValTy = MRI.getType(ValReg);
5430
5431 // FIXME: Do we need a distinct NarrowMemory legalize action?
5432 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5433 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5434 return UnableToLegalize;
5435 }
5436
5437 int NumParts = -1;
5438 int NumLeftover = -1;
5439 LLT LeftoverTy;
5440 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5441 if (IsLoad) {
5442 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5443 } else {
5444 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5445 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5446 NumParts = NarrowRegs.size();
5447 NumLeftover = NarrowLeftoverRegs.size();
5448 }
5449 }
5450
5451 if (NumParts == -1)
5452 return UnableToLegalize;
5453
5454 LLT PtrTy = MRI.getType(AddrReg);
5455 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5456
5457 unsigned TotalSize = ValTy.getSizeInBits();
5458
5459 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5460 // is a load, return the new registers in ValRegs. For a store, each element
5461 // of ValRegs should already be of type PartTy. Returns the next offset that
5462 // needs to be handled.
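// For example, on a little-endian target an s96 value narrowed to s32 is
// split into three s32 accesses at byte offsets 0, 4 and 8 (a big-endian
// target walks the offsets in reverse).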
5463 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5464 auto MMO = LdStMI.getMMO();
5465 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5466 unsigned NumParts, unsigned Offset) -> unsigned {
5467 MachineFunction &MF = MIRBuilder.getMF();
5468 unsigned PartSize = PartTy.getSizeInBits();
5469 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5470 ++Idx) {
5471 unsigned ByteOffset = Offset / 8;
5472 Register NewAddrReg;
5473
5474 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5475 ByteOffset);
5476
5477 MachineMemOperand *NewMMO =
5478 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5479
5480 if (IsLoad) {
5481 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5482 ValRegs.push_back(Dst);
5483 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5484 } else {
5485 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5486 }
5487 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5488 }
5489
5490 return Offset;
5491 };
5492
5493 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5494 unsigned HandledOffset =
5495 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5496
5497 // Handle the rest of the register if this isn't an even type breakdown.
5498 if (LeftoverTy.isValid())
5499 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5500
5501 if (IsLoad) {
5502 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5503 LeftoverTy, NarrowLeftoverRegs);
5504 }
5505
5506 LdStMI.eraseFromParent();
5507 return Legalized;
5508}
5509
5512 LLT NarrowTy) {
5513 using namespace TargetOpcode;
5514 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5515 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5516
5517 switch (MI.getOpcode()) {
5518 case G_IMPLICIT_DEF:
5519 case G_TRUNC:
5520 case G_AND:
5521 case G_OR:
5522 case G_XOR:
5523 case G_ADD:
5524 case G_SUB:
5525 case G_MUL:
5526 case G_PTR_ADD:
5527 case G_SMULH:
5528 case G_UMULH:
5529 case G_FADD:
5530 case G_FMUL:
5531 case G_FSUB:
5532 case G_FNEG:
5533 case G_FABS:
5534 case G_FCANONICALIZE:
5535 case G_FDIV:
5536 case G_FREM:
5537 case G_FMA:
5538 case G_FMAD:
5539 case G_FPOW:
5540 case G_FEXP:
5541 case G_FEXP2:
5542 case G_FEXP10:
5543 case G_FLOG:
5544 case G_FLOG2:
5545 case G_FLOG10:
5546 case G_FLDEXP:
5547 case G_FNEARBYINT:
5548 case G_FCEIL:
5549 case G_FFLOOR:
5550 case G_FRINT:
5551 case G_INTRINSIC_LRINT:
5552 case G_INTRINSIC_LLRINT:
5553 case G_INTRINSIC_ROUND:
5554 case G_INTRINSIC_ROUNDEVEN:
5555 case G_LROUND:
5556 case G_LLROUND:
5557 case G_INTRINSIC_TRUNC:
5558 case G_FMODF:
5559 case G_FCOS:
5560 case G_FSIN:
5561 case G_FTAN:
5562 case G_FACOS:
5563 case G_FASIN:
5564 case G_FATAN:
5565 case G_FATAN2:
5566 case G_FCOSH:
5567 case G_FSINH:
5568 case G_FTANH:
5569 case G_FSQRT:
5570 case G_BSWAP:
5571 case G_BITREVERSE:
5572 case G_SDIV:
5573 case G_UDIV:
5574 case G_SREM:
5575 case G_UREM:
5576 case G_SDIVREM:
5577 case G_UDIVREM:
5578 case G_SMIN:
5579 case G_SMAX:
5580 case G_UMIN:
5581 case G_UMAX:
5582 case G_ABS:
5583 case G_FMINNUM:
5584 case G_FMAXNUM:
5585 case G_FMINNUM_IEEE:
5586 case G_FMAXNUM_IEEE:
5587 case G_FMINIMUM:
5588 case G_FMAXIMUM:
5589 case G_FMINIMUMNUM:
5590 case G_FMAXIMUMNUM:
5591 case G_FSHL:
5592 case G_FSHR:
5593 case G_ROTL:
5594 case G_ROTR:
5595 case G_FREEZE:
5596 case G_SADDSAT:
5597 case G_SSUBSAT:
5598 case G_UADDSAT:
5599 case G_USUBSAT:
5600 case G_UMULO:
5601 case G_SMULO:
5602 case G_SHL:
5603 case G_LSHR:
5604 case G_ASHR:
5605 case G_SSHLSAT:
5606 case G_USHLSAT:
5607 case G_CTLZ:
5608 case G_CTLZ_ZERO_UNDEF:
5609 case G_CTTZ:
5610 case G_CTTZ_ZERO_UNDEF:
5611 case G_CTPOP:
5612 case G_FCOPYSIGN:
5613 case G_ZEXT:
5614 case G_SEXT:
5615 case G_ANYEXT:
5616 case G_FPEXT:
5617 case G_FPTRUNC:
5618 case G_SITOFP:
5619 case G_UITOFP:
5620 case G_FPTOSI:
5621 case G_FPTOUI:
5622 case G_FPTOSI_SAT:
5623 case G_FPTOUI_SAT:
5624 case G_INTTOPTR:
5625 case G_PTRTOINT:
5626 case G_ADDRSPACE_CAST:
5627 case G_UADDO:
5628 case G_USUBO:
5629 case G_UADDE:
5630 case G_USUBE:
5631 case G_SADDO:
5632 case G_SSUBO:
5633 case G_SADDE:
5634 case G_SSUBE:
5635 case G_STRICT_FADD:
5636 case G_STRICT_FSUB:
5637 case G_STRICT_FMUL:
5638 case G_STRICT_FMA:
5639 case G_STRICT_FLDEXP:
5640 case G_FFREXP:
5641 return fewerElementsVectorMultiEltType(GMI, NumElts);
5642 case G_ICMP:
5643 case G_FCMP:
5644 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5645 case G_IS_FPCLASS:
5646 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5647 case G_SELECT:
5648 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5649 return fewerElementsVectorMultiEltType(GMI, NumElts);
5650 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5651 case G_PHI:
5652 return fewerElementsVectorPhi(GMI, NumElts);
5653 case G_UNMERGE_VALUES:
5654 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5655 case G_BUILD_VECTOR:
5656 assert(TypeIdx == 0 && "not a vector type index");
5657 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5658 case G_CONCAT_VECTORS:
5659 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5660 return UnableToLegalize;
5661 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5662 case G_EXTRACT_VECTOR_ELT:
5663 case G_INSERT_VECTOR_ELT:
5664 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5665 case G_LOAD:
5666 case G_STORE:
5667 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5668 case G_SEXT_INREG:
5669 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5670 GISEL_VECREDUCE_CASES_NONSEQ
5671 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5672 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5673 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5674 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5675 case G_SHUFFLE_VECTOR:
5676 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5677 case G_FPOWI:
5678 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5679 case G_BITCAST:
5680 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5681 case G_INTRINSIC_FPTRUNC_ROUND:
5682 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5683 default:
5684 return UnableToLegalize;
5685 }
5686}
5687
5690 LLT NarrowTy) {
5691 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5692 "Not a bitcast operation");
5693
5694 if (TypeIdx != 0)
5695 return UnableToLegalize;
5696
5697 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5698
5699 unsigned NewElemCount =
5700 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5701 SmallVector<Register> SrcVRegs, BitcastVRegs;
5702 if (NewElemCount == 1) {
5703 LLT SrcNarrowTy = SrcTy.getElementType();
5704
5705 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5706 getUnmergeResults(SrcVRegs, *Unmerge);
5707 } else {
5708 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5709
5710 // Split the Src and Dst Reg into smaller registers
5711 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5712 return UnableToLegalize;
5713 }
5714
5715 // Build new smaller bitcast instructions
5716 // Leftover types are not supported for now, but eventually will have to be.
5717 for (Register Reg : SrcVRegs)
5718 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5719
5720 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5721 MI.eraseFromParent();
5722 return Legalized;
5723}
5724
5726 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5727 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5728 if (TypeIdx != 0)
5729 return UnableToLegalize;
5730
5731 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5732 MI.getFirst3RegLLTs();
5733 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5734 // The shuffle should be canonicalized by now.
5735 if (DstTy != Src1Ty)
5736 return UnableToLegalize;
5737 if (DstTy != Src2Ty)
5738 return UnableToLegalize;
5739
5740 if (!isPowerOf2_32(DstTy.getNumElements()))
5741 return UnableToLegalize;
5742
5743 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5744 // Further legalization attempts will be needed to split further.
5745 NarrowTy =
5746 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5747 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5748
5749 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5750 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5751 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5752 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5753 SplitSrc2Regs[1]};
5754
5755 Register Hi, Lo;
5756
5757 // If Lo or Hi uses elements from at most two of the four input vectors, then
5758 // express it as a vector shuffle of those two inputs. Otherwise extract the
5759 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
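// For example, splitting an <8 x s32> shuffle gives two <4 x s32> halves; a
// half that reads from at most two of the four <4 x s32> inputs becomes its
// own G_SHUFFLE_VECTOR, otherwise it is rebuilt element by element.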
5760 SmallVector<int, 16> Ops;
5761 for (unsigned High = 0; High < 2; ++High) {
5762 Register &Output = High ? Hi : Lo;
5763
5764 // Build a shuffle mask for the output, discovering on the fly which
5765 // input vectors to use as shuffle operands (recorded in InputUsed).
5766 // If building a suitable shuffle vector proves too hard, then bail
5767 // out with useBuildVector set.
5768 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5769 unsigned FirstMaskIdx = High * NewElts;
5770 bool UseBuildVector = false;
5771 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5772 // The mask element. This indexes into the input.
5773 int Idx = Mask[FirstMaskIdx + MaskOffset];
5774
5775 // The input vector this mask element indexes into.
5776 unsigned Input = (unsigned)Idx / NewElts;
5777
5778 if (Input >= std::size(Inputs)) {
5779 // The mask element does not index into any input vector.
5780 Ops.push_back(-1);
5781 continue;
5782 }
5783
5784 // Turn the index into an offset from the start of the input vector.
5785 Idx -= Input * NewElts;
5786
5787 // Find or create a shuffle vector operand to hold this input.
5788 unsigned OpNo;
5789 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5790 if (InputUsed[OpNo] == Input) {
5791 // This input vector is already an operand.
5792 break;
5793 } else if (InputUsed[OpNo] == -1U) {
5794 // Create a new operand for this input vector.
5795 InputUsed[OpNo] = Input;
5796 break;
5797 }
5798 }
5799
5800 if (OpNo >= std::size(InputUsed)) {
5801 // More than two input vectors used! Give up on trying to create a
5802 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5803 UseBuildVector = true;
5804 break;
5805 }
5806
5807 // Add the mask index for the new shuffle vector.
5808 Ops.push_back(Idx + OpNo * NewElts);
5809 }
5810
5811 if (UseBuildVector) {
5812 LLT EltTy = NarrowTy.getElementType();
5813 SmallVector<Register, 16> SVOps;
5814
5815 // Extract the input elements by hand.
5816 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5817 // The mask element. This indexes into the input.
5818 int Idx = Mask[FirstMaskIdx + MaskOffset];
5819
5820 // The input vector this mask element indexes into.
5821 unsigned Input = (unsigned)Idx / NewElts;
5822
5823 if (Input >= std::size(Inputs)) {
5824 // The mask element is "undef" or indexes off the end of the input.
5825 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5826 continue;
5827 }
5828
5829 // Turn the index into an offset from the start of the input vector.
5830 Idx -= Input * NewElts;
5831
5832 // Extract the vector element by hand.
5833 SVOps.push_back(MIRBuilder
5834 .buildExtractVectorElement(
5835 EltTy, Inputs[Input],
5836 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5837 .getReg(0));
5838 }
5839
5840 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5841 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5842 } else if (InputUsed[0] == -1U) {
5843 // No input vectors were used! The result is undefined.
5844 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5845 } else if (NewElts == 1) {
5846 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5847 } else {
5848 Register Op0 = Inputs[InputUsed[0]];
5849 // If only one input was used, use an undefined vector for the other.
5850 Register Op1 = InputUsed[1] == -1U
5851 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5852 : Inputs[InputUsed[1]];
5853 // At least one input vector was used. Create a new shuffle vector.
5854 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5855 }
5856
5857 Ops.clear();
5858 }
5859
5860 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5861 MI.eraseFromParent();
5862 return Legalized;
5863}
5864
5866 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5867 auto &RdxMI = cast<GVecReduce>(MI);
5868
5869 if (TypeIdx != 1)
5870 return UnableToLegalize;
5871
5872 // The semantics of the normal non-sequential reductions allow us to freely
5873 // re-associate the operation.
5874 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5875
5876 if (NarrowTy.isVector() &&
5877 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5878 return UnableToLegalize;
5879
5880 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5881 SmallVector<Register> SplitSrcs;
5882 // If NarrowTy is a scalar then we're being asked to scalarize.
5883 const unsigned NumParts =
5884 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5885 : SrcTy.getNumElements();
5886
5887 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5888 if (NarrowTy.isScalar()) {
5889 if (DstTy != NarrowTy)
5890 return UnableToLegalize; // FIXME: handle implicit extensions.
5891
5892 if (isPowerOf2_32(NumParts)) {
5893 // Generate a tree of scalar operations to reduce the critical path.
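// For example, 8 pieces combine pairwise into 4, then 2, then 1: three
// dependent levels instead of a seven-deep sequential chain.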
5894 SmallVector<Register> PartialResults;
5895 unsigned NumPartsLeft = NumParts;
5896 while (NumPartsLeft > 1) {
5897 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5898 PartialResults.emplace_back(
5899 MIRBuilder
5900 .buildInstr(ScalarOpc, {NarrowTy},
5901 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5902 .getReg(0));
5903 }
5904 SplitSrcs = PartialResults;
5905 PartialResults.clear();
5906 NumPartsLeft = SplitSrcs.size();
5907 }
5908 assert(SplitSrcs.size() == 1);
5909 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5910 MI.eraseFromParent();
5911 return Legalized;
5912 }
5913 // If we can't generate a tree, then just do sequential operations.
5914 Register Acc = SplitSrcs[0];
5915 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5916 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5917 .getReg(0);
5918 MIRBuilder.buildCopy(DstReg, Acc);
5919 MI.eraseFromParent();
5920 return Legalized;
5921 }
5922 SmallVector<Register> PartialReductions;
5923 for (unsigned Part = 0; Part < NumParts; ++Part) {
5924 PartialReductions.push_back(
5925 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5926 .getReg(0));
5927 }
5928
5929 // If the types involved are powers of 2, we can generate intermediate vector
5930 // ops, before generating a final reduction operation.
5931 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5932 isPowerOf2_32(NarrowTy.getNumElements())) {
5933 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5934 }
5935
5936 Register Acc = PartialReductions[0];
5937 for (unsigned Part = 1; Part < NumParts; ++Part) {
5938 if (Part == NumParts - 1) {
5939 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5940 {Acc, PartialReductions[Part]});
5941 } else {
5942 Acc = MIRBuilder
5943 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5944 .getReg(0);
5945 }
5946 }
5947 MI.eraseFromParent();
5948 return Legalized;
5949}
5950
5953 unsigned int TypeIdx,
5954 LLT NarrowTy) {
5955 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5956 MI.getFirst3RegLLTs();
5957 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5958 DstTy != NarrowTy)
5959 return UnableToLegalize;
5960
5961 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5962 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5963 "Unexpected vecreduce opcode");
5964 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5965 ? TargetOpcode::G_FADD
5966 : TargetOpcode::G_FMUL;
5967
5968 SmallVector<Register> SplitSrcs;
5969 unsigned NumParts = SrcTy.getNumElements();
5970 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5971 Register Acc = ScalarReg;
5972 for (unsigned i = 0; i < NumParts; i++)
5973 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5974 .getReg(0);
5975
5976 MIRBuilder.buildCopy(DstReg, Acc);
5977 MI.eraseFromParent();
5978 return Legalized;
5979}
5980
5982LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5983 LLT SrcTy, LLT NarrowTy,
5984 unsigned ScalarOpc) {
5985 SmallVector<Register> SplitSrcs;
5986 // Split the sources into NarrowTy size pieces.
5987 extractParts(SrcReg, NarrowTy,
5988 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5989 MIRBuilder, MRI);
5990 // We're going to do a tree reduction using vector operations until we have
5991 // one NarrowTy-sized value left.
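// For example, <16 x s32> reduced with NarrowTy = <4 x s32> goes 4 pieces
// -> 2 -> 1; the remaining <4 x s32> then feeds the original reduction below.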
5992 while (SplitSrcs.size() > 1) {
5993 SmallVector<Register> PartialRdxs;
5994 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5995 Register LHS = SplitSrcs[Idx];
5996 Register RHS = SplitSrcs[Idx + 1];
5997 // Create the intermediate vector op.
5998 Register Res =
5999 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6000 PartialRdxs.push_back(Res);
6001 }
6002 SplitSrcs = std::move(PartialRdxs);
6003 }
6004 // Finally generate the requested NarrowTy based reduction.
6005 Observer.changingInstr(MI);
6006 MI.getOperand(1).setReg(SplitSrcs[0]);
6007 Observer.changedInstr(MI);
6008 return Legalized;
6009}
6010
6013 const LLT HalfTy, const LLT AmtTy) {
6014
6015 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6016 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6017 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6018
6019 if (Amt.isZero()) {
6020 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6021 MI.eraseFromParent();
6022 return Legalized;
6023 }
6024
6025 LLT NVT = HalfTy;
6026 unsigned NVTBits = HalfTy.getSizeInBits();
6027 unsigned VTBits = 2 * NVTBits;
6028
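// Worked example: an s64 G_SHL split into s32 halves with Amt = 40 takes the
// Amt > NVTBits path below, giving Lo = 0 and Hi = InL << (40 - 32).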
6029 SrcOp Lo(Register(0)), Hi(Register(0));
6030 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6031 if (Amt.ugt(VTBits)) {
6032 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6033 } else if (Amt.ugt(NVTBits)) {
6034 Lo = MIRBuilder.buildConstant(NVT, 0);
6035 Hi = MIRBuilder.buildShl(NVT, InL,
6036 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6037 } else if (Amt == NVTBits) {
6038 Lo = MIRBuilder.buildConstant(NVT, 0);
6039 Hi = InL;
6040 } else {
6041 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6042 auto OrLHS =
6043 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6044 auto OrRHS = MIRBuilder.buildLShr(
6045 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6046 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6047 }
6048 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6049 if (Amt.ugt(VTBits)) {
6050 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6051 } else if (Amt.ugt(NVTBits)) {
6052 Lo = MIRBuilder.buildLShr(NVT, InH,
6053 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6054 Hi = MIRBuilder.buildConstant(NVT, 0);
6055 } else if (Amt == NVTBits) {
6056 Lo = InH;
6057 Hi = MIRBuilder.buildConstant(NVT, 0);
6058 } else {
6059 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6060
6061 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6062 auto OrRHS = MIRBuilder.buildShl(
6063 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6064
6065 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6066 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6067 }
6068 } else {
6069 if (Amt.ugt(VTBits)) {
6070 Hi = Lo = MIRBuilder.buildAShr(
6071 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6072 } else if (Amt.ugt(NVTBits)) {
6073 Lo = MIRBuilder.buildAShr(NVT, InH,
6074 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6075 Hi = MIRBuilder.buildAShr(NVT, InH,
6076 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6077 } else if (Amt == NVTBits) {
6078 Lo = InH;
6079 Hi = MIRBuilder.buildAShr(NVT, InH,
6080 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6081 } else {
6082 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6083
6084 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6085 auto OrRHS = MIRBuilder.buildShl(
6086 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6087
6088 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6089 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6090 }
6091 }
6092
6093 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6094 MI.eraseFromParent();
6095
6096 return Legalized;
6097}
6098
6101 LLT RequestedTy) {
6102 if (TypeIdx == 1) {
6103 Observer.changingInstr(MI);
6104 narrowScalarSrc(MI, RequestedTy, 2);
6105 Observer.changedInstr(MI);
6106 return Legalized;
6107 }
6108
6109 Register DstReg = MI.getOperand(0).getReg();
6110 LLT DstTy = MRI.getType(DstReg);
6111 if (DstTy.isVector())
6112 return UnableToLegalize;
6113
6114 Register Amt = MI.getOperand(2).getReg();
6115 LLT ShiftAmtTy = MRI.getType(Amt);
6116 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6117 if (DstEltSize % 2 != 0)
6118 return UnableToLegalize;
6119
6120 // Check if we should use multi-way splitting instead of recursive binary
6121 // splitting.
6122 //
6123 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6124 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6125 // and dependency chains created by the usual binary splitting approach
6126 // (128->64->32).
6127 //
6128 // The >= 8 parts threshold ensures we only use this optimization when binary
6129 // splitting would require multiple recursive passes, avoiding overhead for
6130 // simple 2-way splits where the binary approach is sufficient.
6131 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6132 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6133 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6134 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6135 // steps).
6136 if (NumParts >= 8)
6137 return narrowScalarShiftMultiway(MI, RequestedTy);
6138 }
6139
6140 // Fall back to binary splitting:
6141 // Ignore the input type. We can only go to exactly half the size of the
6142 // input. If that isn't small enough, the resulting pieces will be further
6143 // legalized.
6144 const unsigned NewBitSize = DstEltSize / 2;
6145 const LLT HalfTy = LLT::scalar(NewBitSize);
6146 const LLT CondTy = LLT::scalar(1);
6147
6148 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6149 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6150 ShiftAmtTy);
6151 }
6152
6153 // TODO: Expand with known bits.
6154
6155 // Handle the fully general expansion by an unknown amount.
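// For the "short" G_SHL case (Amt < NewBitSize): Lo = InL << Amt and
// Hi = (InH << Amt) | (InL >> (NewBitSize - Amt)). The extra IsZero select
// guards the Amt == 0 case, where NewBitSize - Amt would be a full-width
// (poison) shift.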
6156 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6157
6158 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6159 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6160 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6161
6162 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6163 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6164
6165 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6166 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6167 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6168
6169 Register ResultRegs[2];
6170 switch (MI.getOpcode()) {
6171 case TargetOpcode::G_SHL: {
6172 // Short: ShAmt < NewBitSize
6173 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6174
6175 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6176 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6177 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6178
6179 // Long: ShAmt >= NewBitSize
6180 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6181 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6182
6183 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6184 auto Hi = MIRBuilder.buildSelect(
6185 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6186
6187 ResultRegs[0] = Lo.getReg(0);
6188 ResultRegs[1] = Hi.getReg(0);
6189 break;
6190 }
6191 case TargetOpcode::G_LSHR:
6192 case TargetOpcode::G_ASHR: {
6193 // Short: ShAmt < NewBitSize
6194 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6195
6196 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6197 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6198 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6199
6200 // Long: ShAmt >= NewBitSize
6201 MachineInstrBuilder HiL;
6202 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6203 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6204 } else {
6205 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6206 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6207 }
6208 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6209 {InH, AmtExcess}); // Lo from Hi part.
6210
6211 auto Lo = MIRBuilder.buildSelect(
6212 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6213
6214 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6215
6216 ResultRegs[0] = Lo.getReg(0);
6217 ResultRegs[1] = Hi.getReg(0);
6218 break;
6219 }
6220 default:
6221 llvm_unreachable("not a shift");
6222 }
6223
6224 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6225 MI.eraseFromParent();
6226 return Legalized;
6227}
6228
6230 unsigned PartIdx,
6231 unsigned NumParts,
6232 ArrayRef<Register> SrcParts,
6233 const ShiftParams &Params,
6234 LLT TargetTy, LLT ShiftAmtTy) {
6235 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6236 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6237 assert(WordShiftConst && BitShiftConst && "Expected constants");
6238
6239 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6240 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6241 const bool NeedsInterWordShift = ShiftBits != 0;
6242
6243 switch (Opcode) {
6244 case TargetOpcode::G_SHL: {
6245 // Data moves from lower indices to higher indices
6246 // If this part would come from a source beyond our range, it's zero
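// E.g. a 128-bit G_SHL by 40 with 32-bit parts: ShiftWords = 1, ShiftBits = 8,
// so part 0 is zero, part 1 = Src[0] << 8, and part I (I >= 2) is
// (Src[I-1] << 8) | (Src[I-2] >> 24).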
6247 if (PartIdx < ShiftWords)
6248 return Params.Zero;
6249
6250 unsigned SrcIdx = PartIdx - ShiftWords;
6251 if (!NeedsInterWordShift)
6252 return SrcParts[SrcIdx];
6253
6254 // Combine shifted main part with carry from previous part
6255 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6256 if (SrcIdx > 0) {
6257 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6258 Params.InvBitShift);
6259 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6260 }
6261 return Hi.getReg(0);
6262 }
6263
6264 case TargetOpcode::G_LSHR: {
6265 unsigned SrcIdx = PartIdx + ShiftWords;
6266 if (SrcIdx >= NumParts)
6267 return Params.Zero;
6268 if (!NeedsInterWordShift)
6269 return SrcParts[SrcIdx];
6270
6271 // Combine shifted main part with carry from next part
6272 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6273 if (SrcIdx + 1 < NumParts) {
6274 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6275 Params.InvBitShift);
6276 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6277 }
6278 return Lo.getReg(0);
6279 }
6280
6281 case TargetOpcode::G_ASHR: {
6282 // Like LSHR but preserves sign bit
6283 unsigned SrcIdx = PartIdx + ShiftWords;
6284 if (SrcIdx >= NumParts)
6285 return Params.SignBit;
6286 if (!NeedsInterWordShift)
6287 return SrcParts[SrcIdx];
6288
6289 // Only the original MSB part uses arithmetic shift to preserve sign. All
6290 // other parts use logical shift since they're just moving data bits.
6291 auto Lo =
6292 (SrcIdx == NumParts - 1)
6293 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6294 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6295 Register HiSrc =
6296 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6297 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6298 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6299 }
6300
6301 default:
6302 llvm_unreachable("not a shift");
6303 }
6304}
6305
6307 Register MainOperand,
6308 Register ShiftAmt,
6309 LLT TargetTy,
6310 Register CarryOperand) {
6311 // This helper generates a single output part for variable shifts by combining
6312 // the main operand (shifted by BitShift) with carry bits from an adjacent
6313 // part.
6314
6315 // For G_ASHR, individual parts don't have their own sign bit, only the
6316 // complete value does. So we use LSHR for the main operand shift in ASHR
6317 // context.
6318 unsigned MainOpcode =
6319 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6320
6321 // Perform the primary shift on the main operand
6322 Register MainShifted =
6323 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6324 .getReg(0);
6325
6326 // No carry operand available
6327 if (!CarryOperand.isValid())
6328 return MainShifted;
6329
6330 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6331 // so carry bits aren't needed.
6332 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6333 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6334 LLT BoolTy = LLT::scalar(1);
6335 auto IsZeroBitShift =
6336 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6337
6338 // Extract bits from the adjacent part that will "carry over" into this part.
6339 // The carry direction is opposite to the main shift direction, so we can
6340 // align the two shifted values before combining them with OR.
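// E.g. for G_LSHR of 32-bit parts with BitShift = 18: MainShifted = Main >> 18
// and CarryBits = Carry << (32 - 18), which are then OR'd together.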
6341
6342 // Determine the carry shift opcode (opposite direction)
6343 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6344 : TargetOpcode::G_SHL;
6345
6346 // Calculate inverse shift amount: BitWidth - ShiftAmt
6347 auto TargetBitsConst =
6348 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6349 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6350
6351 // Shift the carry operand
6352 Register CarryBits =
6353 MIRBuilder
6354 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6355 .getReg(0);
6356
6357 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6358 // TargetBits which would be poison for the individual carry shift operation).
6359 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6360 Register SafeCarryBits =
6361 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6362 .getReg(0);
6363
6364 // Combine the main shifted part with the carry bits
6365 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6366}
6367
6370 const APInt &Amt,
6371 LLT TargetTy,
6372 LLT ShiftAmtTy) {
6373 // Any wide shift can be decomposed into WordShift + BitShift components.
6374 // When shift amount is known constant, directly compute the decomposition
6375 // values and generate constant registers.
6376 Register DstReg = MI.getOperand(0).getReg();
6377 Register SrcReg = MI.getOperand(1).getReg();
6378 LLT DstTy = MRI.getType(DstReg);
6379
6380 const unsigned DstBits = DstTy.getScalarSizeInBits();
6381 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6382 const unsigned NumParts = DstBits / TargetBits;
6383
6384 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6385
6386 // When the shift amount is known at compile time, we just calculate which
6387 // source parts contribute to each output part.
6388
6389 SmallVector<Register, 8> SrcParts;
6390 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6391
6392 if (Amt.isZero()) {
6393 // No shift needed, just copy
6394 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6395 MI.eraseFromParent();
6396 return Legalized;
6397 }
6398
6399 ShiftParams Params;
6400 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6401 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6402
6403 // Generate constants and values needed by all shift types
6404 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6405 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6406 Params.InvBitShift =
6407 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6408 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6409
6410 // For ASHR, we need the sign-extended value to fill shifted-out positions
6411 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6412 Params.SignBit =
6413 MIRBuilder
6414 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6415 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6416 .getReg(0);
6417
6418 SmallVector<Register, 8> DstParts(NumParts);
6419 for (unsigned I = 0; I < NumParts; ++I)
6420 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6421 Params, TargetTy, ShiftAmtTy);
6422
6423 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6424 MI.eraseFromParent();
6425 return Legalized;
6426}
6427
6430 Register DstReg = MI.getOperand(0).getReg();
6431 Register SrcReg = MI.getOperand(1).getReg();
6432 Register AmtReg = MI.getOperand(2).getReg();
6433 LLT DstTy = MRI.getType(DstReg);
6434 LLT ShiftAmtTy = MRI.getType(AmtReg);
6435
6436 const unsigned DstBits = DstTy.getScalarSizeInBits();
6437 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6438 const unsigned NumParts = DstBits / TargetBits;
6439
6440 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6441 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6442
6443 // If the shift amount is known at compile time, we can use direct indexing
6444 // instead of generating select chains in the general case.
6445 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6446 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6447 ShiftAmtTy);
6448
6449 // For runtime-variable shift amounts, we must generate a more complex
6450 // sequence that handles all possible shift values using select chains.
6451
6452 // Split the input into target-sized pieces
6453 SmallVector<Register, 8> SrcParts;
6454 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6455
6456 // Shifting by zero should be a no-op.
6457 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6458 LLT BoolTy = LLT::scalar(1);
6459 auto IsZeroShift =
6460 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6461
6462 // Any wide shift can be decomposed into two components:
6463 // 1. WordShift: number of complete target-sized words to shift
6464 // 2. BitShift: number of bits to shift within each word
6465 //
6466 // Example: 128-bit >> 50 with 32-bit target:
6467 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6468 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6469 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6470 auto TargetBitsLog2Const =
6471 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6472 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6473
6474 Register WordShift =
6475 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6476 Register BitShift =
6477 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6478
6479 // Fill values:
6480 // - SHL/LSHR: fill with zeros
6481 // - ASHR: fill with sign-extended MSB
6482 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6483
6484 Register FillValue;
6485 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6486 auto TargetBitsMinusOneConst =
6487 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6488 FillValue = MIRBuilder
6489 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6490 TargetBitsMinusOneConst)
6491 .getReg(0);
6492 } else {
6493 FillValue = ZeroReg;
6494 }
6495
6496 SmallVector<Register, 8> DstParts(NumParts);
6497
6498 // For each output part, generate a select chain that chooses the correct
6499 // result based on the runtime WordShift value. This handles all possible
6500 // word shift amounts by pre-calculating what each would produce.
6501 for (unsigned I = 0; I < NumParts; ++I) {
6502 // Initialize with appropriate default value for this shift type
6503 Register InBoundsResult = FillValue;
6504
6505 // clang-format off
6506 // Build a branchless select chain by pre-computing results for all possible
6507 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6508 //
6509 // K=0: select(WordShift==0, result0, FillValue)
6510 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6511 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6512 // clang-format on
6513 for (unsigned K = 0; K < NumParts; ++K) {
6514 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6515 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6516 WordShift, WordShiftKConst);
6517
6518 // Calculate source indices for this word shift
6519 //
6520 // For 4-part 128-bit value with K=1 word shift:
6521 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6522 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6523 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6524 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6525 int MainSrcIdx;
6526 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6527
6528 switch (MI.getOpcode()) {
6529 case TargetOpcode::G_SHL:
6530 MainSrcIdx = (int)I - (int)K;
6531 CarrySrcIdx = MainSrcIdx - 1;
6532 break;
6533 case TargetOpcode::G_LSHR:
6534 case TargetOpcode::G_ASHR:
6535 MainSrcIdx = (int)I + (int)K;
6536 CarrySrcIdx = MainSrcIdx + 1;
6537 break;
6538 default:
6539 llvm_unreachable("Not a shift");
6540 }
6541
6542 // Check bounds and build the result for this word shift
6543 Register ResultForK;
6544 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6545 Register MainOp = SrcParts[MainSrcIdx];
6546 Register CarryOp;
6547
6548 // Determine carry operand with bounds checking
6549 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6550 CarryOp = SrcParts[CarrySrcIdx];
6551 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6552 CarrySrcIdx >= (int)NumParts)
6553 CarryOp = FillValue; // Use sign extension
6554
6555 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6556 TargetTy, CarryOp);
6557 } else {
6558 // Out of bounds - use fill value for this k
6559 ResultForK = FillValue;
6560 }
6561
6562 // Select this result if WordShift equals k
6563 InBoundsResult =
6564 MIRBuilder
6565 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6566 .getReg(0);
6567 }
6568
6569 // Handle zero-shift special case: if shift is 0, use original input
6570 DstParts[I] =
6571 MIRBuilder
6572 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6573 .getReg(0);
6574 }
6575
6576 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6577 MI.eraseFromParent();
6578 return Legalized;
6579}
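// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// A plain-C++ model of the WordShift/BitShift decomposition emitted above,
// assuming a 128-bit G_LSHR narrowed to 32-bit parts. The name lshr128Part is
// invented for this sketch, and `unsigned __int128` needs GCC/Clang.
#include <cassert>
#include <cstdint>

static uint32_t lshr128Part(const uint32_t Src[4], unsigned I, unsigned Amt) {
  unsigned WordShift = Amt / 32; // complete 32-bit words to shift out
  unsigned BitShift = Amt % 32;  // remaining shift within each word
  unsigned MainIdx = I + WordShift;
  if (MainIdx >= 4)
    return 0; // shifted entirely out of range: zero fill
  uint32_t Res = Src[MainIdx] >> BitShift;
  if (BitShift != 0 && MainIdx + 1 < 4)
    Res |= Src[MainIdx + 1] << (32 - BitShift); // carried-in bits from the next word
  return Res;
}

int main() {
  const uint32_t Src[4] = {0x89abcdefu, 0x01234567u, 0xdeadbeefu, 0x0afebabeu};
  unsigned __int128 Wide = 0;
  for (int I = 3; I >= 0; --I)
    Wide = (Wide << 32) | Src[I];
  for (unsigned Amt = 0; Amt < 128; ++Amt)
    for (unsigned I = 0; I < 4; ++I)
      assert(lshr128Part(Src, I, Amt) ==
             (uint32_t)((Wide >> Amt) >> (32 * I)));
  return 0;
}
// --- End of editor sketch ---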
6580
6581LegalizerHelper::LegalizeResult
6582LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6583 LLT MoreTy) {
6584 assert(TypeIdx == 0 && "Expecting only Idx 0");
6585
6586 Observer.changingInstr(MI);
6587 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6588 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6589 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6590 moreElementsVectorSrc(MI, MoreTy, I);
6591 }
6592
6593 MachineBasicBlock &MBB = *MI.getParent();
6594 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6595 moreElementsVectorDst(MI, MoreTy, 0);
6596 Observer.changedInstr(MI);
6597 return Legalized;
6598}
6599
6600MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6601 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6602 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6603
6604 switch (Opcode) {
6605 default:
6607 "getNeutralElementForVecReduce called with invalid opcode!");
6608 case TargetOpcode::G_VECREDUCE_ADD:
6609 case TargetOpcode::G_VECREDUCE_OR:
6610 case TargetOpcode::G_VECREDUCE_XOR:
6611 case TargetOpcode::G_VECREDUCE_UMAX:
6612 return MIRBuilder.buildConstant(Ty, 0);
6613 case TargetOpcode::G_VECREDUCE_MUL:
6614 return MIRBuilder.buildConstant(Ty, 1);
6615 case TargetOpcode::G_VECREDUCE_AND:
6616 case TargetOpcode::G_VECREDUCE_UMIN:
6617 return MIRBuilder.buildConstant(
6618 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6619 case TargetOpcode::G_VECREDUCE_SMAX:
6620 return MIRBuilder.buildConstant(
6621 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6622 case TargetOpcode::G_VECREDUCE_SMIN:
6623 return MIRBuilder.buildConstant(
6624 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6625 case TargetOpcode::G_VECREDUCE_FADD:
6626 return MIRBuilder.buildFConstant(Ty, -0.0);
6627 case TargetOpcode::G_VECREDUCE_FMUL:
6628 return MIRBuilder.buildFConstant(Ty, 1.0);
6629 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6630 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6631 assert(false && "getNeutralElementForVecReduce unimplemented for "
6632 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6633 }
6634 llvm_unreachable("switch expected to return!");
6635}
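// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// Why these constants are safe: padding a reduction's input with the neutral
// element of the operation leaves the result unchanged. A small host-side
// check for the ADD (0) and AND (all-ones) cases used above.
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  std::vector<uint32_t> Elts = {3, 7, 11};
  uint32_t Add = std::accumulate(Elts.begin(), Elts.end(), 0u);
  uint32_t And = std::accumulate(Elts.begin(), Elts.end(), ~0u,
                                 [](uint32_t A, uint32_t B) { return A & B; });

  std::vector<uint32_t> AddPadded = Elts;
  AddPadded.push_back(0); // neutral element for ADD/OR/XOR/UMAX
  std::vector<uint32_t> AndPadded = Elts;
  AndPadded.push_back(~0u); // neutral element for AND/UMIN

  assert(std::accumulate(AddPadded.begin(), AddPadded.end(), 0u) == Add);
  assert(std::accumulate(AndPadded.begin(), AndPadded.end(), ~0u,
                         [](uint32_t A, uint32_t B) { return A & B; }) == And);
  return 0;
}
// --- End of editor sketch ---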
6636
6637LegalizerHelper::LegalizeResult
6638LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6639 LLT MoreTy) {
6640 unsigned Opc = MI.getOpcode();
6641 switch (Opc) {
6642 case TargetOpcode::G_IMPLICIT_DEF:
6643 case TargetOpcode::G_LOAD: {
6644 if (TypeIdx != 0)
6645 return UnableToLegalize;
6646 Observer.changingInstr(MI);
6647 moreElementsVectorDst(MI, MoreTy, 0);
6648 Observer.changedInstr(MI);
6649 return Legalized;
6650 }
6651 case TargetOpcode::G_STORE:
6652 if (TypeIdx != 0)
6653 return UnableToLegalize;
6654 Observer.changingInstr(MI);
6655 moreElementsVectorSrc(MI, MoreTy, 0);
6656 Observer.changedInstr(MI);
6657 return Legalized;
6658 case TargetOpcode::G_AND:
6659 case TargetOpcode::G_OR:
6660 case TargetOpcode::G_XOR:
6661 case TargetOpcode::G_ADD:
6662 case TargetOpcode::G_SUB:
6663 case TargetOpcode::G_MUL:
6664 case TargetOpcode::G_FADD:
6665 case TargetOpcode::G_FSUB:
6666 case TargetOpcode::G_FMUL:
6667 case TargetOpcode::G_FDIV:
6668 case TargetOpcode::G_FCOPYSIGN:
6669 case TargetOpcode::G_UADDSAT:
6670 case TargetOpcode::G_USUBSAT:
6671 case TargetOpcode::G_SADDSAT:
6672 case TargetOpcode::G_SSUBSAT:
6673 case TargetOpcode::G_SMIN:
6674 case TargetOpcode::G_SMAX:
6675 case TargetOpcode::G_UMIN:
6676 case TargetOpcode::G_UMAX:
6677 case TargetOpcode::G_FMINNUM:
6678 case TargetOpcode::G_FMAXNUM:
6679 case TargetOpcode::G_FMINNUM_IEEE:
6680 case TargetOpcode::G_FMAXNUM_IEEE:
6681 case TargetOpcode::G_FMINIMUM:
6682 case TargetOpcode::G_FMAXIMUM:
6683 case TargetOpcode::G_FMINIMUMNUM:
6684 case TargetOpcode::G_FMAXIMUMNUM:
6685 case TargetOpcode::G_STRICT_FADD:
6686 case TargetOpcode::G_STRICT_FSUB:
6687 case TargetOpcode::G_STRICT_FMUL: {
6688 Observer.changingInstr(MI);
6689 moreElementsVectorSrc(MI, MoreTy, 1);
6690 moreElementsVectorSrc(MI, MoreTy, 2);
6691 moreElementsVectorDst(MI, MoreTy, 0);
6692 Observer.changedInstr(MI);
6693 return Legalized;
6694 }
6695 case TargetOpcode::G_SHL:
6696 case TargetOpcode::G_ASHR:
6697 case TargetOpcode::G_LSHR: {
6698 Observer.changingInstr(MI);
6699 moreElementsVectorSrc(MI, MoreTy, 1);
6700 // The shift operand may have a different scalar type from the source and
6701 // destination operands.
6702 LLT ShiftMoreTy = MoreTy.changeElementType(
6703 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6704 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6705 moreElementsVectorDst(MI, MoreTy, 0);
6706 Observer.changedInstr(MI);
6707 return Legalized;
6708 }
6709 case TargetOpcode::G_FMA:
6710 case TargetOpcode::G_STRICT_FMA:
6711 case TargetOpcode::G_FSHR:
6712 case TargetOpcode::G_FSHL: {
6713 Observer.changingInstr(MI);
6714 moreElementsVectorSrc(MI, MoreTy, 1);
6715 moreElementsVectorSrc(MI, MoreTy, 2);
6716 moreElementsVectorSrc(MI, MoreTy, 3);
6717 moreElementsVectorDst(MI, MoreTy, 0);
6718 Observer.changedInstr(MI);
6719 return Legalized;
6720 }
6721 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6722 case TargetOpcode::G_EXTRACT:
6723 if (TypeIdx != 1)
6724 return UnableToLegalize;
6725 Observer.changingInstr(MI);
6726 moreElementsVectorSrc(MI, MoreTy, 1);
6727 Observer.changedInstr(MI);
6728 return Legalized;
6729 case TargetOpcode::G_INSERT:
6730 case TargetOpcode::G_INSERT_VECTOR_ELT:
6731 case TargetOpcode::G_FREEZE:
6732 case TargetOpcode::G_FNEG:
6733 case TargetOpcode::G_FABS:
6734 case TargetOpcode::G_FSQRT:
6735 case TargetOpcode::G_FCEIL:
6736 case TargetOpcode::G_FFLOOR:
6737 case TargetOpcode::G_FNEARBYINT:
6738 case TargetOpcode::G_FRINT:
6739 case TargetOpcode::G_INTRINSIC_ROUND:
6740 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6741 case TargetOpcode::G_INTRINSIC_TRUNC:
6742 case TargetOpcode::G_BITREVERSE:
6743 case TargetOpcode::G_BSWAP:
6744 case TargetOpcode::G_FCANONICALIZE:
6745 case TargetOpcode::G_SEXT_INREG:
6746 case TargetOpcode::G_ABS:
6747 case TargetOpcode::G_CTLZ:
6748 case TargetOpcode::G_CTPOP:
6749 if (TypeIdx != 0)
6750 return UnableToLegalize;
6751 Observer.changingInstr(MI);
6752 moreElementsVectorSrc(MI, MoreTy, 1);
6753 moreElementsVectorDst(MI, MoreTy, 0);
6754 Observer.changedInstr(MI);
6755 return Legalized;
6756 case TargetOpcode::G_SELECT: {
6757 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6758 if (TypeIdx == 1) {
6759 if (!CondTy.isScalar() ||
6760 DstTy.getElementCount() != MoreTy.getElementCount())
6761 return UnableToLegalize;
6762
6763 // This is turning a scalar select of vectors into a vector
6764 // select. Broadcast the select condition.
6765 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6766 Observer.changingInstr(MI);
6767 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6768 Observer.changedInstr(MI);
6769 return Legalized;
6770 }
6771
6772 if (CondTy.isVector())
6773 return UnableToLegalize;
6774
6775 Observer.changingInstr(MI);
6776 moreElementsVectorSrc(MI, MoreTy, 2);
6777 moreElementsVectorSrc(MI, MoreTy, 3);
6778 moreElementsVectorDst(MI, MoreTy, 0);
6779 Observer.changedInstr(MI);
6780 return Legalized;
6781 }
6782 case TargetOpcode::G_UNMERGE_VALUES:
6783 return UnableToLegalize;
6784 case TargetOpcode::G_PHI:
6785 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6786 case TargetOpcode::G_SHUFFLE_VECTOR:
6787 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6788 case TargetOpcode::G_BUILD_VECTOR: {
6789 SmallVector<SrcOp, 8> Elts;
6790 for (auto Op : MI.uses()) {
6791 Elts.push_back(Op.getReg());
6792 }
6793
6794 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6795 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6796 }
6797
6798 MIRBuilder.buildDeleteTrailingVectorElements(
6799 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6800 MI.eraseFromParent();
6801 return Legalized;
6802 }
6803 case TargetOpcode::G_SEXT:
6804 case TargetOpcode::G_ZEXT:
6805 case TargetOpcode::G_ANYEXT:
6806 case TargetOpcode::G_TRUNC:
6807 case TargetOpcode::G_FPTRUNC:
6808 case TargetOpcode::G_FPEXT:
6809 case TargetOpcode::G_FPTOSI:
6810 case TargetOpcode::G_FPTOUI:
6811 case TargetOpcode::G_FPTOSI_SAT:
6812 case TargetOpcode::G_FPTOUI_SAT:
6813 case TargetOpcode::G_SITOFP:
6814 case TargetOpcode::G_UITOFP: {
6815 Observer.changingInstr(MI);
6816 LLT SrcExtTy;
6817 LLT DstExtTy;
6818 if (TypeIdx == 0) {
6819 DstExtTy = MoreTy;
6820 SrcExtTy = MoreTy.changeElementType(
6821 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6822 } else {
6823 DstExtTy = MoreTy.changeElementType(
6824 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6825 SrcExtTy = MoreTy;
6826 }
6827 moreElementsVectorSrc(MI, SrcExtTy, 1);
6828 moreElementsVectorDst(MI, DstExtTy, 0);
6829 Observer.changedInstr(MI);
6830 return Legalized;
6831 }
6832 case TargetOpcode::G_ICMP:
6833 case TargetOpcode::G_FCMP: {
6834 if (TypeIdx != 1)
6835 return UnableToLegalize;
6836
6837 Observer.changingInstr(MI);
6838 moreElementsVectorSrc(MI, MoreTy, 2);
6839 moreElementsVectorSrc(MI, MoreTy, 3);
6840 LLT CondTy = LLT::fixed_vector(
6841 MoreTy.getNumElements(),
6842 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6843 moreElementsVectorDst(MI, CondTy, 0);
6844 Observer.changedInstr(MI);
6845 return Legalized;
6846 }
6847 case TargetOpcode::G_BITCAST: {
6848 if (TypeIdx != 0)
6849 return UnableToLegalize;
6850
6851 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6852 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6853
6854 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6855 if (coefficient % DstTy.getNumElements() != 0)
6856 return UnableToLegalize;
6857
6858 coefficient = coefficient / DstTy.getNumElements();
6859
6860 LLT NewTy = SrcTy.changeElementCount(
6861 ElementCount::get(coefficient, MoreTy.isScalable()));
6862 Observer.changingInstr(MI);
6863 moreElementsVectorSrc(MI, NewTy, 1);
6864 moreElementsVectorDst(MI, MoreTy, 0);
6865 Observer.changedInstr(MI);
6866 return Legalized;
6867 }
6868 case TargetOpcode::G_VECREDUCE_FADD:
6869 case TargetOpcode::G_VECREDUCE_FMUL:
6870 case TargetOpcode::G_VECREDUCE_ADD:
6871 case TargetOpcode::G_VECREDUCE_MUL:
6872 case TargetOpcode::G_VECREDUCE_AND:
6873 case TargetOpcode::G_VECREDUCE_OR:
6874 case TargetOpcode::G_VECREDUCE_XOR:
6875 case TargetOpcode::G_VECREDUCE_SMAX:
6876 case TargetOpcode::G_VECREDUCE_SMIN:
6877 case TargetOpcode::G_VECREDUCE_UMAX:
6878 case TargetOpcode::G_VECREDUCE_UMIN: {
6879 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6880 MachineOperand &MO = MI.getOperand(1);
6881 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6882 auto NeutralElement = getNeutralElementForVecReduce(
6883 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6884
6885 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6886 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6887 i != e; i++) {
6888 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6889 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6890 NeutralElement, Idx);
6891 }
6892
6893 Observer.changingInstr(MI);
6894 MO.setReg(NewVec.getReg(0));
6895 Observer.changedInstr(MI);
6896 return Legalized;
6897 }
6898
6899 default:
6900 return UnableToLegalize;
6901 }
6902}
6903
6906 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6907 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6908 unsigned MaskNumElts = Mask.size();
6909 unsigned SrcNumElts = SrcTy.getNumElements();
6910 LLT DestEltTy = DstTy.getElementType();
6911
6912 if (MaskNumElts == SrcNumElts)
6913 return Legalized;
6914
6915 if (MaskNumElts < SrcNumElts) {
6916 // Extend mask to match new destination vector size with
6917 // undef values.
6918 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6919 llvm::copy(Mask, NewMask.begin());
6920
6921 moreElementsVectorDst(MI, SrcTy, 0);
6922 MIRBuilder.setInstrAndDebugLoc(MI);
6923 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6924 MI.getOperand(1).getReg(),
6925 MI.getOperand(2).getReg(), NewMask);
6926 MI.eraseFromParent();
6927
6928 return Legalized;
6929 }
6930
6931 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6932 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6933 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6934
6935 // Create new source vectors by concatenating the initial
6936 // source vectors with undefined vectors of the same size.
6937 auto Undef = MIRBuilder.buildUndef(SrcTy);
6938 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6939 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6940 MOps1[0] = MI.getOperand(1).getReg();
6941 MOps2[0] = MI.getOperand(2).getReg();
6942
6943 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6944 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6945
6946 // Readjust mask for new input vector length.
6947 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6948 for (unsigned I = 0; I != MaskNumElts; ++I) {
6949 int Idx = Mask[I];
6950 if (Idx >= static_cast<int>(SrcNumElts))
6951 Idx += PaddedMaskNumElts - SrcNumElts;
6952 MappedOps[I] = Idx;
6953 }
6954
6955 // If we got more elements than required, extract subvector.
6956 if (MaskNumElts != PaddedMaskNumElts) {
6957 auto Shuffle =
6958 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6959
6960 SmallVector<Register, 16> Elts(MaskNumElts);
6961 for (unsigned I = 0; I < MaskNumElts; ++I) {
6962 Elts[I] =
6963 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6964 .getReg(0);
6965 }
6966 MIRBuilder.buildBuildVector(DstReg, Elts);
6967 } else {
6968 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6969 }
6970
6971 MI.eraseFromParent();
6972 return Legalized;
6973}
6974
6975LegalizerHelper::LegalizeResult
6976LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6977 unsigned int TypeIdx, LLT MoreTy) {
6978 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6979 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6980 unsigned NumElts = DstTy.getNumElements();
6981 unsigned WidenNumElts = MoreTy.getNumElements();
6982
6983 if (DstTy.isVector() && Src1Ty.isVector() &&
6984 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6986 }
6987
6988 if (TypeIdx != 0)
6989 return UnableToLegalize;
6990
6991 // Expect a canonicalized shuffle.
6992 if (DstTy != Src1Ty || DstTy != Src2Ty)
6993 return UnableToLegalize;
6994
6995 moreElementsVectorSrc(MI, MoreTy, 1);
6996 moreElementsVectorSrc(MI, MoreTy, 2);
6997
6998 // Adjust mask based on new input vector length.
6999 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7000 for (unsigned I = 0; I != NumElts; ++I) {
7001 int Idx = Mask[I];
7002 if (Idx < static_cast<int>(NumElts))
7003 NewMask[I] = Idx;
7004 else
7005 NewMask[I] = Idx - NumElts + WidenNumElts;
7006 }
7007 moreElementsVectorDst(MI, MoreTy, 0);
7008 MIRBuilder.setInstrAndDebugLoc(MI);
7009 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7010 MI.getOperand(1).getReg(),
7011 MI.getOperand(2).getReg(), NewMask);
7012 MI.eraseFromParent();
7013 return Legalized;
7014}
7015
7016void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7017 ArrayRef<Register> Src1Regs,
7018 ArrayRef<Register> Src2Regs,
7019 LLT NarrowTy) {
7020 MachineIRBuilder &B = MIRBuilder;
7021 unsigned SrcParts = Src1Regs.size();
7022 unsigned DstParts = DstRegs.size();
7023
7024 unsigned DstIdx = 0; // Low bits of the result.
7025 Register FactorSum =
7026 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7027 DstRegs[DstIdx] = FactorSum;
7028
7029 Register CarrySumPrevDstIdx;
7030 SmallVector<Register, 4> Factors;
7031
7032 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7033 // Collect low parts of muls for DstIdx.
7034 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7035 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7036 MachineInstrBuilder Mul =
7037 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7038 Factors.push_back(Mul.getReg(0));
7039 }
7040 // Collect high parts of muls from previous DstIdx.
7041 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7042 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7043 MachineInstrBuilder Umulh =
7044 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7045 Factors.push_back(Umulh.getReg(0));
7046 }
7047 // Add CarrySum from additions calculated for previous DstIdx.
7048 if (DstIdx != 1) {
7049 Factors.push_back(CarrySumPrevDstIdx);
7050 }
7051
7052 Register CarrySum;
7053 // Add all factors and accumulate all carries into CarrySum.
7054 if (DstIdx != DstParts - 1) {
7055 MachineInstrBuilder Uaddo =
7056 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7057 FactorSum = Uaddo.getReg(0);
7058 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7059 for (unsigned i = 2; i < Factors.size(); ++i) {
7060 MachineInstrBuilder Uaddo =
7061 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7062 FactorSum = Uaddo.getReg(0);
7063 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7064 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7065 }
7066 } else {
7067 // Since value for the next index is not calculated, neither is CarrySum.
7068 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7069 for (unsigned i = 2; i < Factors.size(); ++i)
7070 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7071 }
7072
7073 CarrySumPrevDstIdx = CarrySum;
7074 DstRegs[DstIdx] = FactorSum;
7075 Factors.clear();
7076 }
7077}
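// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The part-wise schoolbook product computed above, modeled with two 32-bit
// limbs per operand. Lo/Hi stand in for G_MUL and G_UMULH on the narrow type.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0x123456789abcdef0ull, B = 0xfedcba9876543210ull;
  uint32_t A0 = (uint32_t)A, A1 = (uint32_t)(A >> 32);
  uint32_t B0 = (uint32_t)B, B1 = (uint32_t)(B >> 32);
  auto Lo = [](uint32_t X, uint32_t Y) { return (uint32_t)((uint64_t)X * Y); };
  auto Hi = [](uint32_t X, uint32_t Y) {
    return (uint32_t)(((uint64_t)X * Y) >> 32);
  };

  // Part 0: just the low half of A0*B0.
  uint32_t D0 = Lo(A0, B0);
  // Part 1: low halves of the cross products plus the carry (the high half of
  // A0*B0). Sums wrap modulo 2^32, matching the narrow G_ADD chain.
  uint32_t D1 = Lo(A1, B0) + Lo(A0, B1) + Hi(A0, B0);

  assert((((uint64_t)D1 << 32) | D0) == A * B); // low 64 bits of the product
  return 0;
}
// --- End of editor sketch ---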
7078
7081 LLT NarrowTy) {
7082 if (TypeIdx != 0)
7083 return UnableToLegalize;
7084
7085 Register DstReg = MI.getOperand(0).getReg();
7086 LLT DstType = MRI.getType(DstReg);
7087 // FIXME: add support for vector types
7088 if (DstType.isVector())
7089 return UnableToLegalize;
7090
7091 unsigned Opcode = MI.getOpcode();
7092 unsigned OpO, OpE, OpF;
7093 switch (Opcode) {
7094 case TargetOpcode::G_SADDO:
7095 case TargetOpcode::G_SADDE:
7096 case TargetOpcode::G_UADDO:
7097 case TargetOpcode::G_UADDE:
7098 case TargetOpcode::G_ADD:
7099 OpO = TargetOpcode::G_UADDO;
7100 OpE = TargetOpcode::G_UADDE;
7101 OpF = TargetOpcode::G_UADDE;
7102 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7103 OpF = TargetOpcode::G_SADDE;
7104 break;
7105 case TargetOpcode::G_SSUBO:
7106 case TargetOpcode::G_SSUBE:
7107 case TargetOpcode::G_USUBO:
7108 case TargetOpcode::G_USUBE:
7109 case TargetOpcode::G_SUB:
7110 OpO = TargetOpcode::G_USUBO;
7111 OpE = TargetOpcode::G_USUBE;
7112 OpF = TargetOpcode::G_USUBE;
7113 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7114 OpF = TargetOpcode::G_SSUBE;
7115 break;
7116 default:
7117 llvm_unreachable("Unexpected add/sub opcode!");
7118 }
7119
7120 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7121 unsigned NumDefs = MI.getNumExplicitDefs();
7122 Register Src1 = MI.getOperand(NumDefs).getReg();
7123 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7124 Register CarryDst, CarryIn;
7125 if (NumDefs == 2)
7126 CarryDst = MI.getOperand(1).getReg();
7127 if (MI.getNumOperands() == NumDefs + 3)
7128 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7129
7130 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7131 LLT LeftoverTy, DummyTy;
7132 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7133 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7134 MIRBuilder, MRI);
7135 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7136 MRI);
7137
7138 int NarrowParts = Src1Regs.size();
7139 Src1Regs.append(Src1Left);
7140 Src2Regs.append(Src2Left);
7141 DstRegs.reserve(Src1Regs.size());
7142
7143 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7144 Register DstReg =
7145 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7146 Register CarryOut;
7147 // Forward the final carry-out to the destination register
7148 if (i == e - 1 && CarryDst)
7149 CarryOut = CarryDst;
7150 else
7151 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7152
7153 if (!CarryIn) {
7154 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7155 {Src1Regs[i], Src2Regs[i]});
7156 } else if (i == e - 1) {
7157 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7158 {Src1Regs[i], Src2Regs[i], CarryIn});
7159 } else {
7160 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7161 {Src1Regs[i], Src2Regs[i], CarryIn});
7162 }
7163
7164 DstRegs.push_back(DstReg);
7165 CarryIn = CarryOut;
7166 }
7167 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7168 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7169 ArrayRef(DstRegs).drop_front(NarrowParts));
7170
7171 MI.eraseFromParent();
7172 return Legalized;
7173}
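// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The G_UADDO / G_UADDE chain built above, modeled for a 64-bit add split
// into two 32-bit parts.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0xffffffff00000001ull, B = 0x00000000ffffffffull;
  uint32_t A0 = (uint32_t)A, A1 = (uint32_t)(A >> 32);
  uint32_t B0 = (uint32_t)B, B1 = (uint32_t)(B >> 32);

  uint32_t D0 = A0 + B0;          // G_UADDO: low part ...
  uint32_t Carry0 = D0 < A0;      // ... and its carry-out
  uint32_t D1 = A1 + B1 + Carry0; // G_UADDE: high part consumes the carry

  assert((((uint64_t)D1 << 32) | D0) == A + B);
  return 0;
}
// --- End of editor sketch ---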
7174
7177 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7178
7179 LLT Ty = MRI.getType(DstReg);
7180 if (Ty.isVector())
7181 return UnableToLegalize;
7182
7183 unsigned Size = Ty.getSizeInBits();
7184 unsigned NarrowSize = NarrowTy.getSizeInBits();
7185 if (Size % NarrowSize != 0)
7186 return UnableToLegalize;
7187
7188 unsigned NumParts = Size / NarrowSize;
7189 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7190 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7191
7192 SmallVector<Register, 2> Src1Parts, Src2Parts;
7193 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7194 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7195 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7196 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7197
7198 // Take only high half of registers if this is high mul.
7199 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7200 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7201 MI.eraseFromParent();
7202 return Legalized;
7203}
7204
7207 LLT NarrowTy) {
7208 if (TypeIdx != 0)
7209 return UnableToLegalize;
7210
7211 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7212
7213 Register Src = MI.getOperand(1).getReg();
7214 LLT SrcTy = MRI.getType(Src);
7215
7216 // If all finite floats fit into the narrowed integer type, we can just swap
7217 // out the result type. This is practically only useful for conversions from
7218 // half to at least 16-bits, so just handle the one case.
7219 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7220 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7221 return UnableToLegalize;
7222
7223 Observer.changingInstr(MI);
7224 narrowScalarDst(MI, NarrowTy, 0,
7225 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7226 Observer.changedInstr(MI);
7227 return Legalized;
7228}
7229
7232 LLT NarrowTy) {
7233 if (TypeIdx != 1)
7234 return UnableToLegalize;
7235
7236 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7237
7238 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7239 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7240 // NarrowSize.
7241 if (SizeOp1 % NarrowSize != 0)
7242 return UnableToLegalize;
7243 int NumParts = SizeOp1 / NarrowSize;
7244
7245 SmallVector<Register, 2> SrcRegs, DstRegs;
7246 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7247 MIRBuilder, MRI);
7248
7249 Register OpReg = MI.getOperand(0).getReg();
7250 uint64_t OpStart = MI.getOperand(2).getImm();
7251 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7252 for (int i = 0; i < NumParts; ++i) {
7253 unsigned SrcStart = i * NarrowSize;
7254
7255 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7256 // No part of the extract uses this subregister, ignore it.
7257 continue;
7258 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7259 // The entire subregister is extracted, forward the value.
7260 DstRegs.push_back(SrcRegs[i]);
7261 continue;
7262 }
7263
7264 // OpSegStart is where this destination segment would start in OpReg if it
7265 // extended infinitely in both directions.
7266 int64_t ExtractOffset;
7267 uint64_t SegSize;
7268 if (OpStart < SrcStart) {
7269 ExtractOffset = 0;
7270 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7271 } else {
7272 ExtractOffset = OpStart - SrcStart;
7273 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7274 }
7275
7276 Register SegReg = SrcRegs[i];
7277 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7278 // A genuine extract is needed.
7279 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7280 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7281 }
7282
7283 DstRegs.push_back(SegReg);
7284 }
7285
7286 Register DstReg = MI.getOperand(0).getReg();
7287 if (MRI.getType(DstReg).isVector())
7288 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7289 else if (DstRegs.size() > 1)
7290 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7291 else
7292 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7293 MI.eraseFromParent();
7294 return Legalized;
7295}
7296
7299 LLT NarrowTy) {
7300 // FIXME: Don't know how to handle secondary types yet.
7301 if (TypeIdx != 0)
7302 return UnableToLegalize;
7303
7304 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7305 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7306 LLT LeftoverTy;
7307 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7308 LeftoverRegs, MIRBuilder, MRI);
7309
7310 SrcRegs.append(LeftoverRegs);
7311
7312 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7313 Register OpReg = MI.getOperand(2).getReg();
7314 uint64_t OpStart = MI.getOperand(3).getImm();
7315 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7316 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7317 unsigned DstStart = I * NarrowSize;
7318
7319 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7320 // The entire subregister is defined by this insert, forward the new
7321 // value.
7322 DstRegs.push_back(OpReg);
7323 continue;
7324 }
7325
7326 Register SrcReg = SrcRegs[I];
7327 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7328 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7329 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7330 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7331 }
7332
7333 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7334 // No part of the insert affects this subregister, forward the original.
7335 DstRegs.push_back(SrcReg);
7336 continue;
7337 }
7338
7339 // OpSegStart is where this destination segment would start in OpReg if it
7340 // extended infinitely in both directions.
7341 int64_t ExtractOffset, InsertOffset;
7342 uint64_t SegSize;
7343 if (OpStart < DstStart) {
7344 InsertOffset = 0;
7345 ExtractOffset = DstStart - OpStart;
7346 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7347 } else {
7348 InsertOffset = OpStart - DstStart;
7349 ExtractOffset = 0;
7350 SegSize =
7351 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7352 }
7353
7354 Register SegReg = OpReg;
7355 if (ExtractOffset != 0 || SegSize != OpSize) {
7356 // A genuine extract is needed.
7357 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7358 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7359 }
7360
7361 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7362 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7363 DstRegs.push_back(DstReg);
7364 }
7365
7366 uint64_t WideSize = DstRegs.size() * NarrowSize;
7367 Register DstReg = MI.getOperand(0).getReg();
7368 if (WideSize > RegTy.getSizeInBits()) {
7369 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7370 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7371 MIRBuilder.buildTrunc(DstReg, MergeReg);
7372 } else
7373 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7374
7375 MI.eraseFromParent();
7376 return Legalized;
7377}
7378
7381 LLT NarrowTy) {
7382 Register DstReg = MI.getOperand(0).getReg();
7383 LLT DstTy = MRI.getType(DstReg);
7384
7385 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7386
7387 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7388 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7389 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7390 LLT LeftoverTy;
7391 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7392 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7393 return UnableToLegalize;
7394
7395 LLT Unused;
7396 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7397 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7398 llvm_unreachable("inconsistent extractParts result");
7399
7400 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7401 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7402 {Src0Regs[I], Src1Regs[I]});
7403 DstRegs.push_back(Inst.getReg(0));
7404 }
7405
7406 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7407 auto Inst = MIRBuilder.buildInstr(
7408 MI.getOpcode(),
7409 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7410 DstLeftoverRegs.push_back(Inst.getReg(0));
7411 }
7412
7413 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7414 LeftoverTy, DstLeftoverRegs);
7415
7416 MI.eraseFromParent();
7417 return Legalized;
7418}
7419
7422 LLT NarrowTy) {
7423 if (TypeIdx != 0)
7424 return UnableToLegalize;
7425
7426 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7427
7428 LLT DstTy = MRI.getType(DstReg);
7429 if (DstTy.isVector())
7430 return UnableToLegalize;
7431
7432 SmallVector<Register, 8> Parts;
7433 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7434 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7435 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7436
7437 MI.eraseFromParent();
7438 return Legalized;
7439}
7440
7443 LLT NarrowTy) {
7444 if (TypeIdx != 0)
7445 return UnableToLegalize;
7446
7447 Register CondReg = MI.getOperand(1).getReg();
7448 LLT CondTy = MRI.getType(CondReg);
7449 if (CondTy.isVector()) // TODO: Handle vselect
7450 return UnableToLegalize;
7451
7452 Register DstReg = MI.getOperand(0).getReg();
7453 LLT DstTy = MRI.getType(DstReg);
7454
7455 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7456 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7457 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7458 LLT LeftoverTy;
7459 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7460 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7461 return UnableToLegalize;
7462
7463 LLT Unused;
7464 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7465 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7466 llvm_unreachable("inconsistent extractParts result");
7467
7468 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7469 auto Select = MIRBuilder.buildSelect(NarrowTy,
7470 CondReg, Src1Regs[I], Src2Regs[I]);
7471 DstRegs.push_back(Select.getReg(0));
7472 }
7473
7474 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7475 auto Select = MIRBuilder.buildSelect(
7476 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7477 DstLeftoverRegs.push_back(Select.getReg(0));
7478 }
7479
7480 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7481 LeftoverTy, DstLeftoverRegs);
7482
7483 MI.eraseFromParent();
7484 return Legalized;
7485}
7486
7489 LLT NarrowTy) {
7490 if (TypeIdx != 1)
7491 return UnableToLegalize;
7492
7493 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7494 unsigned NarrowSize = NarrowTy.getSizeInBits();
7495
7496 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7497 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7498
7500 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7501 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7502 auto C_0 = B.buildConstant(NarrowTy, 0);
7503 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7504 UnmergeSrc.getReg(1), C_0);
7505 auto LoCTLZ = IsUndef ?
7506 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7507 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7508 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7509 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7510 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7511 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7512
7513 MI.eraseFromParent();
7514 return Legalized;
7515 }
7516
7517 return UnableToLegalize;
7518}
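// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The identity used above, checked on the host for a 64-bit value split into
// 32-bit halves: ctlz(Hi:Lo) == (Hi == 0) ? 32 + ctlz(Lo) : ctlz(Hi).
// __builtin_clz / __builtin_clzll are GCC/Clang builtins used only as a
// reference here.
#include <cassert>
#include <cstdint>

static unsigned Ctlz32(uint32_t V) { return V ? __builtin_clz(V) : 32; }

static unsigned Ctlz64FromHalves(uint64_t V) {
  uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
  return Hi == 0 ? 32 + Ctlz32(Lo) : Ctlz32(Hi);
}

int main() {
  const uint64_t Tests[] = {1ull, 0x80000000ull, 0x100000000ull,
                            0x8000000000000000ull, 0x00000001deadbeefull};
  for (uint64_t V : Tests)
    assert(Ctlz64FromHalves(V) == (unsigned)__builtin_clzll(V));
  return 0;
}
// --- End of editor sketch ---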
7519
7522 LLT NarrowTy) {
7523 if (TypeIdx != 1)
7524 return UnableToLegalize;
7525
7526 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7527 unsigned NarrowSize = NarrowTy.getSizeInBits();
7528
7529 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7530 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7531
7532 MachineIRBuilder &B = MIRBuilder;
7533 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7534 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7535 auto C_0 = B.buildConstant(NarrowTy, 0);
7536 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7537 UnmergeSrc.getReg(0), C_0);
7538 auto HiCTTZ = IsUndef ?
7539 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7540 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7541 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7542 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7543 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7544 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7545
7546 MI.eraseFromParent();
7547 return Legalized;
7548 }
7549
7550 return UnableToLegalize;
7551}
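// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// Host-side check of the mirrored identity for trailing zeros:
// cttz(Hi:Lo) == (Lo == 0) ? cttz(Hi) + 32 : cttz(Lo). __builtin_ctz /
// __builtin_ctzll are GCC/Clang builtins used only as a reference here.
#include <cassert>
#include <cstdint>

static unsigned Cttz32(uint32_t V) { return V ? __builtin_ctz(V) : 32; }

static unsigned Cttz64FromHalves(uint64_t V) {
  uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
  return Lo == 0 ? Cttz32(Hi) + 32 : Cttz32(Lo);
}

int main() {
  const uint64_t Tests[] = {1ull, 0x80000000ull, 0x100000000ull,
                            0x8000000000000000ull, 0xdeadbeef00000000ull};
  for (uint64_t V : Tests)
    assert(Cttz64FromHalves(V) == (unsigned)__builtin_ctzll(V));
  return 0;
}
// --- End of editor sketch ---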
7552
7555 LLT NarrowTy) {
7556 if (TypeIdx != 1)
7557 return UnableToLegalize;
7558
7559 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7560 unsigned NarrowSize = NarrowTy.getSizeInBits();
7561
7562 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7563 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7564
7565 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7566 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7567 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7568
7569 MI.eraseFromParent();
7570 return Legalized;
7571 }
7572
7573 return UnableToLegalize;
7574}
7575
7578 LLT NarrowTy) {
7579 if (TypeIdx != 1)
7580 return UnableToLegalize;
7581
7582 MachineIRBuilder &B = MIRBuilder;
7583 Register ExpReg = MI.getOperand(2).getReg();
7584 LLT ExpTy = MRI.getType(ExpReg);
7585
7586 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7587
7588 // Clamp the exponent to the range of the target type.
7589 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7590 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7591 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7592 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7593
7594 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7595 Observer.changingInstr(MI);
7596 MI.getOperand(2).setReg(Trunc.getReg(0));
7597 Observer.changedInstr(MI);
7598 return Legalized;
7599}
7600
7603 unsigned Opc = MI.getOpcode();
7604 const auto &TII = MIRBuilder.getTII();
7605 auto isSupported = [this](const LegalityQuery &Q) {
7606 auto QAction = LI.getAction(Q).Action;
7607 return QAction == Legal || QAction == Libcall || QAction == Custom;
7608 };
7609 switch (Opc) {
7610 default:
7611 return UnableToLegalize;
7612 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7613 // This trivially expands to CTLZ.
7614 Observer.changingInstr(MI);
7615 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7616 Observer.changedInstr(MI);
7617 return Legalized;
7618 }
7619 case TargetOpcode::G_CTLZ: {
7620 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7621 unsigned Len = SrcTy.getScalarSizeInBits();
7622
7623 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7624 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7625 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7626 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7627 auto ICmp = MIRBuilder.buildICmp(
7628 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7629 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7630 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7631 MI.eraseFromParent();
7632 return Legalized;
7633 }
7634 // for now, we do this:
7635 // NewLen = NextPowerOf2(Len);
7636 // x = x | (x >> 1);
7637 // x = x | (x >> 2);
7638 // ...
7639 // x = x | (x >>16);
7640 // x = x | (x >>32); // for 64-bit input
7641 // Up to NewLen/2
7642 // return Len - popcount(x);
7643 //
7644 // Ref: "Hacker's Delight" by Henry Warren
7645 Register Op = SrcReg;
7646 unsigned NewLen = PowerOf2Ceil(Len);
7647 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7648 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7649 auto MIBOp = MIRBuilder.buildOr(
7650 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7651 Op = MIBOp.getReg(0);
7652 }
7653 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7654 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7655 MIBPop);
7656 MI.eraseFromParent();
7657 return Legalized;
7658 }
7659 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7660 // This trivially expands to CTTZ.
7661 Observer.changingInstr(MI);
7662 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7663 Observer.changedInstr(MI);
7664 return Legalized;
7665 }
7666 case TargetOpcode::G_CTTZ: {
7667 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7668
7669 unsigned Len = SrcTy.getScalarSizeInBits();
7670 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7671 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7672 // zero.
7673 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7674 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7675 auto ICmp = MIRBuilder.buildICmp(
7676 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7677 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7678 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7679 MI.eraseFromParent();
7680 return Legalized;
7681 }
7682 // for now, we use: { return popcount(~x & (x - 1)); }
7683 // unless the target has ctlz but not ctpop, in which case we use:
7684 // { return 32 - nlz(~x & (x-1)); }
7685 // Ref: "Hacker's Delight" by Henry Warren
7686 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7687 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7688 auto MIBTmp = MIRBuilder.buildAnd(
7689 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7690 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7691 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7692 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7693 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7694 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7695 MI.eraseFromParent();
7696 return Legalized;
7697 }
7698 Observer.changingInstr(MI);
7699 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7700 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7701 Observer.changedInstr(MI);
7702 return Legalized;
7703 }
7704 case TargetOpcode::G_CTPOP: {
7705 Register SrcReg = MI.getOperand(1).getReg();
7706 LLT Ty = MRI.getType(SrcReg);
7707 unsigned Size = Ty.getScalarSizeInBits();
7708 MachineIRBuilder &B = MIRBuilder;
7709
7710 // Bail out on irregular type lengths.
7711 if (Size > 128 || Size % 8 != 0)
7712 return UnableToLegalize;
7713
7714 // Count set bits in blocks of 2 bits. Default approach would be
7715 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7716 // We use following formula instead:
7717 // B2Count = val - { (val >> 1) & 0x55555555 }
7718 // since it gives same result in blocks of 2 with one instruction less.
7719 auto C_1 = B.buildConstant(Ty, 1);
7720 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7721 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7722 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7723 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7724 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7725
7726 // In order to get count in blocks of 4 add values from adjacent block of 2.
7727 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7728 auto C_2 = B.buildConstant(Ty, 2);
7729 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7730 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7731 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7732 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7733 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7734 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7735
7736 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7737 // addition since count value sits in range {0,...,8} and 4 bits are enough
7738 // to hold such binary values. After addition high 4 bits still hold count
7739 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7740 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7741 auto C_4 = B.buildConstant(Ty, 4);
7742 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7743 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7744 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7745 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7746 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7747
7748 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7749 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7750 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7751 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7752
7753 // Shift count result from 8 high bits to low bits.
7754 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7755
7756 auto IsMulSupported = [this](const LLT Ty) {
7757 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7758 return Action == Legal || Action == WidenScalar || Action == Custom;
7759 };
7760 if (IsMulSupported(Ty)) {
7761 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7762 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7763 } else {
7764 auto ResTmp = B8Count;
7765 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7766 auto ShiftC = B.buildConstant(Ty, Shift);
7767 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7768 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7769 }
7770 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7771 }
7772 MI.eraseFromParent();
7773 return Legalized;
7774 }
7775 }
7776}
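// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The blocks-of-2/4/8 population count built above for G_CTPOP, written out
// for a 32-bit scalar and checked against the compiler builtin.
#include <cassert>
#include <cstdint>

static uint32_t SwarPopcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // set-bit counts per 2-bit block
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // per 4-bit block
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // per 8-bit block
  return (V * 0x01010101u) >> 24;                   // sum all bytes into the top byte
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 0x55555555u, 0xdeadbeefu, 0xffffffffu};
  for (uint32_t V : Tests)
    assert(SwarPopcount32(V) == (uint32_t)__builtin_popcount(V));
  return 0;
}
// --- End of editor sketch ---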
7777
7778// Check that (every element of) Reg is undef or not an exact multiple of BW.
7779static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7780 Register Reg, unsigned BW) {
7781 return matchUnaryPredicate(
7782 MRI, Reg,
7783 [=](const Constant *C) {
7784 // Null constant here means an undef.
7785 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7786 return !CI || CI->getValue().urem(BW) != 0;
7787 },
7788 /*AllowUndefs*/ true);
7789}
7790
7791LegalizerHelper::LegalizeResult
7792LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7793 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7794 LLT Ty = MRI.getType(Dst);
7795 LLT ShTy = MRI.getType(Z);
7796
7797 unsigned BW = Ty.getScalarSizeInBits();
7798
7799 if (!isPowerOf2_32(BW))
7800 return UnableToLegalize;
7801
7802 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7803 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7804
7805 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7806 // fshl X, Y, Z -> fshr X, Y, -Z
7807 // fshr X, Y, Z -> fshl X, Y, -Z
7808 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7809 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7810 } else {
7811 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7812 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7813 auto One = MIRBuilder.buildConstant(ShTy, 1);
7814 if (IsFSHL) {
7815 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7816 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7817 } else {
7818 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7819 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7820 }
7821
7822 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7823 }
7824
7825 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7826 MI.eraseFromParent();
7827 return Legalized;
7828}
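// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The identity used above when Z % BW is known to be non-zero, checked for
// 32-bit values: fshl(X, Y, Z) == fshr(X, Y, -Z), amounts taken modulo 32.
#include <cassert>
#include <cstdint>

static uint32_t RefFSHL(uint32_t X, uint32_t Y, uint32_t Z) {
  uint64_t Concat = ((uint64_t)X << 32) | Y; // top bits of (X:Y) << (Z % 32)
  return (uint32_t)((Concat << (Z % 32)) >> 32);
}

static uint32_t RefFSHR(uint32_t X, uint32_t Y, uint32_t Z) {
  uint64_t Concat = ((uint64_t)X << 32) | Y; // low bits of (X:Y) >> (Z % 32)
  return (uint32_t)(Concat >> (Z % 32));
}

int main() {
  uint32_t X = 0x12345678u, Y = 0x9abcdef0u;
  for (uint32_t Z = 1; Z < 64; ++Z) {
    if (Z % 32 == 0)
      continue; // the identity is only applied when Z % BW != 0
    assert(RefFSHL(X, Y, Z) == RefFSHR(X, Y, 0u - Z));
    assert(RefFSHR(X, Y, Z) == RefFSHL(X, Y, 0u - Z));
  }
  return 0;
}
// --- End of editor sketch ---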
7829
7830LegalizerHelper::LegalizeResult
7831LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7832 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7833 LLT Ty = MRI.getType(Dst);
7834 LLT ShTy = MRI.getType(Z);
7835
7836 const unsigned BW = Ty.getScalarSizeInBits();
7837 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7838
7839 Register ShX, ShY;
7840 Register ShAmt, InvShAmt;
7841
7842 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7843 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7844 // fshl: X << C | Y >> (BW - C)
7845 // fshr: X << (BW - C) | Y >> C
7846 // where C = Z % BW is not zero
7847 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7848 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7849 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7850 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7851 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7852 } else {
7853 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7854 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7855 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7856 if (isPowerOf2_32(BW)) {
7857 // Z % BW -> Z & (BW - 1)
7858 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7859 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7860 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7861 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7862 } else {
7863 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7864 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7865 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7866 }
7867
7868 auto One = MIRBuilder.buildConstant(ShTy, 1);
7869 if (IsFSHL) {
7870 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7871 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7872 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7873 } else {
7874 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7875 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7876 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7877 }
7878 }
7879
7880 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7881 MI.eraseFromParent();
7882 return Legalized;
7883}
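// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The power-of-two expansion used above when Z may be a multiple of BW: the
// "Y >> 1 >> (BW - 1 - (Z % BW))" form avoids an undefined shift by BW.
// Checked for 32-bit values against a 64-bit reference.
#include <cassert>
#include <cstdint>

static uint32_t RefFSHL(uint32_t X, uint32_t Y, uint32_t Z) {
  uint64_t Concat = ((uint64_t)X << 32) | Y;
  return (uint32_t)((Concat << (Z % 32)) >> 32);
}

static uint32_t ExpandedFSHL(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t ShAmt = Z & 31;     // Z % BW
  uint32_t InvShAmt = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
}

int main() {
  uint32_t X = 0x12345678u, Y = 0x9abcdef0u;
  for (uint32_t Z = 0; Z < 96; ++Z)
    assert(ExpandedFSHL(X, Y, Z) == RefFSHL(X, Y, Z));
  return 0;
}
// --- End of editor sketch ---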
7884
7885LegalizerHelper::LegalizeResult
7886LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7887 // These operations approximately do the following (while avoiding undefined
7888 // shifts by BW):
7889 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7890 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7891 Register Dst = MI.getOperand(0).getReg();
7892 LLT Ty = MRI.getType(Dst);
7893 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7894
7895 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7896 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7897
7898 // TODO: Use smarter heuristic that accounts for vector legalization.
7899 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7900 return lowerFunnelShiftAsShifts(MI);
7901
7902 // This only works for powers of 2, fallback to shifts if it fails.
7903 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7904 if (Result == UnableToLegalize)
7905 return lowerFunnelShiftAsShifts(MI);
7906 return Result;
7907}
7908
7910 auto [Dst, Src] = MI.getFirst2Regs();
7911 LLT DstTy = MRI.getType(Dst);
7912 LLT SrcTy = MRI.getType(Src);
7913
7914 uint32_t DstTySize = DstTy.getSizeInBits();
7915 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7916 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7917
7918 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7919 !isPowerOf2_32(SrcTyScalarSize))
7920 return UnableToLegalize;
7921
7922 // The step between extend is too large, split it by creating an intermediate
7923 // extend instruction
7924 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7925 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7926 // If the destination type is illegal, split it into multiple statements
7927 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7928 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7929 // Unmerge the vector
7930 LLT EltTy = MidTy.changeElementCount(
7931 MidTy.getElementCount().divideCoefficientBy(2));
7932 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7933
7934 // ZExt the vectors
7935 LLT ZExtResTy = DstTy.changeElementCount(
7936 DstTy.getElementCount().divideCoefficientBy(2));
7937 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7938 {UnmergeSrc.getReg(0)});
7939 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7940 {UnmergeSrc.getReg(1)});
7941
7942 // Merge the ending vectors
7943 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7944
7945 MI.eraseFromParent();
7946 return Legalized;
7947 }
7948 return UnableToLegalize;
7949}
7950
7952 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7953 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7954 // Similar to how operand splitting is done in SelectionDAG, we can handle
7955 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7956 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7957 // %lo16(<4 x s16>) = G_TRUNC %inlo
7958 // %hi16(<4 x s16>) = G_TRUNC %inhi
7959 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7960 // %res(<8 x s8>) = G_TRUNC %in16
7961
7962 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7963
7964 Register DstReg = MI.getOperand(0).getReg();
7965 Register SrcReg = MI.getOperand(1).getReg();
7966 LLT DstTy = MRI.getType(DstReg);
7967 LLT SrcTy = MRI.getType(SrcReg);
7968
7969 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7970 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7971 isPowerOf2_32(SrcTy.getNumElements()) &&
7972 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7973 // Split input type.
7974 LLT SplitSrcTy = SrcTy.changeElementCount(
7975 SrcTy.getElementCount().divideCoefficientBy(2));
7976
7977 // First, split the source into two smaller vectors.
7978 SmallVector<Register, 2> SplitSrcs;
7979 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7980
7981 // Truncate the splits into intermediate narrower elements.
7982 LLT InterTy;
7983 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7984 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7985 else
7986 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7987 for (Register &Src : SplitSrcs)
7988 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7989
7990 // Combine the new truncates into one vector
7991 auto Merge = MIRBuilder.buildMergeLikeInstr(
7992 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7993
7994 // Truncate the new vector to the final result type
7995 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7996 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7997 else
7998 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7999
8000 MI.eraseFromParent();
8001
8002 return Legalized;
8003 }
8004 return UnableToLegalize;
8005}
8006
8007LegalizerHelper::LegalizeResult
8008LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
8009 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8010 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8011 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8012 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8013 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8014 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8015 MI.eraseFromParent();
8016 return Legalized;
8017}
8018
8019LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
8020 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8021
8022 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8023 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8024
8025 MIRBuilder.setInstrAndDebugLoc(MI);
8026
8027 // If a rotate in the other direction is supported, use it.
8028 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8029 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8030 isPowerOf2_32(EltSizeInBits))
8031 return lowerRotateWithReverseRotate(MI);
8032
8033 // If a funnel shift is supported, use it.
8034 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8035 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8036 bool IsFShLegal = false;
8037 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8038 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8039 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8040 Register R3) {
8041 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8042 MI.eraseFromParent();
8043 return Legalized;
8044 };
8045 // If a funnel shift in the other direction is supported, use it.
8046 if (IsFShLegal) {
8047 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8048 } else if (isPowerOf2_32(EltSizeInBits)) {
8049 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8050 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8051 }
8052 }
8053
8054 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8055 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8056 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8057 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8058 Register ShVal;
8059 Register RevShiftVal;
8060 if (isPowerOf2_32(EltSizeInBits)) {
8061 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8062 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8063 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8064 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8065 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8066 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8067 RevShiftVal =
8068 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8069 } else {
8070 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8071 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8072 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8073 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8074 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8075 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8076 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8077 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8078 RevShiftVal =
8079 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8080 }
8081 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
8082 MI.eraseFromParent();
8083 return Legalized;
8084}
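// --- Illustrative sketch (editor addition, not part of LegalizerHelper.cpp) ---
// The power-of-two rotate expansion used above, for 32-bit values:
// rotl(x, c) == (x << (c & 31)) | (x >> (-c & 31)), with neither shift ever
// reaching the bit width. Checked against a 64-bit reference rotate.
#include <cassert>
#include <cstdint>

static uint32_t RefRotl(uint32_t X, uint32_t C) {
  uint64_t V = (uint64_t)X << (C % 32);
  return (uint32_t)(V | (V >> 32)); // wrap the shifted-out bits back around
}

static uint32_t ExpandedRotl(uint32_t X, uint32_t C) {
  return (X << (C & 31)) | (X >> ((0u - C) & 31));
}

int main() {
  uint32_t X = 0x80000001u;
  for (uint32_t C = 0; C < 96; ++C)
    assert(ExpandedRotl(X, C) == RefRotl(X, C));
  return 0;
}
// --- End of editor sketch ---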
8085
8086// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8087// representation.
8090 auto [Dst, Src] = MI.getFirst2Regs();
8091 const LLT S64 = LLT::scalar(64);
8092 const LLT S32 = LLT::scalar(32);
8093 const LLT S1 = LLT::scalar(1);
8094
8095 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8096
8097 // unsigned cul2f(ulong u) {
8098 // uint lz = clz(u);
8099 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8100 // u = (u << lz) & 0x7fffffffffffffffUL;
8101 // ulong t = u & 0xffffffffffUL;
8102 // uint v = (e << 23) | (uint)(u >> 40);
8103 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8104 // return as_float(v + r);
8105 // }
8106
8107 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8108 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8109
8110 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8111
8112 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8113 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8114
8115 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8116 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8117
8118 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8119 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8120
8121 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8122
8123 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8124 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8125
8126 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8127 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8128 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8129
8130 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8131 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8132 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8133 auto One = MIRBuilder.buildConstant(S32, 1);
8134
8135 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8136 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8137 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8138 MIRBuilder.buildAdd(Dst, V, R);
8139
8140 MI.eraseFromParent();
8141 return Legalized;
8142}
8143
8144// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8145 // operations and G_SITOFP
8146 LegalizerHelper::LegalizeResult
8147 LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8148 auto [Dst, Src] = MI.getFirst2Regs();
8149 const LLT S64 = LLT::scalar(64);
8150 const LLT S32 = LLT::scalar(32);
8151 const LLT S1 = LLT::scalar(1);
8152
8153 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8154
8155 // For i64 < INT_MAX we simply reuse SITOFP.
8156 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8157 // saved before division, convert to float by SITOFP, multiply the result
8158 // by 2.
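// For example, Src = 2^63: IsLarge is true, Halved = 2^62, LowerBit = 0,
// SITOFP(2^62) is exact, and doubling it yields 2^63 as a float. The
// SmallResult path, which would have treated 2^63 as negative, is selected away.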
8159 auto One = MIRBuilder.buildConstant(S64, 1);
8160 auto Zero = MIRBuilder.buildConstant(S64, 0);
8161 // Result if Src < INT_MAX
8162 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8163 // Result if Src >= INT_MAX
8164 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8165 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8166 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8167 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8168 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8169 // Check if the original value is larger than INT_MAX by comparing with
8170 // zero to pick one of the two conversions.
8171 auto IsLarge =
8172 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8173 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8174
8175 MI.eraseFromParent();
8176 return Legalized;
8177}
8178
8179// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8180 // IEEE double representation.
8181 LegalizerHelper::LegalizeResult
8182 LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8183 auto [Dst, Src] = MI.getFirst2Regs();
8184 const LLT S64 = LLT::scalar(64);
8185 const LLT S32 = LLT::scalar(32);
8186
8187 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8188
8189 // We create double value from 32 bit parts with 32 exponent difference.
8190 // Note that + and - are float operations that adjust the implicit leading
8191 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8192 //
8193 // X = 2^52 * 1.0...LowBits
8194 // Y = 2^84 * 1.0...HighBits
8195 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8196 // = - 2^52 * 1.0...HighBits
8197 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
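// The three constants decode as: 0x4330000000000000 = 2^52,
// 0x4530000000000000 = 2^84, and 0x4530000000100000 = 2^84 + 2^52, so
// Scratch = HighBits * 2^32 - 2^52 (exact) and the final fadd computes
// HighBits * 2^32 + LowBits with a single rounding.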
8198 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8199 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8200 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8201 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8202 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8203
8204 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8205 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8206 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8207 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8208 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8209 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8210 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8211
8212 MI.eraseFromParent();
8213 return Legalized;
8214}
8215
8216/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8217/// convert fpround f64->f16 without double-rounding, so we manually perform the
8218 /// lowering here where we know it is valid.
8219 static LegalizerHelper::LegalizeResult
8220 loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8221 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8222 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8223 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8224 : MIRBuilder.buildSITOFP(SrcTy, Src);
8225 LLT S32Ty = SrcTy.changeElementSize(32);
8226 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8227 MIRBuilder.buildFPTrunc(Dst, M2);
8228 MI.eraseFromParent();
8229 return LegalizerHelper::LegalizeResult::Legalized;
8230}
8231
8232 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8233 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8234
8235 if (SrcTy == LLT::scalar(1)) {
8236 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8237 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8238 MIRBuilder.buildSelect(Dst, Src, True, False);
8239 MI.eraseFromParent();
8240 return Legalized;
8241 }
8242
8243 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8244 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8245
8246 if (SrcTy != LLT::scalar(64))
8247 return UnableToLegalize;
8248
8249 if (DstTy == LLT::scalar(32))
8250 // TODO: SelectionDAG has several alternative expansions to port which may
8251 // be more reasonable depending on the available instructions. We also need
8252 // a more advanced mechanism to choose an optimal version depending on
8253 // target features such as sitofp or CTLZ availability.
8254 return lowerU64ToF32WithSITOFP(MI);
8255
8256 if (DstTy == LLT::scalar(64))
8257 return lowerU64ToF64BitFloatOps(MI);
8258
8259 return UnableToLegalize;
8260}
8261
8262 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8263 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8264
8265 const LLT S64 = LLT::scalar(64);
8266 const LLT S32 = LLT::scalar(32);
8267 const LLT S1 = LLT::scalar(1);
8268
8269 if (SrcTy == S1) {
8270 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8271 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8272 MIRBuilder.buildSelect(Dst, Src, True, False);
8273 MI.eraseFromParent();
8274 return Legalized;
8275 }
8276
8277 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8278 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8279
8280 if (SrcTy != S64)
8281 return UnableToLegalize;
8282
8283 if (DstTy == S32) {
8284 // signed cl2f(long l) {
8285 // long s = l >> 63;
8286 // float r = cul2f((l + s) ^ s);
8287 // return s ? -r : r;
8288 // }
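// For example, l = -1: s = -1, (l + s) ^ s = (-2) ^ -1 = 1,
// cul2f(1) = 1.0f, and since s != 0 the result is -1.0f.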
8289 Register L = Src;
8290 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8291 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8292
8293 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8294 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8295 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8296
8297 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8298 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8299 MIRBuilder.buildConstant(S64, 0));
8300 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8301 MI.eraseFromParent();
8302 return Legalized;
8303 }
8304
8305 return UnableToLegalize;
8306}
8307
8308 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8309 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8310 const LLT S64 = LLT::scalar(64);
8311 const LLT S32 = LLT::scalar(32);
8312
8313 if (SrcTy != S64 && SrcTy != S32)
8314 return UnableToLegalize;
8315 if (DstTy != S32 && DstTy != S64)
8316 return UnableToLegalize;
8317
8318 // FPTOSI gives same result as FPTOUI for positive signed integers.
8319 // FPTOUI needs to deal with fp values that convert to unsigned integers
8320 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8321
8322 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8323 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8324 : APFloat::IEEEdouble(),
8325 APInt::getZero(SrcTy.getSizeInBits()));
8326 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8327
8328 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8329
8330 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8331 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8332 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
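// For example, f32 -> u32 with Src = 2^31 exactly: the FCMP_ULT against the
// 2^31 threshold fails, FSub yields 0.0, FPTOSI gives 0, and the XOR with
// 0x80000000 produces 2^31, the correct unsigned result.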
8333 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8334 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8335 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8336 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8337
8338 const LLT S1 = LLT::scalar(1);
8339
8340 MachineInstrBuilder FCMP =
8341 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8342 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8343
8344 MI.eraseFromParent();
8345 return Legalized;
8346}
8347
8348 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8349 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8350 const LLT S64 = LLT::scalar(64);
8351 const LLT S32 = LLT::scalar(32);
8352
8353 // FIXME: Only f32 to i64 conversions are supported.
8354 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8355 return UnableToLegalize;
8356
8357 // Expand f32 -> i64 conversion
8358 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8359 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
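// For example, Src = 1.0f (0x3f800000): ExponentBits = 127, Exponent = 0,
// Sign = 0, R = 0x800000; Exponent <= ExponentLoBit selects the right shift,
// 0x800000 >> 23 = 1, and the final select keeps 1 since Exponent >= 0.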
8360
8361 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8362
8363 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8364 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8365
8366 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8367 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8368
8369 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8370 APInt::getSignMask(SrcEltBits));
8371 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8372 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8373 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8374 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8375
8376 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8377 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8378 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8379
8380 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8381 R = MIRBuilder.buildZExt(DstTy, R);
8382
8383 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8384 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8385 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8386 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8387
8388 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8389 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8390
8391 const LLT S1 = LLT::scalar(1);
8392 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8393 S1, Exponent, ExponentLoBit);
8394
8395 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8396
8397 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8398 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8399
8400 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8401
8402 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8403 S1, Exponent, ZeroSrcTy);
8404
8405 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8406 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8407
8408 MI.eraseFromParent();
8409 return Legalized;
8410}
8411
8412 LegalizerHelper::LegalizeResult
8413 LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8414 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8415
8416 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8417 unsigned SatWidth = DstTy.getScalarSizeInBits();
8418
8419 // Determine minimum and maximum integer values and their corresponding
8420 // floating-point values.
8421 APInt MinInt, MaxInt;
8422 if (IsSigned) {
8423 MinInt = APInt::getSignedMinValue(SatWidth);
8424 MaxInt = APInt::getSignedMaxValue(SatWidth);
8425 } else {
8426 MinInt = APInt::getMinValue(SatWidth);
8427 MaxInt = APInt::getMaxValue(SatWidth);
8428 }
8429
8430 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8431 APFloat MinFloat(Semantics);
8432 APFloat MaxFloat(Semantics);
8433
8434 APFloat::opStatus MinStatus =
8435 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8436 APFloat::opStatus MaxStatus =
8437 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8438 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8439 !(MaxStatus & APFloat::opStatus::opInexact);
8440
8441 // If the integer bounds are exactly representable as floats, emit a
8442 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8443 // and selects.
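// For example, with an f32 source neither 2^31 - 1 nor 2^32 - 1 is exactly
// representable, so i32/u32 destinations take the compare-and-select path
// below, while an f64 source with an i32 destination has exact bounds and
// can use the min+max+fptoi sequence.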
8444 if (AreExactFloatBounds) {
8445 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8446 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8447 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8448 SrcTy.changeElementSize(1), Src, MaxC);
8449 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8450 // Clamp by MaxFloat from above. NaN cannot occur.
8451 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8452 auto MinP =
8453 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8454 MinC, MachineInstr::FmNoNans);
8455 auto Min =
8456 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8457 // Convert clamped value to integer. In the unsigned case we're done,
8458 // because we mapped NaN to MinFloat, which will cast to zero.
8459 if (!IsSigned) {
8460 MIRBuilder.buildFPTOUI(Dst, Min);
8461 MI.eraseFromParent();
8462 return Legalized;
8463 }
8464
8465 // Otherwise, select 0 if Src is NaN.
8466 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8467 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8468 DstTy.changeElementSize(1), Src, Src);
8469 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8470 FpToInt);
8471 MI.eraseFromParent();
8472 return Legalized;
8473 }
8474
8475 // Result of direct conversion. The assumption here is that the operation is
8476 // non-trapping and it's fine to apply it to an out-of-range value if we
8477 // select it away later.
8478 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8479 : MIRBuilder.buildFPTOUI(DstTy, Src);
8480
8481 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8482 // MinInt if Src is NaN.
8483 auto ULT =
8484 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8485 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8486 auto Max = MIRBuilder.buildSelect(
8487 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8488 // If Src OGT MaxFloat, select MaxInt.
8489 auto OGT =
8490 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8491 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8492
8493 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8494 // is already zero.
8495 if (!IsSigned) {
8496 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8497 Max);
8498 MI.eraseFromParent();
8499 return Legalized;
8500 }
8501
8502 // Otherwise, select 0 if Src is NaN.
8503 auto Min = MIRBuilder.buildSelect(
8504 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8505 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8506 DstTy.changeElementSize(1), Src, Src);
8507 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8508 MI.eraseFromParent();
8509 return Legalized;
8510}
8511
8512 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
8513 LegalizerHelper::LegalizeResult
8514 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8515 const LLT S1 = LLT::scalar(1);
8516 const LLT S32 = LLT::scalar(32);
8517
8518 auto [Dst, Src] = MI.getFirst2Regs();
8519 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8520 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8521
8522 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8523 return UnableToLegalize;
8524
8525 if (MI.getFlag(MachineInstr::FmAfn)) {
8526 unsigned Flags = MI.getFlags();
8527 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8528 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8529 MI.eraseFromParent();
8530 return Legalized;
8531 }
8532
8533 const unsigned ExpMask = 0x7ff;
8534 const unsigned ExpBiasf64 = 1023;
8535 const unsigned ExpBiasf16 = 15;
8536
8537 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8538 Register U = Unmerge.getReg(0);
8539 Register UH = Unmerge.getReg(1);
8540
8541 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8542 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8543
8544 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8545 // add the f16 bias (15) to get the biased exponent for the f16 format.
8546 E = MIRBuilder.buildAdd(
8547 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8548
8549 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8550 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8551
8552 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8553 MIRBuilder.buildConstant(S32, 0x1ff));
8554 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8555
8556 auto Zero = MIRBuilder.buildConstant(S32, 0);
8557 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8558 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8559 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8560
8561 // (M != 0 ? 0x0200 : 0) | 0x7c00;
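// 0x7c00 is the all-ones f16 exponent (Inf); OR-ing in 0x0200 sets the top
// mantissa bit so the result is a quiet NaN when the f64 significand is
// non-zero (M != 0), and stays Inf otherwise.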
8562 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8563 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8564 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8565
8566 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8567 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8568
8569 // N = M | (E << 12);
8570 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8571 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8572
8573 // B = clamp(1-E, 0, 13);
8574 auto One = MIRBuilder.buildConstant(S32, 1);
8575 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8576 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8577 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8578
8579 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8580 MIRBuilder.buildConstant(S32, 0x1000));
8581
8582 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8583 auto D0 = MIRBuilder.buildShl(S32, D, B);
8584
8585 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8586 D0, SigSetHigh);
8587 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8588 D = MIRBuilder.buildOr(S32, D, D1);
8589
8590 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8591 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8592
8593 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8594 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8595
8596 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8597 MIRBuilder.buildConstant(S32, 3));
8598 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8599
8600 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8601 MIRBuilder.buildConstant(S32, 5));
8602 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8603
8604 V1 = MIRBuilder.buildOr(S32, V0, V1);
8605 V = MIRBuilder.buildAdd(S32, V, V1);
8606
8607 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8608 E, MIRBuilder.buildConstant(S32, 30));
8609 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8610 MIRBuilder.buildConstant(S32, 0x7c00), V);
8611
8612 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8613 E, MIRBuilder.buildConstant(S32, 1039));
8614 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8615
8616 // Extract the sign bit.
8617 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8618 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8619
8620 // Insert the sign bit
8621 V = MIRBuilder.buildOr(S32, Sign, V);
8622
8623 MIRBuilder.buildTrunc(Dst, V);
8624 MI.eraseFromParent();
8625 return Legalized;
8626}
8627
8628 LegalizerHelper::LegalizeResult
8629 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8630 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8631 const LLT S64 = LLT::scalar(64);
8632 const LLT S16 = LLT::scalar(16);
8633
8634 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8635 return lowerFPTRUNC_F64_TO_F16(MI);
8636
8637 return UnableToLegalize;
8638}
8639
8640 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8641 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8642 LLT Ty = MRI.getType(Dst);
8643
8644 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8645 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8646 MI.eraseFromParent();
8647 return Legalized;
8648}
8649
8650 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8651 switch (Opc) {
8652 case TargetOpcode::G_SMIN:
8653 return CmpInst::ICMP_SLT;
8654 case TargetOpcode::G_SMAX:
8655 return CmpInst::ICMP_SGT;
8656 case TargetOpcode::G_UMIN:
8657 return CmpInst::ICMP_ULT;
8658 case TargetOpcode::G_UMAX:
8659 return CmpInst::ICMP_UGT;
8660 default:
8661 llvm_unreachable("not in integer min/max");
8662 }
8663}
8664
8665 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8666 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8667
8668 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8669 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8670
8671 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8672 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8673
8674 MI.eraseFromParent();
8675 return Legalized;
8676}
8677
8678 LegalizerHelper::LegalizeResult
8679 LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8680 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8681
8682 Register Dst = Cmp->getReg(0);
8683 LLT DstTy = MRI.getType(Dst);
8684 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8685 LLT CmpTy = DstTy.changeElementSize(1);
8686
8687 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8688 ? CmpInst::Predicate::ICMP_SLT
8689 : CmpInst::Predicate::ICMP_ULT;
8690 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8691 ? CmpInst::Predicate::ICMP_SGT
8692 : CmpInst::Predicate::ICMP_UGT;
8693
8694 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8695 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8696 Cmp->getRHSReg());
8697 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8698 Cmp->getRHSReg());
8699
8700 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8701 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8702 if (TLI.preferSelectsOverBooleanArithmetic(
8703 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8704 BC == TargetLowering::UndefinedBooleanContent) {
8705 auto One = MIRBuilder.buildConstant(DstTy, 1);
8706 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8707
8708 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8709 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8710 } else {
8711 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8712 std::swap(IsGT, IsLT);
8713 // Extend boolean results to DstTy, which is at least i2, before subtracting
8714 // them.
8715 unsigned BoolExtOp =
8716 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8717 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8718 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8719 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8720 }
8721
8722 MI.eraseFromParent();
8723 return Legalized;
8724}
8725
8726 LegalizerHelper::LegalizeResult
8727 LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8728 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8729 const int Src0Size = Src0Ty.getScalarSizeInBits();
8730 const int Src1Size = Src1Ty.getScalarSizeInBits();
8731
8732 auto SignBitMask = MIRBuilder.buildConstant(
8733 Src0Ty, APInt::getSignMask(Src0Size));
8734
8735 auto NotSignBitMask = MIRBuilder.buildConstant(
8736 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
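// For the common f32/f32 case this is SignBitMask = 0x80000000 and
// NotSignBitMask = 0x7fffffff, i.e. the result is
// (bits(Src0) & 0x7fffffff) | (bits(Src1) & 0x80000000).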
8737
8738 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8739 Register And1;
8740 if (Src0Ty == Src1Ty) {
8741 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8742 } else if (Src0Size > Src1Size) {
8743 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8744 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8745 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8746 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8747 } else {
8748 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8749 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8750 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8751 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8752 }
8753
8754 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8755 // constants are a nan and -0.0, but the final result should preserve
8756 // everything.
8757 unsigned Flags = MI.getFlags();
8758
8759 // We masked the sign bit and the not-sign bit, so these are disjoint.
8760 Flags |= MachineInstr::Disjoint;
8761
8762 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8763
8764 MI.eraseFromParent();
8765 return Legalized;
8766}
8767
8768 LegalizerHelper::LegalizeResult
8769 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8770 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8771 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8772 // depend on fminnum/fmaxnum.
8773
8774 unsigned NewOp;
8775 switch (MI.getOpcode()) {
8776 case TargetOpcode::G_FMINNUM:
8777 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8778 break;
8779 case TargetOpcode::G_FMINIMUMNUM:
8780 NewOp = TargetOpcode::G_FMINNUM;
8781 break;
8782 case TargetOpcode::G_FMAXNUM:
8783 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8784 break;
8785 case TargetOpcode::G_FMAXIMUMNUM:
8786 NewOp = TargetOpcode::G_FMAXNUM;
8787 break;
8788 default:
8789 llvm_unreachable("unexpected min/max opcode");
8790 }
8791
8792 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8793 LLT Ty = MRI.getType(Dst);
8794
8795 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8796 // Insert canonicalizes if it's possible we need to quiet to get correct
8797 // sNaN behavior.
8798
8799 // Note this must be done here, and not as an optimization combine in the
8800 // absence of a dedicated quiet-snan instruction as we're using an
8801 // omni-purpose G_FCANONICALIZE.
8802 if (!isKnownNeverSNaN(Src0, MRI))
8803 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8804
8805 if (!isKnownNeverSNaN(Src1, MRI))
8806 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8807 }
8808
8809 // If there are no nans, it's safe to simply replace this with the non-IEEE
8810 // version.
8811 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8812 MI.eraseFromParent();
8813 return Legalized;
8814}
8815
8818 unsigned Opc = MI.getOpcode();
8819 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8820 LLT Ty = MRI.getType(Dst);
8821 LLT CmpTy = Ty.changeElementSize(1);
8822
8823 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8824 unsigned OpcIeee =
8825 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8826 unsigned OpcNonIeee =
8827 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8828 bool MinMaxMustRespectOrderedZero = false;
8829 Register Res;
8830
8831 // IEEE variants don't need canonicalization
8832 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8833 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8834 MinMaxMustRespectOrderedZero = true;
8835 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8836 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8837 } else {
8838 auto Compare = MIRBuilder.buildFCmp(
8839 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8840 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8841 }
8842
8843 // Propagate any NaN of both operands
8844 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8845 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8846 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8847
8848 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8849 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8850 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8851 if (Ty.isVector())
8852 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8853
8854 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8855 }
8856
8857 // fminimum/fmaximum requires -0.0 less than +0.0
8858 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8859 GISelValueTracking VT(MIRBuilder.getMF());
8860 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8861 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8862
8863 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8864 const unsigned Flags = MI.getFlags();
8865 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8866 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8867
8868 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8869
8870 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8871 auto LHSSelect =
8872 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8873
8874 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8875 auto RHSSelect =
8876 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8877
8878 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8879 }
8880 }
8881
8882 MIRBuilder.buildCopy(Dst, Res);
8883 MI.eraseFromParent();
8884 return Legalized;
8885}
8886
8887 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8888 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8889 Register DstReg = MI.getOperand(0).getReg();
8890 LLT Ty = MRI.getType(DstReg);
8891 unsigned Flags = MI.getFlags();
8892
8893 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8894 Flags);
8895 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8896 MI.eraseFromParent();
8897 return Legalized;
8898}
8899
8900 LegalizerHelper::LegalizeResult
8901 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8902 auto [DstReg, X] = MI.getFirst2Regs();
8903 const unsigned Flags = MI.getFlags();
8904 const LLT Ty = MRI.getType(DstReg);
8905 const LLT CondTy = Ty.changeElementSize(1);
8906
8907 // round(x) =>
8908 // t = trunc(x);
8909 // d = fabs(x - t);
8910 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8911 // return t + o;
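// For example, round(-2.5): t = -2.0, d = 0.5, so o = copysign(1.0, -2.5)
// = -1.0 and the result is -3.0; ties round away from zero, matching the
// llvm.round semantics of G_INTRINSIC_ROUND.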
8912
8913 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8914
8915 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8916 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8917
8918 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8919 auto Cmp =
8920 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8921
8922 // Could emit G_UITOFP instead
8923 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8924 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8925 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8926 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8927
8928 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8929
8930 MI.eraseFromParent();
8931 return Legalized;
8932}
8933
8934 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8935 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8936 unsigned Flags = MI.getFlags();
8937 LLT Ty = MRI.getType(DstReg);
8938 const LLT CondTy = Ty.changeElementSize(1);
8939
8940 // result = trunc(src);
8941 // if (src < 0.0 && src != result)
8942 // result += -1.0.
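// For example, floor(-1.5): trunc = -1.0, both compares are true, and the
// true i1 sign-extends through G_SITOFP to -1.0, giving -2.0. For
// floor(1.5) the 'and' is false, AddVal is 0.0, and the result is 1.0.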
8943
8944 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8945 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8946
8947 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8948 SrcReg, Zero, Flags);
8949 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8950 SrcReg, Trunc, Flags);
8951 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8952 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8953
8954 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8955 MI.eraseFromParent();
8956 return Legalized;
8957}
8958
8959 LegalizerHelper::LegalizeResult
8960 LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
8961 const unsigned NumOps = MI.getNumOperands();
8962 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8963 unsigned PartSize = Src0Ty.getSizeInBits();
8964
8965 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8966 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8967
8968 for (unsigned I = 2; I != NumOps; ++I) {
8969 const unsigned Offset = (I - 1) * PartSize;
8970
8971 Register SrcReg = MI.getOperand(I).getReg();
8972 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8973
8974 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8975 MRI.createGenericVirtualRegister(WideTy);
8976
8977 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8978 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8979 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8980 ResultReg = NextResult;
8981 }
8982
8983 if (DstTy.isPointer()) {
8984 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8985 DstTy.getAddressSpace())) {
8986 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8987 return UnableToLegalize;
8988 }
8989
8990 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8991 }
8992
8993 MI.eraseFromParent();
8994 return Legalized;
8995}
8996
8997 LegalizerHelper::LegalizeResult
8998 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8999 const unsigned NumDst = MI.getNumOperands() - 1;
9000 Register SrcReg = MI.getOperand(NumDst).getReg();
9001 Register Dst0Reg = MI.getOperand(0).getReg();
9002 LLT DstTy = MRI.getType(Dst0Reg);
9003 if (DstTy.isPointer())
9004 return UnableToLegalize; // TODO
9005
9006 SrcReg = coerceToScalar(SrcReg);
9007 if (!SrcReg)
9008 return UnableToLegalize;
9009
9010 // Expand scalarizing unmerge as bitcast to integer and shift.
9011 LLT IntTy = MRI.getType(SrcReg);
9012
9013 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9014
9015 const unsigned DstSize = DstTy.getSizeInBits();
9016 unsigned Offset = DstSize;
9017 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9018 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9019 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9020 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9021 }
9022
9023 MI.eraseFromParent();
9024 return Legalized;
9025}
9026
9027/// Lower a vector extract or insert by writing the vector to a stack temporary
9028/// and reloading the element or vector.
9029///
9030/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9031/// =>
9032/// %stack_temp = G_FRAME_INDEX
9033/// G_STORE %vec, %stack_temp
9034/// %idx = clamp(%idx, %vec.getNumElements())
9035/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9036 /// %dst = G_LOAD %element_ptr
9037 LegalizerHelper::LegalizeResult
9038 LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
9039 Register DstReg = MI.getOperand(0).getReg();
9040 Register SrcVec = MI.getOperand(1).getReg();
9041 Register InsertVal;
9042 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9043 InsertVal = MI.getOperand(2).getReg();
9044
9045 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9046
9047 LLT VecTy = MRI.getType(SrcVec);
9048 LLT EltTy = VecTy.getElementType();
9049 unsigned NumElts = VecTy.getNumElements();
9050
9051 int64_t IdxVal;
9052 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9053 SmallVector<Register, 8> SrcRegs;
9054 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9055
9056 if (InsertVal) {
9057 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9058 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9059 } else {
9060 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9061 }
9062
9063 MI.eraseFromParent();
9064 return Legalized;
9065 }
9066
9067 if (!EltTy.isByteSized()) { // Not implemented.
9068 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9069 return UnableToLegalize;
9070 }
9071
9072 unsigned EltBytes = EltTy.getSizeInBytes();
9073 Align VecAlign = getStackTemporaryAlignment(VecTy);
9074 Align EltAlign;
9075
9076 MachinePointerInfo PtrInfo;
9077 auto StackTemp = createStackTemporary(
9078 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9079 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9080
9081 // Get the pointer to the element, and be sure not to hit undefined behavior
9082 // if the index is out of bounds.
9083 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9084
9085 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9086 int64_t Offset = IdxVal * EltBytes;
9087 PtrInfo = PtrInfo.getWithOffset(Offset);
9088 EltAlign = commonAlignment(VecAlign, Offset);
9089 } else {
9090 // We lose information with a variable offset.
9091 EltAlign = getStackTemporaryAlignment(EltTy);
9092 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9093 }
9094
9095 if (InsertVal) {
9096 // Write the inserted element
9097 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9098
9099 // Reload the whole vector.
9100 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9101 } else {
9102 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9103 }
9104
9105 MI.eraseFromParent();
9106 return Legalized;
9107}
9108
9109 LegalizerHelper::LegalizeResult
9110 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
9111 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9112 MI.getFirst3RegLLTs();
9113 LLT IdxTy = LLT::scalar(32);
9114
9115 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9116 Register Undef;
9117 SmallVector<Register, 32> BuildVec;
9118 LLT EltTy = DstTy.getScalarType();
9119
9120 DenseMap<unsigned, Register> CachedExtract;
9121
9122 for (int Idx : Mask) {
9123 if (Idx < 0) {
9124 if (!Undef.isValid())
9125 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9126 BuildVec.push_back(Undef);
9127 continue;
9128 }
9129
9130 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9131
9132 int NumElts = Src0Ty.getNumElements();
9133 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9134 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9135 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9136 if (Inserted) {
9137 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9138 It->second =
9139 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9140 }
9141 BuildVec.push_back(It->second);
9142 }
9143
9144 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9145 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9146 MI.eraseFromParent();
9147 return Legalized;
9148}
9149
9150 LegalizerHelper::LegalizeResult
9151 LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
9152 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9153 MI.getFirst4RegLLTs();
9154
9155 if (VecTy.isScalableVector())
9156 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9157
9158 Align VecAlign = getStackTemporaryAlignment(VecTy);
9159 MachinePointerInfo PtrInfo;
9160 Register StackPtr =
9161 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9162 PtrInfo)
9163 .getReg(0);
9164 MachinePointerInfo ValPtrInfo =
9165 MachinePointerInfo::getUnknownStack(*MI.getMF());
9166
9167 LLT IdxTy = LLT::scalar(32);
9168 LLT ValTy = VecTy.getElementType();
9169 Align ValAlign = getStackTemporaryAlignment(ValTy);
9170
9171 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9172
9173 bool HasPassthru =
9174 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9175
9176 if (HasPassthru)
9177 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9178
9179 Register LastWriteVal;
9180 std::optional<APInt> PassthruSplatVal =
9181 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9182
9183 if (PassthruSplatVal.has_value()) {
9184 LastWriteVal =
9185 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9186 } else if (HasPassthru) {
9187 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9188 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9189 {LLT::scalar(32)}, {Popcount});
9190
9191 Register LastElmtPtr =
9192 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9193 LastWriteVal =
9194 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9195 .getReg(0);
9196 }
9197
9198 unsigned NumElmts = VecTy.getNumElements();
9199 for (unsigned I = 0; I < NumElmts; ++I) {
9200 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9201 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9202 Register ElmtPtr =
9203 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9204 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9205
9206 LLT MaskITy = MaskTy.getElementType();
9207 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9208 if (MaskITy.getSizeInBits() > 1)
9209 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9210
9211 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9212 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9213
9214 if (HasPassthru && I == NumElmts - 1) {
9215 auto EndOfVector =
9216 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9217 auto AllLanesSelected = MIRBuilder.buildICmp(
9218 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9219 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9220 {OutPos, EndOfVector});
9221 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9222
9223 LastWriteVal =
9224 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9225 .getReg(0);
9226 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9227 }
9228 }
9229
9230 // TODO: Use StackPtr's FrameIndex alignment.
9231 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9232
9233 MI.eraseFromParent();
9234 return Legalized;
9235}
9236
9237 Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9238 Register AllocSize,
9239 Align Alignment,
9240 LLT PtrTy) {
9241 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9242
9243 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9244 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9245
9246 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9247 // have to generate an extra instruction to negate the alloc and then use
9248 // G_PTR_ADD to add the negative offset.
9249 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9250 if (Alignment > Align(1)) {
9251 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9252 AlignMask.negate();
9253 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9254 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9255 }
9256
9257 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9258}
9259
9260 LegalizerHelper::LegalizeResult
9261 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9262 const auto &MF = *MI.getMF();
9263 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9264 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9265 return UnableToLegalize;
9266
9267 Register Dst = MI.getOperand(0).getReg();
9268 Register AllocSize = MI.getOperand(1).getReg();
9269 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9270
9271 LLT PtrTy = MRI.getType(Dst);
9272 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9273 Register SPTmp =
9274 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9275
9276 MIRBuilder.buildCopy(SPReg, SPTmp);
9277 MIRBuilder.buildCopy(Dst, SPTmp);
9278
9279 MI.eraseFromParent();
9280 return Legalized;
9281}
9282
9283 LegalizerHelper::LegalizeResult
9284 LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9285 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9286 if (!StackPtr)
9287 return UnableToLegalize;
9288
9289 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9290 MI.eraseFromParent();
9291 return Legalized;
9292}
9293
9294 LegalizerHelper::LegalizeResult
9295 LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9296 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9297 if (!StackPtr)
9298 return UnableToLegalize;
9299
9300 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9301 MI.eraseFromParent();
9302 return Legalized;
9303}
9304
9305 LegalizerHelper::LegalizeResult
9306 LegalizerHelper::lowerExtract(MachineInstr &MI) {
9307 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9308 unsigned Offset = MI.getOperand(2).getImm();
9309
9310 // Extract sub-vector or one element
9311 if (SrcTy.isVector()) {
9312 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9313 unsigned DstSize = DstTy.getSizeInBits();
9314
9315 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9316 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9317 // Unmerge and allow access to each Src element for the artifact combiner.
9318 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9319
9320 // Take element(s) we need to extract and copy it (merge them).
9321 SmallVector<Register, 8> SubVectorElts;
9322 for (unsigned Idx = Offset / SrcEltSize;
9323 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9324 SubVectorElts.push_back(Unmerge.getReg(Idx));
9325 }
9326 if (SubVectorElts.size() == 1)
9327 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9328 else
9329 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9330
9331 MI.eraseFromParent();
9332 return Legalized;
9333 }
9334 }
9335
9336 if (DstTy.isScalar() &&
9337 (SrcTy.isScalar() ||
9338 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9339 LLT SrcIntTy = SrcTy;
9340 if (!SrcTy.isScalar()) {
9341 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9342 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9343 }
9344
9345 if (Offset == 0)
9346 MIRBuilder.buildTrunc(DstReg, SrcReg);
9347 else {
9348 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9349 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9350 MIRBuilder.buildTrunc(DstReg, Shr);
9351 }
9352
9353 MI.eraseFromParent();
9354 return Legalized;
9355 }
9356
9357 return UnableToLegalize;
9358}
9359
9360 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9361 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9362 uint64_t Offset = MI.getOperand(3).getImm();
9363
9364 LLT DstTy = MRI.getType(Src);
9365 LLT InsertTy = MRI.getType(InsertSrc);
9366
9367 // Insert sub-vector or one element
9368 if (DstTy.isVector() && !InsertTy.isPointer()) {
9369 LLT EltTy = DstTy.getElementType();
9370 unsigned EltSize = EltTy.getSizeInBits();
9371 unsigned InsertSize = InsertTy.getSizeInBits();
9372
9373 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9374 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9375 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9376 SmallVector<Register, 8> DstElts;
9377 unsigned Idx = 0;
9378 // Elements from Src before insert start Offset
9379 for (; Idx < Offset / EltSize; ++Idx) {
9380 DstElts.push_back(UnmergeSrc.getReg(Idx));
9381 }
9382
9383 // Replace elements in Src with elements from InsertSrc
9384 if (InsertTy.getSizeInBits() > EltSize) {
9385 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9386 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9387 ++Idx, ++i) {
9388 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9389 }
9390 } else {
9391 DstElts.push_back(InsertSrc);
9392 ++Idx;
9393 }
9394
9395 // Remaining elements from Src after insert
9396 for (; Idx < DstTy.getNumElements(); ++Idx) {
9397 DstElts.push_back(UnmergeSrc.getReg(Idx));
9398 }
9399
9400 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9401 MI.eraseFromParent();
9402 return Legalized;
9403 }
9404 }
9405
9406 if (InsertTy.isVector() ||
9407 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9408 return UnableToLegalize;
9409
9410 const DataLayout &DL = MIRBuilder.getDataLayout();
9411 if ((DstTy.isPointer() &&
9412 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9413 (InsertTy.isPointer() &&
9414 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9415 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9416 return UnableToLegalize;
9417 }
9418
9419 LLT IntDstTy = DstTy;
9420
9421 if (!DstTy.isScalar()) {
9422 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9423 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9424 }
9425
9426 if (!InsertTy.isScalar()) {
9427 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9428 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9429 }
9430
9431 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9432 if (Offset != 0) {
9433 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9434 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9435 }
9436
9437 APInt MaskVal = APInt::getBitsSetWithWrap(
9438 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9439
9440 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9441 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9442 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9443
9444 MIRBuilder.buildCast(Dst, Or);
9445 MI.eraseFromParent();
9446 return Legalized;
9447}
9448
9449 LegalizerHelper::LegalizeResult
9450 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9451 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9452 MI.getFirst4RegLLTs();
9453 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9454
9455 LLT Ty = Dst0Ty;
9456 LLT BoolTy = Dst1Ty;
9457
9458 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9459
9460 if (IsAdd)
9461 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9462 else
9463 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9464
9465 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9466
9467 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9468
9469 // For an addition, the result should be less than one of the operands (LHS)
9470 // if and only if the other operand (RHS) is negative, otherwise there will
9471 // be overflow.
9472 // For a subtraction, the result should be less than one of the operands
9473 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9474 // otherwise there will be overflow.
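// For example, in s8: 100 + 50 wraps to -106, which is less than LHS while
// RHS is non-negative, so the XOR below reports overflow; 100 + (-50) = 50
// is also less than LHS but RHS is negative, so no overflow is reported.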
9475 auto ResultLowerThanLHS =
9476 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9477 auto ConditionRHS = MIRBuilder.buildICmp(
9478 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9479
9480 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9481
9482 MIRBuilder.buildCopy(Dst0, NewDst0);
9483 MI.eraseFromParent();
9484
9485 return Legalized;
9486}
9487
9489 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9490 const LLT Ty = MRI.getType(Res);
9491
9492 // sum = LHS + RHS + zext(CarryIn)
9493 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9494 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9495 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9496 MIRBuilder.buildCopy(Res, Sum);
9497
9498 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9499 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9500 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9501 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9502
9503 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9504 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9505
9506 MI.eraseFromParent();
9507 return Legalized;
9508}
9509
9511 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9512 const LLT Ty = MRI.getType(Res);
9513
9514 // Diff = LHS - (RHS + zext(CarryIn))
9515 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9516 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9517 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9518 MIRBuilder.buildCopy(Res, Diff);
9519
9520 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9521 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9522 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9523 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9524 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9525 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9526
9527 MI.eraseFromParent();
9528 return Legalized;
9529}
9530
9531 LegalizerHelper::LegalizeResult
9532 LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9533 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9534 LLT Ty = MRI.getType(Res);
9535 bool IsSigned;
9536 bool IsAdd;
9537 unsigned BaseOp;
9538 switch (MI.getOpcode()) {
9539 default:
9540 llvm_unreachable("unexpected addsat/subsat opcode");
9541 case TargetOpcode::G_UADDSAT:
9542 IsSigned = false;
9543 IsAdd = true;
9544 BaseOp = TargetOpcode::G_ADD;
9545 break;
9546 case TargetOpcode::G_SADDSAT:
9547 IsSigned = true;
9548 IsAdd = true;
9549 BaseOp = TargetOpcode::G_ADD;
9550 break;
9551 case TargetOpcode::G_USUBSAT:
9552 IsSigned = false;
9553 IsAdd = false;
9554 BaseOp = TargetOpcode::G_SUB;
9555 break;
9556 case TargetOpcode::G_SSUBSAT:
9557 IsSigned = true;
9558 IsAdd = false;
9559 BaseOp = TargetOpcode::G_SUB;
9560 break;
9561 }
9562
9563 if (IsSigned) {
9564 // sadd.sat(a, b) ->
9565 // hi = 0x7fffffff - smax(a, 0)
9566 // lo = 0x80000000 - smin(a, 0)
9567 // a + smin(smax(lo, b), hi)
9568 // ssub.sat(a, b) ->
9569 // lo = smax(a, -1) - 0x7fffffff
9570 // hi = smin(a, -1) - 0x80000000
9571 // a - smin(smax(lo, b), hi)
9572 // TODO: AMDGPU can use a "median of 3" instruction here:
9573 // a +/- med3(lo, b, hi)
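// For example, sadd.sat in s8 with a = 100: hi = 127 - 100 = 27 and
// lo = -128 - 0 = -128, so b is clamped to [-128, 27] and a + clamp(b)
// stays within [-28, 127]; b = 50 clamps to 27 and saturates at 127.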
9574 uint64_t NumBits = Ty.getScalarSizeInBits();
9575 auto MaxVal =
9576 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9577 auto MinVal =
9578 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9579 MachineInstrBuilder Hi, Lo;
9580 if (IsAdd) {
9581 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9582 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9583 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9584 } else {
9585 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9586 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9587 MaxVal);
9588 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9589 MinVal);
9590 }
9591 auto RHSClamped =
9592 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9593 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9594 } else {
9595 // uadd.sat(a, b) -> a + umin(~a, b)
9596 // usub.sat(a, b) -> a - umin(a, b)
9597 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9598 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9599 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9600 }
9601
9602 MI.eraseFromParent();
9603 return Legalized;
9604}
9605
9606 LegalizerHelper::LegalizeResult
9607 LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9608 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9609 LLT Ty = MRI.getType(Res);
9610 LLT BoolTy = Ty.changeElementSize(1);
9611 bool IsSigned;
9612 bool IsAdd;
9613 unsigned OverflowOp;
9614 switch (MI.getOpcode()) {
9615 default:
9616 llvm_unreachable("unexpected addsat/subsat opcode");
9617 case TargetOpcode::G_UADDSAT:
9618 IsSigned = false;
9619 IsAdd = true;
9620 OverflowOp = TargetOpcode::G_UADDO;
9621 break;
9622 case TargetOpcode::G_SADDSAT:
9623 IsSigned = true;
9624 IsAdd = true;
9625 OverflowOp = TargetOpcode::G_SADDO;
9626 break;
9627 case TargetOpcode::G_USUBSAT:
9628 IsSigned = false;
9629 IsAdd = false;
9630 OverflowOp = TargetOpcode::G_USUBO;
9631 break;
9632 case TargetOpcode::G_SSUBSAT:
9633 IsSigned = true;
9634 IsAdd = false;
9635 OverflowOp = TargetOpcode::G_SSUBO;
9636 break;
9637 }
9638
9639 auto OverflowRes =
9640 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9641 Register Tmp = OverflowRes.getReg(0);
9642 Register Ov = OverflowRes.getReg(1);
9643 MachineInstrBuilder Clamp;
9644 if (IsSigned) {
9645 // sadd.sat(a, b) ->
9646 // {tmp, ov} = saddo(a, b)
9647 // ov ? (tmp >>s 31) + 0x80000000 : r
9648 // ssub.sat(a, b) ->
9649 // {tmp, ov} = ssubo(a, b)
9650 // ov ? (tmp >>s 31) + 0x80000000 : r
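// On overflow the wrapped result has the inverted sign, so (tmp >>s 31) is
// 0 or -1 and adding 0x80000000 yields INT_MIN for negative overflow and
// INT_MAX for positive overflow.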
9651 uint64_t NumBits = Ty.getScalarSizeInBits();
9652 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9653 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9654 auto MinVal =
9655 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9656 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9657 } else {
9658 // uadd.sat(a, b) ->
9659 // {tmp, ov} = uaddo(a, b)
9660 // ov ? 0xffffffff : tmp
9661 // usub.sat(a, b) ->
9662 // {tmp, ov} = usubo(a, b)
9663 // ov ? 0 : tmp
9664 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9665 }
9666 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9667
9668 MI.eraseFromParent();
9669 return Legalized;
9670}
9671
9672 LegalizerHelper::LegalizeResult
9673 LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9674 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9675 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9676 "Expected shlsat opcode!");
9677 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9678 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9679 LLT Ty = MRI.getType(Res);
9680 LLT BoolTy = Ty.changeElementSize(1);
9681
9682 unsigned BW = Ty.getScalarSizeInBits();
9683 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9684 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9685 : MIRBuilder.buildLShr(Ty, Result, RHS);
9686
9687 MachineInstrBuilder SatVal;
9688 if (IsSigned) {
9689 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9690 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9691 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9692 MIRBuilder.buildConstant(Ty, 0));
9693 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9694 } else {
9695 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9696 }
9697 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9698 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9699
9700 MI.eraseFromParent();
9701 return Legalized;
9702}
9703
9704 LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9705 auto [Dst, Src] = MI.getFirst2Regs();
9706 const LLT Ty = MRI.getType(Src);
9707 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9708 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9709
9710 // Swap most and least significant byte, set remaining bytes in Res to zero.
9711 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9712 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9713 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9714 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9715
9716 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9717 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9718 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9719 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9720 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9721 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9722 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9723 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9724 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9725 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9726 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9727 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9728 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9729 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9730 }
9731 Res.getInstr()->getOperand(0).setReg(Dst);
9732
9733 MI.eraseFromParent();
9734 return Legalized;
9735}
9736
9737//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9738 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9739 MachineInstrBuilder Src, const APInt &Mask) {
9740 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9741 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9742 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9743 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9744 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9745 return B.buildOr(Dst, LHS, RHS);
9746}
9747
9748 LegalizerHelper::LegalizeResult
9749 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9750 auto [Dst, Src] = MI.getFirst2Regs();
9751 const LLT SrcTy = MRI.getType(Src);
9752 unsigned Size = SrcTy.getScalarSizeInBits();
9753 unsigned VSize = SrcTy.getSizeInBits();
9754
9755 if (Size >= 8) {
9756 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9757 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9758 {LLT::fixed_vector(VSize / 8, 8),
9759 LLT::fixed_vector(VSize / 8, 8)}}))) {
9760 // If bitreverse is legal for i8 vector of the same size, then cast
9761 // to i8 vector type.
9762 // e.g. v4s32 -> v16s8
9763 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9764 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9765 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9766 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9767 MIRBuilder.buildBitcast(Dst, RBIT);
9768 } else {
9769 MachineInstrBuilder BSWAP =
9770 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9771
9772 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9773 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9774 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9775 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9776 APInt::getSplat(Size, APInt(8, 0xF0)));
9777
9778 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9779 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9780 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9781 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9782 APInt::getSplat(Size, APInt(8, 0xCC)));
9783
9784 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9785 // 6|7
9786 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9787 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
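// E.g. reversing the byte 0xb2 (0b10110010): Swap4 -> 0x2b, Swap2 -> 0x8e,
// and the final swap -> 0x4d (0b01001101), the bit-reversed value.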
9788 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9789 }
9790 } else {
9791 // Expand bitreverse for types smaller than 8 bits.
9792 MachineInstrBuilder Tmp;
9793 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9794 MachineInstrBuilder Tmp2;
9795 if (I < J) {
9796 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9797 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9798 } else {
9799 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9800 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9801 }
9802
9803 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9804 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9805 if (I == 0)
9806 Tmp = Tmp2;
9807 else
9808 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9809 }
9810 MIRBuilder.buildCopy(Dst, Tmp);
9811 }
9812
9813 MI.eraseFromParent();
9814 return Legalized;
9815}
9816
9817 LegalizerHelper::LegalizeResult
9818 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9819 MachineFunction &MF = MIRBuilder.getMF();
9820
9821 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9822 int NameOpIdx = IsRead ? 1 : 0;
9823 int ValRegIndex = IsRead ? 0 : 1;
9824
9825 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9826 const LLT Ty = MRI.getType(ValReg);
9827 const MDString *RegStr = cast<MDString>(
9828 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9829
9830 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9831 if (!PhysReg) {
9832 const Function &Fn = MF.getFunction();
9834 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9835 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9836 Fn, MI.getDebugLoc()));
9837 if (IsRead)
9838 MIRBuilder.buildUndef(ValReg);
9839
9840 MI.eraseFromParent();
9841 return Legalized;
9842 }
9843
9844 if (IsRead)
9845 MIRBuilder.buildCopy(ValReg, PhysReg);
9846 else
9847 MIRBuilder.buildCopy(PhysReg, ValReg);
9848
9849 MI.eraseFromParent();
9850 return Legalized;
9851}
9852
9853 LegalizerHelper::LegalizeResult
9854 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9855 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9856 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9857 Register Result = MI.getOperand(0).getReg();
9858 LLT OrigTy = MRI.getType(Result);
9859 auto SizeInBits = OrigTy.getScalarSizeInBits();
9860 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
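// E.g. for s32 this extends both operands to s64, multiplies, shifts the
// 64-bit product right by 32, and truncates to obtain the high half.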
9861
9862 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9863 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9864 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9865 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9866
9867 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9868 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9869 MIRBuilder.buildTrunc(Result, Shifted);
9870
9871 MI.eraseFromParent();
9872 return Legalized;
9873}
9874
9875 LegalizerHelper::LegalizeResult
9876 LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9877 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9878 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9879
9880 if (Mask == fcNone) {
9881 MIRBuilder.buildConstant(DstReg, 0);
9882 MI.eraseFromParent();
9883 return Legalized;
9884 }
9885 if (Mask == fcAllFlags) {
9886 MIRBuilder.buildConstant(DstReg, 1);
9887 MI.eraseFromParent();
9888 return Legalized;
9889 }
9890
9891 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9892 // version
9893
9894 unsigned BitSize = SrcTy.getScalarSizeInBits();
9895 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9896
9897 LLT IntTy = LLT::scalar(BitSize);
9898 if (SrcTy.isVector())
9899 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9900 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9901
9902 // Various masks.
9903 APInt SignBit = APInt::getSignMask(BitSize);
9904 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9905 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9906 APInt ExpMask = Inf;
9907 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9908 APInt QNaNBitMask =
9909 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9910 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
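// E.g. for f32: SignBit = 0x80000000, Inf = ExpMask = 0x7f800000,
// AllOneMantissa = 0x007fffff, QNaNBitMask = 0x00400000.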
9911
9912 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9913 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9914 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9915 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9916 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9917
9918 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9919 auto Sign =
9920 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9921
9922 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9923 // Clang doesn't support capture of structured bindings:
9924 LLT DstTyCopy = DstTy;
9925 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9926 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9927 };
9928
9929 // Tests that involve more than one class should be processed first.
9930 if ((Mask & fcFinite) == fcFinite) {
9931 // finite(V) ==> abs(V) u< exp_mask
9932 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9933 ExpMaskC));
9934 Mask &= ~fcFinite;
9935 } else if ((Mask & fcFinite) == fcPosFinite) {
9936 // finite(V) && V > 0 ==> V u< exp_mask
9937 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9938 ExpMaskC));
9939 Mask &= ~fcPosFinite;
9940 } else if ((Mask & fcFinite) == fcNegFinite) {
9941 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9942 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9943 ExpMaskC);
9944 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9945 appendToRes(And);
9946 Mask &= ~fcNegFinite;
9947 }
9948
9949 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9950 // fcZero | fcSubnormal => test all exponent bits are 0
9951 // TODO: Handle sign bit specific cases
9952 // TODO: Handle inverted case
9953 if (PartialCheck == (fcZero | fcSubnormal)) {
9954 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9955 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9956 ExpBits, ZeroC));
9957 Mask &= ~PartialCheck;
9958 }
9959 }
9960
9961 // Check for individual classes.
9962 if (FPClassTest PartialCheck = Mask & fcZero) {
9963 if (PartialCheck == fcPosZero)
9964 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9965 AsInt, ZeroC));
9966 else if (PartialCheck == fcZero)
9967 appendToRes(
9968 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9969 else // fcNegZero
9970 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9971 AsInt, SignBitC));
9972 }
9973
9974 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9975 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9976 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
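// E.g. for f32 this tests abs(V) - 1 u< 0x007fffff, i.e. abs(V) is in
// [1, 0x007fffff]: a non-zero value whose exponent field is all zero.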
9977 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9978 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9979 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9980 auto SubnormalRes =
9981 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9982 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9983 if (PartialCheck == fcNegSubnormal)
9984 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9985 appendToRes(SubnormalRes);
9986 }
9987
9988 if (FPClassTest PartialCheck = Mask & fcInf) {
9989 if (PartialCheck == fcPosInf)
9990 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9991 AsInt, InfC));
9992 else if (PartialCheck == fcInf)
9993 appendToRes(
9994 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9995 else { // fcNegInf
9996 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9997 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9998 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9999 AsInt, NegInfC));
10000 }
10001 }
10002
10003 if (FPClassTest PartialCheck = Mask & fcNan) {
10004 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10005 if (PartialCheck == fcNan) {
10006 // isnan(V) ==> abs(V) u> int(inf)
10007 appendToRes(
10008 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10009 } else if (PartialCheck == fcQNan) {
10010 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10011 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10012 InfWithQnanBitC));
10013 } else { // fcSNan
10014 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10015 // abs(V) u< (unsigned(Inf) | quiet_bit)
10016 auto IsNan =
10017 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10018 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10019 Abs, InfWithQnanBitC);
10020 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10021 }
10022 }
10023
10024 if (FPClassTest PartialCheck = Mask & fcNormal) {
10025 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10026 // (max_exp-1))
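// E.g. for f32: ExpLSB = 0x00800000 and MaxExpMinusOne = 0x7f000000, so the
// comparison holds exactly when the biased exponent is in 1..254.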
10027 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10028 auto ExpMinusOne = MIRBuilder.buildSub(
10029 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10030 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10031 auto NormalRes =
10032 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10033 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10034 if (PartialCheck == fcNegNormal)
10035 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10036 else if (PartialCheck == fcPosNormal) {
10037 auto PosSign = MIRBuilder.buildXor(
10038 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10039 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10040 }
10041 appendToRes(NormalRes);
10042 }
10043
10044 MIRBuilder.buildCopy(DstReg, Res);
10045 MI.eraseFromParent();
10046 return Legalized;
10047}
10048
10049 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
10050 // Implement G_SELECT in terms of XOR, AND, OR.
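// I.e. res = (t & mask) | (f & ~mask), with the condition sign-extended so
// the mask is all-ones or all-zeros per element.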
10051 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10052 MI.getFirst4RegLLTs();
10053
10054 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10055 if (IsEltPtr) {
10056 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10057 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10058 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10059 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10060 DstTy = NewTy;
10061 }
10062
10063 if (MaskTy.isScalar()) {
10064 // Turn the scalar condition into a vector condition mask if needed.
10065
10066 Register MaskElt = MaskReg;
10067
10068 // The condition was potentially zero extended before, but we want a sign
10069 // extended boolean.
10070 if (MaskTy != LLT::scalar(1))
10071 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10072
10073 // Continue the sign extension (or truncate) to match the data type.
10074 MaskElt =
10075 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10076
10077 if (DstTy.isVector()) {
10078 // Generate a vector splat idiom.
10079 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10080 MaskReg = ShufSplat.getReg(0);
10081 } else {
10082 MaskReg = MaskElt;
10083 }
10084 MaskTy = DstTy;
10085 } else if (!DstTy.isVector()) {
10086 // Cannot handle the case that mask is a vector and dst is a scalar.
10087 return UnableToLegalize;
10088 }
10089
10090 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10091 return UnableToLegalize;
10092 }
10093
10094 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10095 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10096 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10097 if (IsEltPtr) {
10098 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10099 MIRBuilder.buildIntToPtr(DstReg, Or);
10100 } else {
10101 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10102 }
10103 MI.eraseFromParent();
10104 return Legalized;
10105}
10106
10107 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
10108 // Split DIVREM into individual instructions.
10109 unsigned Opcode = MI.getOpcode();
10110
10111 MIRBuilder.buildInstr(
10112 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10113 : TargetOpcode::G_UDIV,
10114 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10115 MIRBuilder.buildInstr(
10116 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10117 : TargetOpcode::G_UREM,
10118 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10119 MI.eraseFromParent();
10120 return Legalized;
10121}
10122
10123 LegalizerHelper::LegalizeResult
10124 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10125 // Expand %res = G_ABS %a into:
10126 // %v1 = G_ASHR %a, scalar_size-1
10127 // %v2 = G_ADD %a, %v1
10128 // %res = G_XOR %v2, %v1
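// E.g. for s8 with %a = -5 (0xfb): %v1 = 0xff, %v2 = 0xfa,
// %res = 0xfa ^ 0xff = 0x05.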
10129 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10130 Register OpReg = MI.getOperand(1).getReg();
10131 auto ShiftAmt =
10132 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10133 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10134 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10135 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10136 MI.eraseFromParent();
10137 return Legalized;
10138}
10139
10140 LegalizerHelper::LegalizeResult
10141 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
10142 // Expand %res = G_ABS %a into:
10143 // %v1 = G_CONSTANT 0
10144 // %v2 = G_SUB %v1, %a
10145 // %res = G_SMAX %a, %v2
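// E.g. abs(-7) = smax(-7, 7) = 7 and abs(7) = smax(7, -7) = 7.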
10146 Register SrcReg = MI.getOperand(1).getReg();
10147 LLT Ty = MRI.getType(SrcReg);
10148 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10149 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10150 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10151 MI.eraseFromParent();
10152 return Legalized;
10153}
10154
10155 LegalizerHelper::LegalizeResult
10156 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
10157 Register SrcReg = MI.getOperand(1).getReg();
10158 Register DestReg = MI.getOperand(0).getReg();
10159 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10160 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10161 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10162 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10163 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10164 MI.eraseFromParent();
10165 return Legalized;
10166}
10167
10170 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10171 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10172 "Expected G_ABDS or G_ABDU instruction");
10173
10174 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10175 LLT Ty = MRI.getType(LHS);
10176
10177 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10178 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10179 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10180 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10181 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10182 ? CmpInst::ICMP_SGT
10183 : CmpInst::ICMP_UGT;
10184 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10185 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10186
10187 MI.eraseFromParent();
10188 return Legalized;
10189}
10190
10193 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10194 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10195 "Expected G_ABDS or G_ABDU instruction");
10196
10197 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10198 LLT Ty = MRI.getType(LHS);
10199
10200 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10201 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10202 Register MaxReg, MinReg;
10203 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10204 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10205 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10206 } else {
10207 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10208 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10209 }
10210 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10211
10212 MI.eraseFromParent();
10213 return Legalized;
10214}
10215
10216 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10217 Register SrcReg = MI.getOperand(1).getReg();
10218 Register DstReg = MI.getOperand(0).getReg();
10219
10220 LLT Ty = MRI.getType(DstReg);
10221
10222 // Reset sign bit
10223 MIRBuilder.buildAnd(
10224 DstReg, SrcReg,
10225 MIRBuilder.buildConstant(
10226 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10227
10228 MI.eraseFromParent();
10229 return Legalized;
10230}
10231
10232 LegalizerHelper::LegalizeResult
10233 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10234 Register SrcReg = MI.getOperand(1).getReg();
10235 LLT SrcTy = MRI.getType(SrcReg);
10236 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10237
10238 // The source could be a scalar if the IR type was <1 x sN>.
10239 if (SrcTy.isScalar()) {
10240 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10241 return UnableToLegalize; // FIXME: handle extension.
10242 // This can be just a plain copy.
10243 Observer.changingInstr(MI);
10244 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10245 Observer.changedInstr(MI);
10246 return Legalized;
10247 }
10248 return UnableToLegalize;
10249}
10250
10251 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10252 MachineFunction &MF = *MI.getMF();
10253 const DataLayout &DL = MIRBuilder.getDataLayout();
10254 LLVMContext &Ctx = MF.getFunction().getContext();
10255 Register ListPtr = MI.getOperand(1).getReg();
10256 LLT PtrTy = MRI.getType(ListPtr);
10257
10258 // ListPtr is a pointer to the head of the list. Get the address
10259 // of the head of the list.
10260 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10261 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10262 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10263 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10264
10265 const Align A(MI.getOperand(2).getImm());
10266 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10267 if (A > TLI.getMinStackArgumentAlignment()) {
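// If this slot needs more alignment than the stack guarantees, round the
// list pointer up: VAList = (VAList + A - 1) & ~(A - 1).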
10268 Register AlignAmt =
10269 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10270 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10271 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10272 VAList = AndDst.getReg(0);
10273 }
10274
10275 // Increment the pointer, VAList, to the next vaarg
10276 // The list should be bumped by the size of the element in the current
10277 // head of the list.
10278 Register Dst = MI.getOperand(0).getReg();
10279 LLT LLTTy = MRI.getType(Dst);
10280 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10281 auto IncAmt =
10282 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10283 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10284
10285 // Store the incremented VAList to the legalized pointer
10286 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10287 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10288 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10289 // Load the actual argument out of the pointer VAList
10290 Align EltAlignment = DL.getABITypeAlign(Ty);
10291 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10292 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10293 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10294
10295 MI.eraseFromParent();
10296 return Legalized;
10297}
10298
10299 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10300 // On Darwin, -Os means optimize for size without hurting performance, so
10301 // only really optimize for size when -Oz (MinSize) is used.
10302 if (MF.getTarget().getTargetTriple().isOSDarwin())
10303 return MF.getFunction().hasMinSize();
10304 return MF.getFunction().hasOptSize();
10305}
10306
10307// Returns a list of types to use for memory op lowering in MemOps. A partial
10308// port of findOptimalMemOpLowering in TargetLowering.
10309static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10310 unsigned Limit, const MemOp &Op,
10311 unsigned DstAS, unsigned SrcAS,
10312 const AttributeList &FuncAttributes,
10313 const TargetLowering &TLI) {
10314 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10315 return false;
10316
10317 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10318
10319 if (Ty == LLT()) {
10320 // Use the largest scalar type whose alignment constraints are satisfied.
10321 // We only need to check DstAlign here as SrcAlign is always greater or
10322 // equal to DstAlign (or zero).
10323 Ty = LLT::scalar(64);
10324 if (Op.isFixedDstAlign())
10325 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10326 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10327 Ty = LLT::scalar(Ty.getSizeInBytes());
10328 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10329 // FIXME: check for the largest legal type we can load/store to.
10330 }
10331
10332 unsigned NumMemOps = 0;
10333 uint64_t Size = Op.size();
10334 while (Size) {
10335 unsigned TySize = Ty.getSizeInBytes();
10336 while (TySize > Size) {
10337 // For now, only use non-vector loads / stores for the left-over pieces.
10338 LLT NewTy = Ty;
10339 // FIXME: check for mem op safety and legality of the types. Not all of
10340 // SDAGisms map cleanly to GISel concepts.
10341 if (NewTy.isVector())
10342 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10343 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10344 unsigned NewTySize = NewTy.getSizeInBytes();
10345 assert(NewTySize > 0 && "Could not find appropriate type");
10346
10347 // If the new LLT cannot cover all of the remaining bits, then consider
10348 // issuing a (or a pair of) unaligned and overlapping load / store.
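// E.g. a 7-byte copy can be done as two overlapping 4-byte accesses at
// offsets 0 and 3 instead of separate 4-, 2- and 1-byte accesses.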
10349 unsigned Fast;
10350 // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
10351 MVT VT = getMVTForLLT(Ty);
10352 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10353 TLI.allowsMisalignedMemoryAccesses(
10354 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10355 MachineMemOperand::MONone, &Fast) &&
10356 Fast)
10357 TySize = Size;
10358 else {
10359 Ty = NewTy;
10360 TySize = NewTySize;
10361 }
10362 }
10363
10364 if (++NumMemOps > Limit)
10365 return false;
10366
10367 MemOps.push_back(Ty);
10368 Size -= TySize;
10369 }
10370
10371 return true;
10372}
10373
10374// Get a vectorized representation of the memset value operand, GISel edition.
10375 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10376 MachineRegisterInfo &MRI = *MIB.getMRI();
10377 unsigned NumBits = Ty.getScalarSizeInBits();
10378 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10379 if (!Ty.isVector() && ValVRegAndVal) {
10380 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10381 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10382 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10383 }
10384
10385 // Extend the byte value to the larger type, and then multiply by a magic
10386 // value 0x010101... in order to replicate it across every byte.
10387 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
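// E.g. extending the byte 0xab to s32 and multiplying by 0x01010101 yields
// the splat value 0xabababab.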
10388 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10389 return MIB.buildConstant(Ty, 0).getReg(0);
10390 }
10391
10392 LLT ExtType = Ty.getScalarType();
10393 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10394 if (NumBits > 8) {
10395 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10396 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10397 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10398 }
10399
10400 // For vector types create a G_BUILD_VECTOR.
10401 if (Ty.isVector())
10402 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10403
10404 return Val;
10405}
10406
10407 LegalizerHelper::LegalizeResult
10408LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10409 uint64_t KnownLen, Align Alignment,
10410 bool IsVolatile) {
10411 auto &MF = *MI.getParent()->getParent();
10412 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10413 auto &DL = MF.getDataLayout();
10414 LLVMContext &C = MF.getFunction().getContext();
10415
10416 assert(KnownLen != 0 && "Have a zero length memset length!");
10417
10418 bool DstAlignCanChange = false;
10419 MachineFrameInfo &MFI = MF.getFrameInfo();
10420 bool OptSize = shouldLowerMemFuncForSize(MF);
10421
10422 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10423 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10424 DstAlignCanChange = true;
10425
10426 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10427 std::vector<LLT> MemOps;
10428
10429 const auto &DstMMO = **MI.memoperands_begin();
10430 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10431
10432 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10433 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10434
10435 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10436 MemOp::Set(KnownLen, DstAlignCanChange,
10437 Alignment,
10438 /*IsZeroMemset=*/IsZeroVal,
10439 /*IsVolatile=*/IsVolatile),
10440 DstPtrInfo.getAddrSpace(), ~0u,
10441 MF.getFunction().getAttributes(), TLI))
10442 return UnableToLegalize;
10443
10444 if (DstAlignCanChange) {
10445 // Get an estimate of the type from the LLT.
10446 Type *IRTy = getTypeForLLT(MemOps[0], C);
10447 Align NewAlign = DL.getABITypeAlign(IRTy);
10448 if (NewAlign > Alignment) {
10449 Alignment = NewAlign;
10450 unsigned FI = FIDef->getOperand(1).getIndex();
10451 // Give the stack frame object a larger alignment if needed.
10452 if (MFI.getObjectAlign(FI) < Alignment)
10453 MFI.setObjectAlignment(FI, Alignment);
10454 }
10455 }
10456
10457 MachineIRBuilder MIB(MI);
10458 // Find the largest store and generate the bit pattern for it.
10459 LLT LargestTy = MemOps[0];
10460 for (unsigned i = 1; i < MemOps.size(); i++)
10461 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10462 LargestTy = MemOps[i];
10463
10464 // The memset stored value is always defined as an s8, so in order to make it
10465 // work with larger store types we need to repeat the bit pattern across the
10466 // wider type.
10467 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10468
10469 if (!MemSetValue)
10470 return UnableToLegalize;
10471
10472 // Generate the stores. For each store type in the list, we generate the
10473 // matching store of that type to the destination address.
10474 LLT PtrTy = MRI.getType(Dst);
10475 unsigned DstOff = 0;
10476 unsigned Size = KnownLen;
10477 for (unsigned I = 0; I < MemOps.size(); I++) {
10478 LLT Ty = MemOps[I];
10479 unsigned TySize = Ty.getSizeInBytes();
10480 if (TySize > Size) {
10481 // Issuing an unaligned load / store pair that overlaps with the previous
10482 // pair. Adjust the offset accordingly.
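// E.g. a 7-byte memset lowered as two s32 stores places the second store at
// offset 3 rather than 4, overlapping the first store by one byte.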
10483 assert(I == MemOps.size() - 1 && I != 0);
10484 DstOff -= TySize - Size;
10485 }
10486
10487 // If this store is smaller than the largest store see whether we can get
10488 // the smaller value for free with a truncate.
10489 Register Value = MemSetValue;
10490 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10491 MVT VT = getMVTForLLT(Ty);
10492 MVT LargestVT = getMVTForLLT(LargestTy);
10493 if (!LargestTy.isVector() && !Ty.isVector() &&
10494 TLI.isTruncateFree(LargestVT, VT))
10495 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10496 else
10497 Value = getMemsetValue(Val, Ty, MIB);
10498 if (!Value)
10499 return UnableToLegalize;
10500 }
10501
10502 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10503
10504 Register Ptr = Dst;
10505 if (DstOff != 0) {
10506 auto Offset =
10507 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10508 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10509 }
10510
10511 MIB.buildStore(Value, Ptr, *StoreMMO);
10512 DstOff += Ty.getSizeInBytes();
10513 Size -= TySize;
10514 }
10515
10516 MI.eraseFromParent();
10517 return Legalized;
10518}
10519
10520 LegalizerHelper::LegalizeResult
10521LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10522 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10523
10524 auto [Dst, Src, Len] = MI.getFirst3Regs();
10525
10526 const auto *MMOIt = MI.memoperands_begin();
10527 const MachineMemOperand *MemOp = *MMOIt;
10528 bool IsVolatile = MemOp->isVolatile();
10529
10530 // See if this is a constant length copy
10531 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10532 // FIXME: support dynamically sized G_MEMCPY_INLINE
10533 assert(LenVRegAndVal &&
10534 "inline memcpy with dynamic size is not yet supported");
10535 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10536 if (KnownLen == 0) {
10537 MI.eraseFromParent();
10538 return Legalized;
10539 }
10540
10541 const auto &DstMMO = **MI.memoperands_begin();
10542 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10543 Align DstAlign = DstMMO.getBaseAlign();
10544 Align SrcAlign = SrcMMO.getBaseAlign();
10545
10546 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10547 IsVolatile);
10548}
10549
10550 LegalizerHelper::LegalizeResult
10551LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10552 uint64_t KnownLen, Align DstAlign,
10553 Align SrcAlign, bool IsVolatile) {
10554 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10555 return lowerMemcpy(MI, Dst, Src, KnownLen,
10556 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10557 IsVolatile);
10558}
10559
10560 LegalizerHelper::LegalizeResult
10561LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10562 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10563 Align SrcAlign, bool IsVolatile) {
10564 auto &MF = *MI.getParent()->getParent();
10565 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10566 auto &DL = MF.getDataLayout();
10567 LLVMContext &C = MF.getFunction().getContext();
10568
10569 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10570
10571 bool DstAlignCanChange = false;
10572 MachineFrameInfo &MFI = MF.getFrameInfo();
10573 Align Alignment = std::min(DstAlign, SrcAlign);
10574
10575 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10576 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10577 DstAlignCanChange = true;
10578
10579 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10580 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10581 // if the memcpy is in a tail call position.
10582
10583 std::vector<LLT> MemOps;
10584
10585 const auto &DstMMO = **MI.memoperands_begin();
10586 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10587 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10588 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10589
10591 MemOps, Limit,
10592 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10593 IsVolatile),
10594 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10595 MF.getFunction().getAttributes(), TLI))
10596 return UnableToLegalize;
10597
10598 if (DstAlignCanChange) {
10599 // Get an estimate of the type from the LLT.
10600 Type *IRTy = getTypeForLLT(MemOps[0], C);
10601 Align NewAlign = DL.getABITypeAlign(IRTy);
10602
10603 // Don't promote to an alignment that would require dynamic stack
10604 // realignment.
10605 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10606 if (!TRI->hasStackRealignment(MF))
10607 if (MaybeAlign StackAlign = DL.getStackAlignment())
10608 NewAlign = std::min(NewAlign, *StackAlign);
10609
10610 if (NewAlign > Alignment) {
10611 Alignment = NewAlign;
10612 unsigned FI = FIDef->getOperand(1).getIndex();
10613 // Give the stack frame object a larger alignment if needed.
10614 if (MFI.getObjectAlign(FI) < Alignment)
10615 MFI.setObjectAlignment(FI, Alignment);
10616 }
10617 }
10618
10619 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10620
10621 MachineIRBuilder MIB(MI);
10622 // Now we need to emit a pair of load and stores for each of the types we've
10623 // collected. I.e. for each type, generate a load from the source pointer of
10624 // that type width, and then generate a corresponding store to the dest buffer
10625 // of that value loaded. This can result in a sequence of loads and stores
10626 // of mixed types, depending on what the target specifies as good types to use.
10627 unsigned CurrOffset = 0;
10628 unsigned Size = KnownLen;
10629 for (auto CopyTy : MemOps) {
10630 // Issuing an unaligned load / store pair that overlaps with the previous
10631 // pair. Adjust the offset accordingly.
10632 if (CopyTy.getSizeInBytes() > Size)
10633 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10634
10635 // Construct MMOs for the accesses.
10636 auto *LoadMMO =
10637 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10638 auto *StoreMMO =
10639 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10640
10641 // Create the load.
10642 Register LoadPtr = Src;
10643 Register Offset;
10644 if (CurrOffset != 0) {
10645 LLT SrcTy = MRI.getType(Src);
10646 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10647 .getReg(0);
10648 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10649 }
10650 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10651
10652 // Create the store.
10653 Register StorePtr = Dst;
10654 if (CurrOffset != 0) {
10655 LLT DstTy = MRI.getType(Dst);
10656 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10657 }
10658 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10659 CurrOffset += CopyTy.getSizeInBytes();
10660 Size -= CopyTy.getSizeInBytes();
10661 }
10662
10663 MI.eraseFromParent();
10664 return Legalized;
10665}
10666
10668LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10669 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10670 bool IsVolatile) {
10671 auto &MF = *MI.getParent()->getParent();
10672 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10673 auto &DL = MF.getDataLayout();
10674 LLVMContext &C = MF.getFunction().getContext();
10675
10676 assert(KnownLen != 0 && "Have a zero length memmove length!");
10677
10678 bool DstAlignCanChange = false;
10679 MachineFrameInfo &MFI = MF.getFrameInfo();
10680 bool OptSize = shouldLowerMemFuncForSize(MF);
10681 Align Alignment = std::min(DstAlign, SrcAlign);
10682
10683 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10684 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10685 DstAlignCanChange = true;
10686
10687 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10688 std::vector<LLT> MemOps;
10689
10690 const auto &DstMMO = **MI.memoperands_begin();
10691 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10692 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10693 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10694
10695 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10696 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10697 // same thing here.
10698 if (!findGISelOptimalMemOpLowering(
10699 MemOps, Limit,
10700 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10701 /*IsVolatile*/ true),
10702 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10703 MF.getFunction().getAttributes(), TLI))
10704 return UnableToLegalize;
10705
10706 if (DstAlignCanChange) {
10707 // Get an estimate of the type from the LLT.
10708 Type *IRTy = getTypeForLLT(MemOps[0], C);
10709 Align NewAlign = DL.getABITypeAlign(IRTy);
10710
10711 // Don't promote to an alignment that would require dynamic stack
10712 // realignment.
10713 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10714 if (!TRI->hasStackRealignment(MF))
10715 if (MaybeAlign StackAlign = DL.getStackAlignment())
10716 NewAlign = std::min(NewAlign, *StackAlign);
10717
10718 if (NewAlign > Alignment) {
10719 Alignment = NewAlign;
10720 unsigned FI = FIDef->getOperand(1).getIndex();
10721 // Give the stack frame object a larger alignment if needed.
10722 if (MFI.getObjectAlign(FI) < Alignment)
10723 MFI.setObjectAlignment(FI, Alignment);
10724 }
10725 }
10726
10727 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10728
10729 MachineIRBuilder MIB(MI);
10730 // Memmove requires that we perform the loads first before issuing the stores.
10731 // Apart from that, this loop is pretty much doing the same thing as the
10732 // memcpy codegen function.
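// (The ranges may overlap, so every source byte must be read before any
// destination byte is written.)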
10733 unsigned CurrOffset = 0;
10734 SmallVector<Register, 16> LoadVals;
10735 for (auto CopyTy : MemOps) {
10736 // Construct MMO for the load.
10737 auto *LoadMMO =
10738 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10739
10740 // Create the load.
10741 Register LoadPtr = Src;
10742 if (CurrOffset != 0) {
10743 LLT SrcTy = MRI.getType(Src);
10744 auto Offset =
10745 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10746 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10747 }
10748 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10749 CurrOffset += CopyTy.getSizeInBytes();
10750 }
10751
10752 CurrOffset = 0;
10753 for (unsigned I = 0; I < MemOps.size(); ++I) {
10754 LLT CopyTy = MemOps[I];
10755 // Now store the values loaded.
10756 auto *StoreMMO =
10757 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10758
10759 Register StorePtr = Dst;
10760 if (CurrOffset != 0) {
10761 LLT DstTy = MRI.getType(Dst);
10762 auto Offset =
10763 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10764 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10765 }
10766 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10767 CurrOffset += CopyTy.getSizeInBytes();
10768 }
10769 MI.eraseFromParent();
10770 return Legalized;
10771}
10772
10773 LegalizerHelper::LegalizeResult
10774 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10775 const unsigned Opc = MI.getOpcode();
10776 // This combine is fairly complex so it's not written with a separate
10777 // matcher function.
10778 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10779 Opc == TargetOpcode::G_MEMSET) &&
10780 "Expected memcpy like instruction");
10781
10782 auto MMOIt = MI.memoperands_begin();
10783 const MachineMemOperand *MemOp = *MMOIt;
10784
10785 Align DstAlign = MemOp->getBaseAlign();
10786 Align SrcAlign;
10787 auto [Dst, Src, Len] = MI.getFirst3Regs();
10788
10789 if (Opc != TargetOpcode::G_MEMSET) {
10790 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10791 MemOp = *(++MMOIt);
10792 SrcAlign = MemOp->getBaseAlign();
10793 }
10794
10795 // See if this is a constant length copy
10796 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10797 if (!LenVRegAndVal)
10798 return UnableToLegalize;
10799 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10800
10801 if (KnownLen == 0) {
10802 MI.eraseFromParent();
10803 return Legalized;
10804 }
10805
10806 if (MaxLen && KnownLen > MaxLen)
10807 return UnableToLegalize;
10808
10809 bool IsVolatile = MemOp->isVolatile();
10810 if (Opc == TargetOpcode::G_MEMCPY) {
10811 auto &MF = *MI.getParent()->getParent();
10812 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10813 bool OptSize = shouldLowerMemFuncForSize(MF);
10814 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10815 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10816 IsVolatile);
10817 }
10818 if (Opc == TargetOpcode::G_MEMMOVE)
10819 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10820 if (Opc == TargetOpcode::G_MEMSET)
10821 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10822 return UnableToLegalize;
10823}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
APInt bitcastToAPInt() const
Definition APFloat.h:1335
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1469
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
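The LLT constructors and queries listed above compose naturally. The following is a small illustrative sketch, not code from this file; the header path varies across LLVM versions.
// Illustrative sketch of the LLT API listed above.
// Recent trees use: #include "llvm/CodeGenTypes/LowLevelType.h"
#include <cassert>
static void lltSketch() {
  using namespace llvm;
  LLT S32 = LLT::scalar(32);                        // 32-bit scalar
  LLT V4S32 = LLT::fixed_vector(4, 32);             // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);                     // 64-bit pointer in AS 0
  assert(V4S32.isVector() && V4S32.getNumElements() == 4);
  assert(V4S32.getScalarType() == S32);
  assert(V4S32.getSizeInBits().getKnownMinValue() == 128);
  LLT V4S16 = V4S32.changeElementSize(16);          // <4 x s16>
  LLT V8S32 = V4S32.changeElementCount(ElementCount::getFixed(8)); // <8 x s32>
  (void)P0; (void)V4S16; (void)V8S32;
}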
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
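As a hedged sketch of how these results are typically produced, the hypothetical helper below mirrors the pattern used by the lower* members of this file: check preconditions, build replacement instructions, erase the original, and report the outcome.
// Hypothetical lowering helper (not part of this file).
static LegalizerHelper::LegalizeResult lowerNegSketch(LegalizerHelper &Helper,
                                                      MachineInstr &MI) {
  MachineIRBuilder &B = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *B.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  if (!Ty.isScalar())
    return LegalizerHelper::UnableToLegalize;   // this sketch only handles scalars
  auto Zero = B.buildConstant(Ty, 0);
  B.buildSub(Dst, Zero, Src);                   // Dst = 0 - Src
  MI.eraseFromParent();
  return LegalizerHelper::Legalized;
}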
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
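A brief sketch of how these builder calls compose. The registers, memory operand, and insertion point below are illustrative assumptions, not code from this file.
// Sketch: emit ((Val << 8) | Val), truncate to 32 bits, then load through PtrReg.
static void builderSketch(MachineIRBuilder &MIRBuilder, MachineFunction &MF,
                          Register Val, Register PtrReg) {
  LLT S64 = LLT::scalar(64), S32 = LLT::scalar(32);
  auto C8 = MIRBuilder.buildConstant(S64, 8);
  auto Shl = MIRBuilder.buildShl(S64, Val, C8);       // Val << 8
  auto Or = MIRBuilder.buildOr(S64, Shl, Val);        // (Val << 8) | Val
  auto Lo = MIRBuilder.buildTrunc(S32, Or);           // low 32 bits
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getUnknownStack(MF), MachineMemOperand::MOLoad, S32,
      Align(4));
  auto Ld = MIRBuilder.buildLoad(S32, PtrReg, *MMO);  // G_LOAD of an s32
  (void)Lo; (void)Ld;
}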
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:627
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
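These actions are normally requested per opcode and type through LegalizerInfo's rule builder. A hedged sketch follows; getActionDefinitionsBuilder belongs to LegalizerInfo (not this file), and the opcodes and types are illustrative.
// Sketch, inside a target's LegalizerInfo constructor: rules mapping onto the actions above.
const LLT S32 = LLT::scalar(32), S64 = LLT::scalar(64);
getActionDefinitionsBuilder(TargetOpcode::G_ADD)
    .legalFor({S32, S64})            // Legal for 32/64-bit scalars
    .clampScalar(0, S32, S64)        // NarrowScalar / WidenScalar into range
    .widenScalarToNextPow2(0);       // WidenScalar to a power-of-two width
getActionDefinitionsBuilder(TargetOpcode::G_FREM)
    .libcallFor({S32, S64});         // Libcall (fmodf / fmod)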
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
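A minimal usage sketch of the pattern-match helpers above; AmtReg is a hypothetical virtual register.
// Sketch: test whether AmtReg is defined by a G_CONSTANT and read its value.
APInt ShiftAmt;
if (mi_match(AmtReg, MRI, m_ICst(ShiftAmt))) {
  // AmtReg is a constant; ShiftAmt now holds its value.
}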
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2039
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:652
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:295
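For example (Reg is a hypothetical virtual register):
// Sketch: read an immediate when Reg is directly defined by G_CONSTANT.
if (std::optional<APInt> MaybeCst = getIConstantVRegVal(Reg, MRI)) {
  uint64_t Imm = MaybeCst->getZExtValue();
  (void)Imm; // use the immediate ...
}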
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for an N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1569
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1626
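A hedged usage sketch (DenomReg is hypothetical): check that a scalar constant or every element of a constant build-vector is nonzero.
// Sketch: reject division by a possibly-zero constant operand.
bool AllNonZero = matchUnaryPredicate(MRI, DenomReg, [](const Constant *C) {
  const auto *CI = dyn_cast_or_null<ConstantInt>(C);
  return CI && !CI->isZero();
});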
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1193
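For scalar inputs this reduces to the arithmetic LCM of the bit widths (a hedged note; the vector cases additionally reshape element counts):
//   e.g. getLCMType(LLT::scalar(32), LLT::scalar(64)) == LLT::scalar(64)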
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
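A short sketch (WideReg is a hypothetical s128 register; the surrounding MIRBuilder and MRI come from the legalizer context):
// Sketch: split an s128 value into four s32 pieces, lowest bits first.
SmallVector<Register, 4> Parts;
extractParts(WideReg, LLT::scalar(32), /*NumParts=*/4, Parts, MIRBuilder, MRI);
// Parts[0] holds bits [31:0] of WideReg; Parts[3] holds bits [127:96].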
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1835
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:434
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:349
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1281
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:610
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
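A minimal, self-contained sketch giving concrete values for the math and alignment helpers referenced above:
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;
static void mathHelperSketch() {
  assert(isPowerOf2_32(64));                        // 64 is a power of two
  assert(PowerOf2Ceil(17) == 32);                   // smallest power of two >= 17
  assert(NextPowerOf2(16) == 32);                   // strictly greater than 16
  assert(Log2_32(32) == 5);                         // floor(log2(32))
  assert(alignTo(10, Align(8)) == 16);              // round 10 up to a multiple of 8
  assert(commonAlignment(Align(16), /*Offset=*/8) == Align(8));
}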
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)